Я пытаюсь сделать что-то действительно простое, но не могу.

У меня есть этот фрейм данных:

structure(list(First_ID = c(1L, 2L, 4L, 5L, 6L, 8L, 
9L, 20L), First_Lat = c(14.60543375, 14.60928364, 14.60928364, 
14.55621093, 14.50756, 14.5802, 14.5802, 14.66019), First_Lon = c(-90.53911871, 
-90.53998477, -90.53998477, -90.54753174, -90.47934, -90.54794, 
-90.54794, -90.49326), First_11.Territrorios = c("Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate")), class = "data.frame", row.names = c(NA, 
-8L))

И я хочу создать еще один со всеми возможными совпадениями, чтобы он выглядел так:

structure(list(X.U.FEFF.First_ID = c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 
6L, 6L, 6L, 8L, 8L, 9L), First_Lat = c(14.60543375, 14.60543375, 
14.60543375, 14.60543375, 14.60543375, 14.60543375, 14.60543375, 
14.60928364, 14.60928364, 14.60928364, 14.60928364, 14.60928364, 
14.60928364, 14.60928364, 14.60928364, 14.60928364, 14.60928364, 
14.60928364, 14.55621093, 14.55621093, 14.55621093, 14.55621093, 
14.50756, 14.50756, 14.50756, 14.5802, 14.5802, 14.5802), First_Lon = c(-90.53911871, 
-90.53911871, -90.53911871, -90.53911871, -90.53911871, -90.53911871, 
-90.53911871, -90.53998477, -90.53998477, -90.53998477, -90.53998477, 
-90.53998477, -90.53998477, -90.53998477, -90.53998477, -90.53998477, 
-90.53998477, -90.53998477, -90.54753174, -90.54753174, -90.54753174, 
-90.54753174, -90.47934, -90.47934, -90.47934, -90.54794, -90.54794, 
-90.54794), First_11.Territrorios = c("Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate"), Second_ID = c(2L, 4L, 5L, 
6L, 8L, 9L, 20L, 4L, 5L, 6L, 8L, 9L, 20L, 5L, 6L, 8L, 9L, 20L, 
6L, 8L, 9L, 20L, 8L, 9L, 20L, 9L, 20L, 20L), Second_Lat = c(14.60928364, 
14.60928364, 14.55621093, 14.50756, 14.5802, 14.5802, 14.66019, 
14.60928364, 14.55621093, 14.50756, 14.5802, 14.5802, 14.66019, 
14.55621093, 14.50756, 14.5802, 14.5802, 14.66019, 14.50756, 
14.5802, 14.5802, 14.66019, 14.5802, 14.5802, 14.66019, 14.5802, 
14.66019, 14.66019), Second_Lon = c(-90.53998477, -90.53998477, 
-90.54753174, -90.47934, -90.54794, -90.54794, -90.49326, -90.53998477, 
-90.54753174, -90.47934, -90.54794, -90.54794, -90.49326, -90.54753174, 
-90.47934, -90.54794, -90.54794, -90.49326, -90.47934, -90.54794, 
-90.54794, -90.49326, -90.54794, -90.54794, -90.49326, -90.54794, 
-90.49326, -90.49326), Second_11.Territrorios = c("Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate")), class = "data.frame", row.names = c(NA, 
-28L))

Повторяя каждую строку друг с другом, пока не останется больше пар! - Я пробовал использовать функцию combn но он дает мне список, в котором все пары для каждого столбца создаются как новый объект, и мне нужен только один фрейм данных, делающий это для всех. Также пробовал использовать expand.grid, но это дает мне эта ошибка:

Error in paste0(nmc[i], "=", if (is.numeric(x)) format(x) else x) : 
  cannot coerce type 'closure' to vector of type 'character'

Может кто-нибудь подскажет?

1
Melania CB 17 Сен 2021 в 17:02

2 ответа

Лучший ответ

Попробуйте combn, как показано ниже

do.call(
  rbind,
  combn(nrow(df),
    2,
    function(x) {
      cbind(
        df[x[1], ],
        setNames(df[x[2], ], gsub("First", "Second", names(df)))
      )
    },
    simplify = FALSE
  )
)

Который дает

   First_ID First_Lat First_Lon First_11.Territrorios Second_ID Second_Lat
1         1  14.60543 -90.53912                 Guate         2   14.60928
2         1  14.60543 -90.53912                 Guate         4   14.60928
3         1  14.60543 -90.53912                 Guate         5   14.55621
4         1  14.60543 -90.53912                 Guate         6   14.50756
5         1  14.60543 -90.53912                 Guate         8   14.58020
6         1  14.60543 -90.53912                 Guate         9   14.58020
7         1  14.60543 -90.53912                 Guate        20   14.66019
21        2  14.60928 -90.53998                 Guate         4   14.60928
22        2  14.60928 -90.53998                 Guate         5   14.55621
23        2  14.60928 -90.53998                 Guate         6   14.50756
24        2  14.60928 -90.53998                 Guate         8   14.58020
25        2  14.60928 -90.53998                 Guate         9   14.58020
26        2  14.60928 -90.53998                 Guate        20   14.66019
31        4  14.60928 -90.53998                 Guate         5   14.55621
32        4  14.60928 -90.53998                 Guate         6   14.50756
33        4  14.60928 -90.53998                 Guate         8   14.58020
34        4  14.60928 -90.53998                 Guate         9   14.58020
35        4  14.60928 -90.53998                 Guate        20   14.66019
41        5  14.55621 -90.54753                 Guate         6   14.50756
42        5  14.55621 -90.54753                 Guate         8   14.58020
43        5  14.55621 -90.54753                 Guate         9   14.58020
44        5  14.55621 -90.54753                 Guate        20   14.66019
51        6  14.50756 -90.47934                 Guate         8   14.58020
52        6  14.50756 -90.47934                 Guate         9   14.58020
53        6  14.50756 -90.47934                 Guate        20   14.66019
61        8  14.58020 -90.54794                 Guate         9   14.58020
62        8  14.58020 -90.54794                 Guate        20   14.66019
71        9  14.58020 -90.54794                 Guate        20   14.66019
   Second_Lon Second_11.Territrorios
1   -90.53998                  Guate
2   -90.53998                  Guate
3   -90.54753                  Guate
4   -90.47934                  Guate
5   -90.54794                  Guate
6   -90.54794                  Guate
7   -90.49326                  Guate
21  -90.53998                  Guate
22  -90.54753                  Guate
23  -90.47934                  Guate
24  -90.54794                  Guate
25  -90.54794                  Guate
26  -90.49326                  Guate
31  -90.54753                  Guate
32  -90.47934                  Guate
33  -90.54794                  Guate
34  -90.54794                  Guate
35  -90.49326                  Guate
41  -90.47934                  Guate
42  -90.54794                  Guate
43  -90.54794                  Guate
44  -90.49326                  Guate
51  -90.54794                  Guate
52  -90.54794                  Guate
53  -90.49326                  Guate
61  -90.54794                  Guate
62  -90.49326                  Guate
71  -90.49326                  Guate
1
ThomasIsCoding 17 Сен 2021 в 14:15

Один метод:

combinations <- combn(8,2)
combinations
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28]
# [1,]    1    1    1    1    1    1    1    2    2     2     2     2     2     3     3     3     3     3     4     4     4     4     5     5     5     6     6     7
# [2,]    2    3    4    5    6    7    8    3    4     5     6     7     8     4     5     6     7     8     5     6     7     8     6     7     8     7     8     8
out <- cbind(dat[combinations[1,],], dat[combinations[2,],])

names(out) <- c(names(dat), gsub("First", "Second", names(dat)))
out
#     First_ID First_Lat First_Lon First_11.Territrorios Second_ID Second_Lat Second_Lon Second_11.Territrorios
# 1          1  14.60543 -90.53912                 Guate         2   14.60928  -90.53998                  Guate
# 1.1        1  14.60543 -90.53912                 Guate         4   14.60928  -90.53998                  Guate
# 1.2        1  14.60543 -90.53912                 Guate         5   14.55621  -90.54753                  Guate
# 1.3        1  14.60543 -90.53912                 Guate         6   14.50756  -90.47934                  Guate
# 1.4        1  14.60543 -90.53912                 Guate         8   14.58020  -90.54794                  Guate
# 1.5        1  14.60543 -90.53912                 Guate         9   14.58020  -90.54794                  Guate
# 1.6        1  14.60543 -90.53912                 Guate        20   14.66019  -90.49326                  Guate
# 2          2  14.60928 -90.53998                 Guate         4   14.60928  -90.53998                  Guate
# 2.1        2  14.60928 -90.53998                 Guate         5   14.55621  -90.54753                  Guate
# 2.2        2  14.60928 -90.53998                 Guate         6   14.50756  -90.47934                  Guate
# 2.3        2  14.60928 -90.53998                 Guate         8   14.58020  -90.54794                  Guate
# 2.4        2  14.60928 -90.53998                 Guate         9   14.58020  -90.54794                  Guate
# 2.5        2  14.60928 -90.53998                 Guate        20   14.66019  -90.49326                  Guate
# 3          4  14.60928 -90.53998                 Guate         5   14.55621  -90.54753                  Guate
# 3.1        4  14.60928 -90.53998                 Guate         6   14.50756  -90.47934                  Guate
# 3.2        4  14.60928 -90.53998                 Guate         8   14.58020  -90.54794                  Guate
# 3.3        4  14.60928 -90.53998                 Guate         9   14.58020  -90.54794                  Guate
# 3.4        4  14.60928 -90.53998                 Guate        20   14.66019  -90.49326                  Guate
# 4          5  14.55621 -90.54753                 Guate         6   14.50756  -90.47934                  Guate
# 4.1        5  14.55621 -90.54753                 Guate         8   14.58020  -90.54794                  Guate
# 4.2        5  14.55621 -90.54753                 Guate         9   14.58020  -90.54794                  Guate
# 4.3        5  14.55621 -90.54753                 Guate        20   14.66019  -90.49326                  Guate
# 5          6  14.50756 -90.47934                 Guate         8   14.58020  -90.54794                  Guate
# 5.1        6  14.50756 -90.47934                 Guate         9   14.58020  -90.54794                  Guate
# 5.2        6  14.50756 -90.47934                 Guate        20   14.66019  -90.49326                  Guate
# 6          8  14.58020 -90.54794                 Guate         9   14.58020  -90.54794                  Guate
# 6.1        8  14.58020 -90.54794                 Guate        20   14.66019  -90.49326                  Guate
# 7          9  14.58020 -90.54794                 Guate        20   14.66019  -90.49326                  Guate

Другой метод, хотя и менее эффективный, потому что он выполняет полное декартово соединение перед фильтрацией:

subset(merge(dat, dat, by = NULL), First_ID.x < First_ID.y)
#    First_ID.x First_Lat.x First_Lon.x First_11.Territrorios.x First_ID.y First_Lat.y First_Lon.y First_11.Territrorios.y
# 9           1    14.60543   -90.53912                   Guate          2    14.60928   -90.53998                   Guate
# 17          1    14.60543   -90.53912                   Guate          4    14.60928   -90.53998                   Guate
# 18          2    14.60928   -90.53998                   Guate          4    14.60928   -90.53998                   Guate
# 25          1    14.60543   -90.53912                   Guate          5    14.55621   -90.54753                   Guate
# 26          2    14.60928   -90.53998                   Guate          5    14.55621   -90.54753                   Guate
# 27          4    14.60928   -90.53998                   Guate          5    14.55621   -90.54753                   Guate
# 33          1    14.60543   -90.53912                   Guate          6    14.50756   -90.47934                   Guate
# 34          2    14.60928   -90.53998                   Guate          6    14.50756   -90.47934                   Guate
# 35          4    14.60928   -90.53998                   Guate          6    14.50756   -90.47934                   Guate
# 36          5    14.55621   -90.54753                   Guate          6    14.50756   -90.47934                   Guate
# 41          1    14.60543   -90.53912                   Guate          8    14.58020   -90.54794                   Guate
# 42          2    14.60928   -90.53998                   Guate          8    14.58020   -90.54794                   Guate
# 43          4    14.60928   -90.53998                   Guate          8    14.58020   -90.54794                   Guate
# 44          5    14.55621   -90.54753                   Guate          8    14.58020   -90.54794                   Guate
# 45          6    14.50756   -90.47934                   Guate          8    14.58020   -90.54794                   Guate
# 49          1    14.60543   -90.53912                   Guate          9    14.58020   -90.54794                   Guate
# 50          2    14.60928   -90.53998                   Guate          9    14.58020   -90.54794                   Guate
# 51          4    14.60928   -90.53998                   Guate          9    14.58020   -90.54794                   Guate
# 52          5    14.55621   -90.54753                   Guate          9    14.58020   -90.54794                   Guate
# 53          6    14.50756   -90.47934                   Guate          9    14.58020   -90.54794                   Guate
# 54          8    14.58020   -90.54794                   Guate          9    14.58020   -90.54794                   Guate
# 57          1    14.60543   -90.53912                   Guate         20    14.66019   -90.49326                   Guate
# 58          2    14.60928   -90.53998                   Guate         20    14.66019   -90.49326                   Guate
# 59          4    14.60928   -90.53998                   Guate         20    14.66019   -90.49326                   Guate
# 60          5    14.55621   -90.54753                   Guate         20    14.66019   -90.49326                   Guate
# 61          6    14.50756   -90.47934                   Guate         20    14.66019   -90.49326                   Guate
# 62          8    14.58020   -90.54794                   Guate         20    14.66019   -90.49326                   Guate
# 63          9    14.58020   -90.54794                   Guate         20    14.66019   -90.49326                   Guate

Названия строк отвлекают, но безвредны. Имена столбцов можно обновить, как в первом примере.

Самая большая разница между выполнением cbind снаружи combn (как указано выше) и внутри с использованием FUN= заключается в том, что выполнение этого снаружи делает позвонить в cbind; внутри он делает в этом примере 28 вызовов cbind, а затем объединяет результаты. Хотя производительность не является проблемой для этого набора данных, безусловно, менее эффективно перебирать каждую комбинацию.

1
r2evans 17 Сен 2021 в 14:19