Ниже приведен набор данных (разделенный табуляцией для воспроизводимости примера). Я использую функции dplyr, чтобы добавить во фрейм данных четыре столбца. Первые три столбца должны указывать, является ли наблюдение последним в своей группе по 1) ID, 2) gene и 3) gene + exon. Два последних флага я более или менее получил, но по какой-то причине не могу получить первый — тот, который показывает, что наблюдение является последним в своей группе ID.

Мой код для генерации флагов для последних наблюдений в 2) gene и 3) gene + exon:

# Flag the last observation within each gene group and within each
# gene + exon group.
#
# row_number() == n() is TRUE only on the final row of the current group.
# The comparison does not depend on the column type, unlike the earlier
# is.na(replace(gene, n(), 1)) test, which relied on an invalid factor level
# being coerced to NA (with a warning) and marked nothing when `gene` was a
# character column.
# as.numeric() keeps the flags as numeric 1/0, as ifelse(..., 1, 0) did.
df2 <- df %>%
  dplyr::group_by(gene) %>%
  dplyr::mutate(lastObsFlagG = as.numeric(row_number() == n())) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(gene, exon) %>%
  dplyr::mutate(lastObsFlagGE = as.numeric(row_number() == n())) %>%
  # Drop the grouping before converting back to a plain data.frame.
  dplyr::ungroup() %>%
  data.frame()

Для 1) ID я пробовал

# Attempted ID flag (pipeline fragment; the input data frame is piped in from
# above). row_number() == 1 is TRUE on the FIRST row of each ID group, so this
# marks the first observation of every ID, not the last one. The flag values
# are the character strings "1"/"0" rather than numbers.
dplyr::group_by(ID) %>%
dplyr::mutate(lastObsFlagID = ifelse(row_number()==1, "1", "0"))

Но это не дает значения 1 при последнем наблюдении.


Набор данных

"ID"    "gene"  "exon"  "mutation"
"TCGA-AN-A046"  "OR4F5" "E1"    69767
"TCGA-A2-A0CP"  "SAMD11"    "E2"    925952
"TCGA-A8-A08H"  "NOC2L" "E5"    956126
"TCGA-GM-A2DM"  "NOC2L" "E4"    956911
"TCGA-GM-A2DM"  "NOC2L" "E4"    956912
"TCGA-D8-A1XM"  "KLHL17"    "E3"    961658
"TCGA-BH-A18G"  "KLHL17"    "E5"    962441
"TCGA-3C-AALI"  "KLHL17"    "E8"    963353
"TCGA-AC-A62Y"  "KLHL17"    "E9"    964004
"TCGA-AR-A2LE"  "PLEKHN1"   "E1"    966556
"TCGA-E2-A14N"  "PLEKHN1"   "E5"    970728
"TCGA-AO-A0J4"  "PLEKHN1"   "E12"   973506
"TCGA-D8-A1J9"  "HES4"  "E3"    999551
"TCGA-EW-A1PH"  "ISG15" "E2"    1014276
"TCGA-A2-A0T0"  "AGRN"  "E2"    1022338
"TCGA-GM-A2DD"  "AGRN"  "E3"    1035303
"TCGA-5L-AAT1"  "AGRN"  "E4"    1040690
"TCGA-OL-A5RW"  "AGRN"  "E8"    1043314
"TCGA-D8-A27M"  "AGRN"  "E25"   1049355
"TCGA-AR-A1AI"  "AGRN"  "E29"   1050430
"TCGA-5L-AAT0"  "AGRN"  "E36"   1055374
"TCGA-5L-AAT0"  "AGRN"  "E36"   1055376
"TCGA-C8-A8HP"  "AGRN"  "E36"   1055442
"TCGA-A7-A4SD"  "TTLL10"    "E13"   1184971
"TCGA-BH-A1F0"  "SDF4"  "E4"    1223283
"TCGA-AO-A128"  "SDF4"  "E4"    1223330
"TCGA-E9-A1R0"  "SDF4"  "E2"    1228592
"TCGA-A2-A04P"  "UBE2J2"    "E7"    1255246
"TCGA-C8-A274"  "UBE2J2"    "E7"    1255342
"TCGA-5L-AAT1"  "SCNN1D"    "E1"    1281422
"TCGA-AO-A128"  "SCNN1D"    "E6"    1287116
"TCGA-E2-A15R"  "SCNN1D"    "E7"    1287596
"TCGA-AC-A62V"  "SCNN1D"    "E11"   1290543
"TCGA-BH-A18V"  "ACAP3" "E22"   1294187
"TCGA-A7-A6VX"  "ACAP3" "E6"    1300640
"TCGA-GM-A2DB"  "ACAP3" "E3"    1303170
"TCGA-EW-A1IY"  "ACAP3" "E3"    1303176
"TCGA-D8-A1XQ"  "CPSF3L"    "E9"    1313879
"TCGA-5L-AAT1"  "CPSF3L"    "E9"    1313888
"TCGA-C8-A26Y"  "CPSF3L"    "E7"    1314919
"TCGA-D8-A1XK"  "CPSF3L"    "E2"    1321057
"TCGA-AO-A128"  "TAS1R3"    "E2"    1331863
"TCGA-A8-A07P"  "TAS1R3"    "E6"    1334323
"TCGA-A7-A0DA"  "DVL1"  "E14"   1338066
"TCGA-C8-A8HQ"  "DVL1"  "E10"   1339589
"TCGA-BH-A18T"  "DVL1"  "E8"    1340130
"TCGA-C8-A12V"  "MXRA8" "E6"    1354445
"TCGA-C8-A3M8"  "AURKAIP1"  "E2"    1374747
"TCGA-BH-A0B6"  "CCNL2" "E11"   1387308
"TCGA-A8-A09Z"  "CCNL2" "E4"    1395413
"TCGA-AC-A23H"  "MRPL20"    "E4"    1402084
"TCGA-BH-A1FU"  "MRPL20"    "E4"    1402116
"TCGA-BH-A0W4"  "MRPL20"    "E4"    1402194
"TCGA-AR-A1AH"  "MRPL20"    "E4"    1402205
"TCGA-A8-A06Q"  "ANKRD65"   "E1"    1420868
"TCGA-AC-A8OQ"  "ATAD3C"    "E1"    1450566
"TCGA-A2-A25A"  "ATAD3C"    "E11"   1462661
"TCGA-AR-A5QQ"  "ATAD3B"    "E7"    1482563
"TCGA-AO-A1KS"  "ATAD3B"    "E12"   1487900
"TCGA-AO-A124"  "ATAD3B"    "E15"   1490662
"TCGA-A7-A56D"  "ATAD3B"    "E16"   1495857
"TCGA-D8-A27N"  "ATAD3B"    "E16"   1495961
"TCGA-AR-A2LH"  "ATAD3A"    "E5"    1518929
"TCGA-EW-A1OY"  "ATAD3A"    "E12"   1525277
"TCGA-AO-A128"  "ATAD3A"    "E16"   1533987
"TCGA-BH-A1FM"  "SSU72" "E3"    1544993
"TCGA-C8-A12Y"  "MIB2"  "E4"    1623874
"TCGA-AC-A23H"  "MIB2"  "E7"    1625321
"TCGA-B6-A0RV"  "MIB2"  "E10"   1626960
"TCGA-E2-A1LG"  "MIB2"  "E19"   1629667
"TCGA-C8-A1HI"  "SLC35E2B"  "E8"    1668398
"TCGA-A2-A0CR"  "SLC35E2B"  "E8"    1668406
"TCGA-PL-A8LZ"  "SLC35E2B"  "E7"    1669727
"TCGA-C8-A3M7"  "CDK11A"    "E17"   1703847
"TCGA-D8-A1JP"  "CDK11A"    "E17"   1703884
"TCGA-C8-A1HK"  "CDK11A"    "E17"   1703915
"TCGA-D8-A27G"  "CDK11A"    "E15"   1704265
"TCGA-5L-AAT1"  "CDK11A"    "E13"   1705003
"TCGA-D8-A1JE"  "CDK11A"    "E11"   1707419
"TCGA-BH-A18P"  "CDK11A"    "E5"    1716352
"TCGA-D8-A27G"  "CDK11A"    "E5"    1716387
"TCGA-EW-A6SD"  "CDK11A"    "E5"    1716477
"TCGA-BH-A0W4"  "CDK11A"    "E3"    1721601
"TCGA-AN-A049"  "CDK11A"    "E3"    1721603
"TCGA-D8-A1XK"  "SLC35E2"   "E3"    1739030
"TCGA-LL-A5YM"  "SLC35E2"   "E1"    1745772
"TCGA-A7-A26H"  "NADK"  "E5"    1756596
"TCGA-AO-A128"  "GNB1"  "E5"    1815804
"TCGA-A2-A3Y0"  "CALML6"    "E4"    1916819
"TCGA-AR-A0U3"  "CALML6"    "E4"    1916828
"TCGA-AR-A5QQ"  "GABRD" "E3"    2025399
"TCGA-A2-A0CP"  "PRKCZ" "E8"    2148880
"TCGA-A2-A3Y0"  "PRKCZ" "E13"   2172304
"TCGA-E2-A15E"  "PRKCZ" "E13"   2172364
"TCGA-BH-A18N"  "C1orf86"   "E2"    2194056
"TCGA-OL-A5D6"  "SKI"   "E1"    2229410
"TCGA-BH-A2L8"  "SKI"   "E3"    2303317
"TCGA-D8-A4Z1"  "SKI"   "E5"    2304504
"TCGA-BH-A0B6"  "SKI"   "E5"    2304579
"TCGA-GM-A2D9"  "MORN1" "E14"   2321445
"TCGA-AR-A0TR"  "MORN1" "E12"   2336532
"TCGA-BH-A18G"  "MORN1" "E8"    2372498
"TCGA-AN-A0FV"  "MORN1" "E8"    2372505
"TCGA-AN-A0FV"  "MORN1" "E8"    2372506
"TCGA-BH-A0HF"  "MORN1" "E8"    2372521
"TCGA-E2-A1IG"  "MORN1" "E8"    2372562
"TCGA-AQ-A04J"  "MORN1" "E7"    2374471
"TCGA-BH-A1F5"  "MORN1" "E4"    2387496
"TCGA-A8-A09Z"  "MORN1" "E3"    2388258
"TCGA-BH-A0HA"  "MORN1" "E3"    2388291
"TCGA-B6-A0IA"  "RER1"  "E2"    2397118
"TCGA-E2-A10C"  "PEX10" "E3"    2408786
"TCGA-AC-A3HN"  "PLCH2" "E1"    2476493
"TCGA-AC-A8OP"  "PLCH2" "E3"    2479760
"TCGA-A2-A0YK"  "PLCH2" "E5"    2484610
"TCGA-B6-A0IK"  "PLCH2" "E11"   2491251
"TCGA-C8-A135"  "PLCH2" "E18"   2498827
"TCGA-E2-A15E"  "PLCH2" "E19"   2499127
"TCGA-PE-A5DE"  "PLCH2" "E22"   2504427
"TCGA-LD-A74U"  "PANK4" "E16"   2510680
"TCGA-S3-AA10"  "PANK4" "E15"   2511386
"TCGA-C8-A1HM"  "PANK4" "E10"   2515609
"TCGA-AC-A23H"  "PANK4" "E7"    2519284
"TCGA-BH-A18N"  "PANK4" "E4"    2520820
"TCGA-BH-A0HF"  "PANK4" "E4"    2520821
"TCGA-BH-A0HF"  "PANK4" "E3"    2521173
"TCGA-5L-AAT1"  "PANK4" "E3"    2521297
"TCGA-B6-A0RN"  "PANK4" "E2"    2521747
"TCGA-5L-AAT1"  "PANK4" "E1"    2526579
"TCGA-C8-A12V"  "TNFRSF14"  "E3"    2558349
"TCGA-OL-A66P"  "TNFRSF14"  "E6"    2561704
"TCGA-A2-A25F"  "TNFRSF14"  "E6"    2561714
"TCGA-AC-A23H"  "TNFRSF14"  "E8"    2563296
"TCGA-A7-A6VV"  "MMEL1" "E20"   2592868
"TCGA-D8-A1J8"  "MMEL1" "E20"   2592908
"TCGA-A2-A0EU"  "MMEL1" "E15"   2596056
"TCGA-AR-A1AJ"  "MMEL1" "E11"   2603941
"TCGA-E2-A572"  "MMEL1" "E10"   2604271
"TCGA-EW-A1J5"  "MMEL1" "E10"   2604275
"TCGA-E2-A574"  "MMEL1" "E9"    2605620
"TCGA-AC-A23H"  "MMEL1" "E5"    2609733
"TCGA-B6-A0IA"  "MMEL1" "E5"    2609757
"TCGA-AN-A0XS"  "ACTRT2"    "E1"    3021531
"TCGA-E9-A1RC"  "ACTRT2"    "E1"    3022445
"TCGA-AC-A6IW"  "ACTRT2"    "E1"    3022597
"TCGA-A2-A4S3"  "ACTRT2"    "E1"    3022696
"TCGA-A2-A0EX"  "PRDM16"    "E4"    3385231
"TCGA-B6-A0WZ"  "PRDM16"    "E5"    3396498
"TCGA-E9-A22E"  "PRDM16"    "E6"    3402898
"TCGA-BH-A8FZ"  "PRDM16"    "E9"    3411750
"TCGA-BH-A0HF"  "PRDM16"    "E9"    3412534
"TCGA-A8-A09V"  "PRDM16"    "E15"   3431074
"TCGA-5L-AAT1"  "ARHGEF16"  "E2"    3463558
"TCGA-AR-A1AI"  "ARHGEF16"  "E4"    3467276
"TCGA-E9-A1R4"  "ARHGEF16"  "E13"   3479535
"TCGA-AC-A5XS"  "MEGF6" "E37"   3490581
"TCGA-AC-A23H"  "MEGF6" "E7"    3514608
"TCGA-A2-A0SY"  "MEGF6" "E2"    3602584
"TCGA-AC-A23H"  "TPRG1L"    "E2"    3625459
"TCGA-AR-A1AS"  "WRAP73"    "E1"    3649987
"TCGA-BH-A0DX"  "TP73"  "E2"    3682353
"TCGA-A1-A0SF"  "TP73"  "E3"    3683095
"TCGA-5L-AAT1"  "TP73"  "E3"    3683125
"TCGA-A8-A095"  "TP73"  "E4"    3707673
"TCGA-BH-A0HA"  "TP73"  "E8"    3727733
"TCGA-E9-A1R5"  "CCDC27"    "E1"    3752488
"TCGA-BH-A0C0"  "CCDC27"    "E3"    3755474
"TCGA-GM-A5PV"  "CCDC27"    "E6"    3762664
"TCGA-A2-A3Y0"  "CCDC27"    "E9"    3766586
"TCGA-BH-A1F6"  "LRRC47"    "E7"    3781140
"TCGA-B6-A0WZ"  "LRRC47"    "E7"    3781267
"TCGA-BH-A0B6"  "LRRC47"    "E2"    3787022
"TCGA-AR-A2LE"  "CEP104"    "E19"   3823467
"TCGA-A2-A0EN"  "CEP104"    "E12"   3833888
"TCGA-A8-A09Z"  "CEP104"    "E11"   3835070
"TCGA-AC-A4ZE"  "DFFB"  "E7"    3883608
"TCGA-AR-A1AH"  "C1orf174"  "E4"    3890056
"TCGA-BH-A18G"  "AJAP1" "E2"    4711920
"TCGA-BH-A1F2"  "AJAP1" "E3"    4769894
"TCGA-AC-A23H"  "AJAP1" "E4"    4772372
"TCGA-C8-A12L"  "AJAP1" "E4"    4772486
"TCGA-AC-A23H"  "NPHP4" "E28"   5864419
"TCGA-BH-A1FD"  "NPHP4" "E28"   5864478
"TCGA-EW-A1IZ"  "NPHP4" "E27"   5865131
"TCGA-EW-A1J5"  "NPHP4" "E27"   5865172
"TCGA-A2-A04P"  "NPHP4" "E26"   5866378
"TCGA-D8-A1XZ"  "NPHP4" "E24"   5867849
"TCGA-4H-AAAK"  "NPHP4" "E20"   5877217
"TCGA-D8-A1JD"  "NPHP4" "E19"   5880178
"TCGA-GM-A2D9"  "NPHP4" "E17"   5890905
"TCGA-E2-A1LA"  "NPHP4" "E17"   5890987
"TCGA-D8-A1J8"  "NPHP4" "E17"   5891009
"TCGA-E2-A1B4"  "NPHP4" "E16"   5904787
"TCGA-EW-A1P3"  "NPHP4" "E13"   5907125
"TCGA-BH-A0EE"  "NPHP4" "E12"   5909177
"TCGA-A2-A0ER"  "NPHP4" "E11"   5927714
"TCGA-BH-A0HF"  "NPHP4" "E11"   5927761
"TCGA-AO-A128"  "NPHP4" "E9"    5947174
"TCGA-AC-A23H"  "NPHP4" "E4"    5969131
"TCGA-AC-A23H"  "NPHP4" "E2"    5986185

ОБНОВЛЕНИЕ

Столбцы lastObsFlagG и lastObsFlagGE выглядят следующим образом. Аналогично, столбец lastObsFlagID (не показан) должен в основном содержать 1, но содержать 0, если наблюдение не находится в последней строке для данного уникального ID (например, в строке 4 должно быть lastObsFlagID = 0, а в строке 5 — lastObsFlagID = 1).

              ID     gene exon mutation lastObsFlagG lastObsFlagGE
1   TCGA-AN-A046    OR4F5   E1    69767            1             1
2   TCGA-A2-A0CP   SAMD11   E2   925952            1             1
3   TCGA-A8-A08H    NOC2L   E5   956126            0             1
4   TCGA-GM-A2DM    NOC2L   E4   956911            0             0
5   TCGA-GM-A2DM    NOC2L   E4   956912            1             1
6   TCGA-D8-A1XM   KLHL17   E3   961658            0             1
7   TCGA-BH-A18G   KLHL17   E5   962441            0             1
8   TCGA-3C-AALI   KLHL17   E8   963353            0             1
9   TCGA-AC-A62Y   KLHL17   E9   964004            1             1
10  TCGA-AR-A2LE  PLEKHN1   E1   966556            0             1
11  TCGA-E2-A14N  PLEKHN1   E5   970728            0             1
12  TCGA-AO-A0J4  PLEKHN1  E12   973506            1             1
13  TCGA-D8-A1J9     HES4   E3   999551            1             1
14  TCGA-EW-A1PH    ISG15   E2  1014276            1             1
15  TCGA-A2-A0T0     AGRN   E2  1022338            0             1
16  TCGA-GM-A2DD     AGRN   E3  1035303            0             1
17  TCGA-5L-AAT1     AGRN   E4  1040690            0             1
18  TCGA-OL-A5RW     AGRN   E8  1043314            0             1
19  TCGA-D8-A27M     AGRN  E25  1049355            0             1
20  TCGA-AR-A1AI     AGRN  E29  1050430            0             1
21  TCGA-5L-AAT0     AGRN  E36  1055374            0             0
22  TCGA-5L-AAT0     AGRN  E36  1055376            0             0
23  TCGA-C8-A8HP     AGRN  E36  1055442            1             1
24  TCGA-A7-A4SD   TTLL10  E13  1184971            1             1
25  TCGA-BH-A1F0     SDF4   E4  1223283            0             0
26  TCGA-AO-A128     SDF4   E4  1223330            0             1
27  TCGA-E9-A1R0     SDF4   E2  1228592            1             1
28  TCGA-A2-A04P   UBE2J2   E7  1255246            0             0
29  TCGA-C8-A274   UBE2J2   E7  1255342            1             1
30  TCGA-5L-AAT1   SCNN1D   E1  1281422            0             1
31  TCGA-AO-A128   SCNN1D   E6  1287116            0             1
32  TCGA-E2-A15R   SCNN1D   E7  1287596            0             1
33  TCGA-AC-A62V   SCNN1D  E11  1290543            1             1
34  TCGA-BH-A18V    ACAP3  E22  1294187            0             1
35  TCGA-A7-A6VX    ACAP3   E6  1300640            0             1
36  TCGA-GM-A2DB    ACAP3   E3  1303170            0             0
37  TCGA-EW-A1IY    ACAP3   E3  1303176            1             1
38  TCGA-D8-A1XQ   CPSF3L   E9  1313879            0             0
39  TCGA-5L-AAT1   CPSF3L   E9  1313888            0             1
40  TCGA-C8-A26Y   CPSF3L   E7  1314919            0             1
41  TCGA-D8-A1XK   CPSF3L   E2  1321057            1             1
42  TCGA-AO-A128   TAS1R3   E2  1331863            0             1
43  TCGA-A8-A07P   TAS1R3   E6  1334323            1             1
44  TCGA-A7-A0DA     DVL1  E14  1338066            0             1
45  TCGA-C8-A8HQ     DVL1  E10  1339589            0             1
46  TCGA-BH-A18T     DVL1   E8  1340130            1             1
47  TCGA-C8-A12V    MXRA8   E6  1354445            1             1
48  TCGA-C8-A3M8 AURKAIP1   E2  1374747            1             1
49  TCGA-BH-A0B6    CCNL2  E11  1387308            0             1
50  TCGA-A8-A09Z    CCNL2   E4  1395413            1             1
51  TCGA-AC-A23H   MRPL20   E4  1402084            0             0
52  TCGA-BH-A1FU   MRPL20   E4  1402116            0             0
53  TCGA-BH-A0W4   MRPL20   E4  1402194            0             0
54  TCGA-AR-A1AH   MRPL20   E4  1402205            1             1
55  TCGA-A8-A06Q  ANKRD65   E1  1420868            1             1
56  TCGA-AC-A8OQ   ATAD3C   E1  1450566            0             1
57  TCGA-A2-A25A   ATAD3C  E11  1462661            1             1
58  TCGA-AR-A5QQ   ATAD3B   E7  1482563            0             1
59  TCGA-AO-A1KS   ATAD3B  E12  1487900            0             1
60  TCGA-AO-A124   ATAD3B  E15  1490662            0             1
61  TCGA-A7-A56D   ATAD3B  E16  1495857            0             0
62  TCGA-D8-A27N   ATAD3B  E16  1495961            1             1
63  TCGA-AR-A2LH   ATAD3A   E5  1518929            0             1
64  TCGA-EW-A1OY   ATAD3A  E12  1525277            0             1
65  TCGA-AO-A128   ATAD3A  E16  1533987            1             1
66  TCGA-BH-A1FM    SSU72   E3  1544993            1             1
67  TCGA-C8-A12Y     MIB2   E4  1623874            0             1
68  TCGA-AC-A23H     MIB2   E7  1625321            0             1
69  TCGA-B6-A0RV     MIB2  E10  1626960            0             1
70  TCGA-E2-A1LG     MIB2  E19  1629667            1             1
71  TCGA-C8-A1HI SLC35E2B   E8  1668398            0             0
72  TCGA-A2-A0CR SLC35E2B   E8  1668406            0             1
73  TCGA-PL-A8LZ SLC35E2B   E7  1669727            1             1
74  TCGA-C8-A3M7   CDK11A  E17  1703847            0             0
75  TCGA-D8-A1JP   CDK11A  E17  1703884            0             0
76  TCGA-C8-A1HK   CDK11A  E17  1703915            0             1
77  TCGA-D8-A27G   CDK11A  E15  1704265            0             1
78  TCGA-5L-AAT1   CDK11A  E13  1705003            0             1
79  TCGA-D8-A1JE   CDK11A  E11  1707419            0             1
80  TCGA-BH-A18P   CDK11A   E5  1716352            0             0
81  TCGA-D8-A27G   CDK11A   E5  1716387            0             0
82  TCGA-EW-A6SD   CDK11A   E5  1716477            0             1
83  TCGA-BH-A0W4   CDK11A   E3  1721601            0             0
84  TCGA-AN-A049   CDK11A   E3  1721603            1             1
85  TCGA-D8-A1XK  SLC35E2   E3  1739030            0             1
86  TCGA-LL-A5YM  SLC35E2   E1  1745772            1             1
87  TCGA-A7-A26H     NADK   E5  1756596            1             1
88  TCGA-AO-A128     GNB1   E5  1815804            1             1
89  TCGA-A2-A3Y0   CALML6   E4  1916819            0             0
90  TCGA-AR-A0U3   CALML6   E4  1916828            1             1
91  TCGA-AR-A5QQ    GABRD   E3  2025399            1             1
92  TCGA-A2-A0CP    PRKCZ   E8  2148880            0             1
93  TCGA-A2-A3Y0    PRKCZ  E13  2172304            0             0
94  TCGA-E2-A15E    PRKCZ  E13  2172364            1             1
95  TCGA-BH-A18N  C1orf86   E2  2194056            1             1
96  TCGA-OL-A5D6      SKI   E1  2229410            0             1
97  TCGA-BH-A2L8      SKI   E3  2303317            0             1
98  TCGA-D8-A4Z1      SKI   E5  2304504            0             0
99  TCGA-BH-A0B6      SKI   E5  2304579            1             1
100 TCGA-GM-A2D9    MORN1  E14  2321445            0             1
101 TCGA-AR-A0TR    MORN1  E12  2336532            0             1
102 TCGA-BH-A18G    MORN1   E8  2372498            0             0
103 TCGA-AN-A0FV    MORN1   E8  2372505            0             0
104 TCGA-AN-A0FV    MORN1   E8  2372506            0             0
105 TCGA-BH-A0HF    MORN1   E8  2372521            0             0
106 TCGA-E2-A1IG    MORN1   E8  2372562            0             1
107 TCGA-AQ-A04J    MORN1   E7  2374471            0             1
108 TCGA-BH-A1F5    MORN1   E4  2387496            0             1
109 TCGA-A8-A09Z    MORN1   E3  2388258            0             0
110 TCGA-BH-A0HA    MORN1   E3  2388291            1             1
111 TCGA-B6-A0IA     RER1   E2  2397118            1             1
112 TCGA-E2-A10C    PEX10   E3  2408786            1             1
113 TCGA-AC-A3HN    PLCH2   E1  2476493            0             1
114 TCGA-AC-A8OP    PLCH2   E3  2479760            0             1
115 TCGA-A2-A0YK    PLCH2   E5  2484610            0             1
116 TCGA-B6-A0IK    PLCH2  E11  2491251            0             1
117 TCGA-C8-A135    PLCH2  E18  2498827            0             1
118 TCGA-E2-A15E    PLCH2  E19  2499127            0             1
119 TCGA-PE-A5DE    PLCH2  E22  2504427            1             1
120 TCGA-LD-A74U    PANK4  E16  2510680            0             1
121 TCGA-S3-AA10    PANK4  E15  2511386            0             1
122 TCGA-C8-A1HM    PANK4  E10  2515609            0             1
123 TCGA-AC-A23H    PANK4   E7  2519284            0             1
124 TCGA-BH-A18N    PANK4   E4  2520820            0             0
125 TCGA-BH-A0HF    PANK4   E4  2520821            0             1
126 TCGA-BH-A0HF    PANK4   E3  2521173            0             0
127 TCGA-5L-AAT1    PANK4   E3  2521297            0             1
128 TCGA-B6-A0RN    PANK4   E2  2521747            0             1
129 TCGA-5L-AAT1    PANK4   E1  2526579            1             1
130 TCGA-C8-A12V TNFRSF14   E3  2558349            0             1
131 TCGA-OL-A66P TNFRSF14   E6  2561704            0             0
132 TCGA-A2-A25F TNFRSF14   E6  2561714            0             1
133 TCGA-AC-A23H TNFRSF14   E8  2563296            1             1
134 TCGA-A7-A6VV    MMEL1  E20  2592868            0             0
135 TCGA-D8-A1J8    MMEL1  E20  2592908            0             1
136 TCGA-A2-A0EU    MMEL1  E15  2596056            0             1
137 TCGA-AR-A1AJ    MMEL1  E11  2603941            0             1
138 TCGA-E2-A572    MMEL1  E10  2604271            0             0
139 TCGA-EW-A1J5    MMEL1  E10  2604275            0             1
140 TCGA-E2-A574    MMEL1   E9  2605620            0             1
141 TCGA-AC-A23H    MMEL1   E5  2609733            0             0
142 TCGA-B6-A0IA    MMEL1   E5  2609757            1             1
143 TCGA-AN-A0XS   ACTRT2   E1  3021531            0             0
144 TCGA-E9-A1RC   ACTRT2   E1  3022445            0             0
145 TCGA-AC-A6IW   ACTRT2   E1  3022597            0             0
146 TCGA-A2-A4S3   ACTRT2   E1  3022696            1             1
147 TCGA-A2-A0EX   PRDM16   E4  3385231            0             1
148 TCGA-B6-A0WZ   PRDM16   E5  3396498            0             1
149 TCGA-E9-A22E   PRDM16   E6  3402898            0             1
150 TCGA-BH-A8FZ   PRDM16   E9  3411750            0             0
151 TCGA-BH-A0HF   PRDM16   E9  3412534            0             1
152 TCGA-A8-A09V   PRDM16  E15  3431074            1             1
153 TCGA-5L-AAT1 ARHGEF16   E2  3463558            0             1
154 TCGA-AR-A1AI ARHGEF16   E4  3467276            0             1
155 TCGA-E9-A1R4 ARHGEF16  E13  3479535            1             1
156 TCGA-AC-A5XS    MEGF6  E37  3490581            0             1
157 TCGA-AC-A23H    MEGF6   E7  3514608            0             1
158 TCGA-A2-A0SY    MEGF6   E2  3602584            1             1
159 TCGA-AC-A23H   TPRG1L   E2  3625459            1             1
160 TCGA-AR-A1AS   WRAP73   E1  3649987            1             1
161 TCGA-BH-A0DX     TP73   E2  3682353            0             1
162 TCGA-A1-A0SF     TP73   E3  3683095            0             0
163 TCGA-5L-AAT1     TP73   E3  3683125            0             1
164 TCGA-A8-A095     TP73   E4  3707673            0             1
165 TCGA-BH-A0HA     TP73   E8  3727733            1             1
166 TCGA-E9-A1R5   CCDC27   E1  3752488            0             1
167 TCGA-BH-A0C0   CCDC27   E3  3755474            0             1
168 TCGA-GM-A5PV   CCDC27   E6  3762664            0             1
169 TCGA-A2-A3Y0   CCDC27   E9  3766586            1             1
170 TCGA-BH-A1F6   LRRC47   E7  3781140            0             0
171 TCGA-B6-A0WZ   LRRC47   E7  3781267            0             1
172 TCGA-BH-A0B6   LRRC47   E2  3787022            1             1
173 TCGA-AR-A2LE   CEP104  E19  3823467            0             1
174 TCGA-A2-A0EN   CEP104  E12  3833888            0             1
175 TCGA-A8-A09Z   CEP104  E11  3835070            1             1
176 TCGA-AC-A4ZE     DFFB   E7  3883608            1             1
177 TCGA-AR-A1AH C1orf174   E4  3890056            1             1
178 TCGA-BH-A18G    AJAP1   E2  4711920            0             1
179 TCGA-BH-A1F2    AJAP1   E3  4769894            0             1
180 TCGA-AC-A23H    AJAP1   E4  4772372            0             0
181 TCGA-C8-A12L    AJAP1   E4  4772486            1             1
182 TCGA-AC-A23H    NPHP4  E28  5864419            0             0
183 TCGA-BH-A1FD    NPHP4  E28  5864478            0             1
184 TCGA-EW-A1IZ    NPHP4  E27  5865131            0             0
185 TCGA-EW-A1J5    NPHP4  E27  5865172            0             1
186 TCGA-A2-A04P    NPHP4  E26  5866378            0             1
187 TCGA-D8-A1XZ    NPHP4  E24  5867849            0             1
188 TCGA-4H-AAAK    NPHP4  E20  5877217            0             1
189 TCGA-D8-A1JD    NPHP4  E19  5880178            0             1
190 TCGA-GM-A2D9    NPHP4  E17  5890905            0             0
191 TCGA-E2-A1LA    NPHP4  E17  5890987            0             0
192 TCGA-D8-A1J8    NPHP4  E17  5891009            0             1
193 TCGA-E2-A1B4    NPHP4  E16  5904787            0             1
194 TCGA-EW-A1P3    NPHP4  E13  5907125            0             1
195 TCGA-BH-A0EE    NPHP4  E12  5909177            0             1
196 TCGA-A2-A0ER    NPHP4  E11  5927714            0             0
197 TCGA-BH-A0HF    NPHP4  E11  5927761            0             1
198 TCGA-AO-A128    NPHP4   E9  5947174            0             1
199 TCGA-AC-A23H    NPHP4   E4  5969131            0             1
200 TCGA-AC-A23H    NPHP4   E2  5986185            1             1
0
stats134711 23 Мар 2019 в 06:10

1 ответ

Лучший ответ

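Для всех групп можно использовать одну и ту же логику получения флага 1/0. Мы проверяем, совпадает ли номер текущей строки в группе (row_number()) с размером группы (n()), то есть является ли строка последней в группе; результат сравнения (TRUE/FALSE) преобразуем в 1/0 с помощью as.integer. Для ID группировка выполняется по rleid(ID), то есть по сериям подряд идущих строк с одинаковым ID, поскольку один и тот же ID может встречаться в несмежных строках (например, TCGA-A2-A0CP — в строках 2 и 92).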

library(dplyr)
library(data.table)

# Build three 1/0 flags, each marking the final row of its group:
#   lastObsFlagG  - last row of each gene
#   lastObsFlagGE - last row of each gene + exon combination
#   lastObsFlagID - last row of each run of consecutive identical IDs
# Within a group, seq_len(n()) numbers the rows 1..n(), so comparing it with
# n() is TRUE only on the last row; as.integer() turns TRUE/FALSE into 1/0.
df %>%
  group_by(gene) %>%
  mutate(lastObsFlagG = as.integer(seq_len(n()) == n())) %>%
  group_by(gene, exon) %>%
  mutate(lastObsFlagGE = as.integer(seq_len(n()) == n())) %>%
  ungroup() %>%
  # rleid() assigns a new id every time ID changes from one row to the next,
  # so rows are grouped by consecutive runs rather than by ID value.
  group_by(group = rleid(ID)) %>%
  mutate(lastObsFlagID = as.integer(seq_len(n()) == n())) %>%
  ungroup() %>%
  # Remove the temporary run-id helper column.
  mutate(group = NULL)


#   ID           gene    exon  mutation lastObsFlagG lastObsFlagGE lastObsFlagID
#  <fct>        <fct>   <fct>    <int>        <int>         <int>         <int>
# 1 TCGA-AN-A046 OR4F5   E1       69767            1             1             1
# 2 TCGA-A2-A0CP SAMD11  E2      925952            1             1             1
# 3 TCGA-A8-A08H NOC2L   E5      956126            0             1             1
# 4 TCGA-GM-A2DM NOC2L   E4      956911            0             0             0
# 5 TCGA-GM-A2DM NOC2L   E4      956912            1             1             1
# 6 TCGA-D8-A1XM KLHL17  E3      961658            0             1             1
# 7 TCGA-BH-A18G KLHL17  E5      962441            0             1             1
# 8 TCGA-3C-AALI KLHL17  E8      963353            0             1             1
# 9 TCGA-AC-A62Y KLHL17  E9      964004            1             1             1
#10 TCGA-AR-A2LE PLEKHN1 E1      966556            0             1             1
# … with 190 more rows
1
Ronak Shah 23 Мар 2019 в 13:11