Я пытаюсь отфильтровать фрейм данных с помощью dplyr по группе, где фиксируется первое вхождение строки «ReadingOnset» в строке, и она и все последующие строки передаются в новый фрейм данных.

Text_Stimuli <- structure(list(Name = c("Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", 
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1"
), StimulusName = c("GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", 
"GenLie20"), StimuliBlock = c("Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4", 
"Block_4", "Block_4"), Reading_Onset = c("", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "ReadingOnset", "", "", "", "", 
"", "", "")), row.names = c(NA, -101L), vars = c("Name", "StimulusName", 
"StimuliBlock"), drop = TRUE, indices = list(0:100), group_sizes = 101L, biggest_group_size = 101L, labels = structure(list(
    Name = "Innocent Subject 15", StimulusName = "GenLie20", 
    StimuliBlock = "Block_4"), row.names = c(NA, -1L), class = "data.frame", vars = c("Name", 
"StimulusName", "StimuliBlock"), drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

Вот пример решения, которое я пытался заставить работать, но безуспешно.

Test <- Text_Stimuli %>% 
  group_by(Name, StimulusName, StimuliBlock)%>%   
  filter(!lead(cumsum(grepl("ReadingOnset", Reading_Onset)), default = 0))

Как видите, я пытаюсь сгруппировать по Name, StimulusName и StimuliBlock. Затем я пытаюсь найти первое вхождение «ReadingOnset» в столбце Reading_Onset и вернуть из него все ведущие строки (включая строку с «ReadingOnset»).

Я пытался адаптировать это решение к обратной проблеме: https://stackoverflow.com/a/37922522/2653210

0
Docconcoct 23 Окт 2018 в 13:55

2 ответа

Лучший ответ

Вы можете попробовать это со своим набором данных:

library(dplyr)
library(stringr)
library(zoo)
df %>% filter(ifelse(str_detect(Reading_Onset,"ReadingOnset"),TRUE,NA) %>%
              na.locf(na.rm=FALSE))

## A tibble: 8 x 4
## Groups:   Name, StimulusName, StimuliBlock [1]
#  Name                StimulusName StimuliBlock Reading_Onset
#  <chr>               <chr>        <chr>        <chr>        
#1 Innocent Subject 15 GenLie20     Block_4      ReadingOnset 
#2 Innocent Subject 15 GenLie20     Block_4      ""           
#3 Innocent Subject 15 GenLie20     Block_4      ""           
#4 Innocent Subject 15 GenLie20     Block_4      ""           
#5 Innocent Subject 15 GenLie20     Block_4      ""           
#6 Innocent Subject 15 GenLie20     Block_4      ""           
#7 Innocent Subject 15 GenLie20     Block_4      ""           
#8 Innocent Subject 15 GenLie20     Block_4      ""
2
Nicolas2 23 Окт 2018 в 11:39

Я не мог сказать, хотите ли вы все до и включая ReadingOnset, или все после, включая ReadingOnset, поэтому я покажу оба.

Все до и в том числе:

library(dplyr)

Text_Stimuli %>% 
  filter(row_number() <= which(grepl("ReadingOnset", Reading_Onset)))

Все после и в том числе:

Text_Stimuli %>% 
  filter(row_number() >= which(grepl("ReadingOnset", Reading_Onset)))

Мы просто выполняем фильтрацию на основе номера строки, в которой находится "ReadingOnset".

1
AndS. 23 Окт 2018 в 12:06
52947299