Code
# Declare where this document lives relative to the project root so that
# here() builds data paths independently of the working directory.
here::i_am("w4_hareetal2009/w4-analysis.qmd")
library(here)
library(tidyverse)
library(lmerTest)
library(gt)
library(gtExtras)
library(gtsummary)

# Word-level reading data; ungroup() clears any grouping stored with the
# saved object.
df_sprt <- readRDS(here("w4_hareetal2009", "sprt.rds")) |>
  ungroup()

# NOTE(review): presumably comprehension-question responses; not used in the
# code visible in this section -- confirm it is still needed.
df_compq <- readRDS(here("w4_hareetal2009", "compq.rds"))

# One row per critical stimulus (the "tranS" and "intrS" conditions only).
df_stimuli <- df_sprt |>
  filter(cond1 %in% c("tranS", "intrS")) |>
  count(stimitem, sentence, cond1, cond2, cond3, label)

# Participant background data, with the uniqname column removed.
df_demo <- readRDS(here("w4_hareetal2009", "demo.rds")) |>
  select(-uniqname)

# not all verbs are the third word! correct and make relative numbering
# Work out, for every sentence label, which word position holds the verb.
df_labels <- df_sprt |>
  count(label, sentence) |>
  # split each sentence into one column per word (x1, x2, ...)
  separate_wider_delim(
    sentence,
    delim = " ",
    names = paste0("x", 1:30),
    too_few = "align_start"
  ) |>
  # keep the label and the first five words
  select(1:6) |>
  # the verb is word 3, unless word 3 is one of these (then it is word 4)
  mutate(verbpos = if_else(x3 %in% c("beam", "eventually", "man"), 4, 3)) |>
  select(label, verbpos)

df_sprt <- df_sprt |>
  # word position relative to the verb (0 = the verb itself)
  left_join(df_labels, join_by(label)) |>
  mutate(relnum = as.numeric(wordnum) - verbpos) |>
  # add language background info to each row
  left_join(select(df_demo, ID, english_prim, nonenglish), join_by(ID)) |>
  # create analysis regions (two word phrases after verb);
  # words outside these positions get region = NA
  mutate(
    region = case_when(
      relnum == 0 ~ 0,
      relnum %in% c(1, 2) ~ 1,
      relnum %in% c(3, 4) ~ 2
    ),
    wordlength = str_length(word)
  )
# Critical items only, with descriptive condition names and numeric reading
# times. `subject` recodes the subject-bias condition into readable labels.
df_hareetal <- df_sprt |>
  filter(cond1 %in% c("tranS", "intrS")) |>
  rename(
    transitivity = cond1,
    subj_bias = cond2,
    verb = cond3,
    item = stimitem
  ) |>
  mutate(
    subject = if_else(subj_bias == "intr", "good-theme", "good-cause"),
    rt = as.numeric(rt)
  ) |>
  # remove outlier reading times of 2 seconds or more (rows with NA rt are
  # dropped by this filter as well)
  filter(rt < 2000) |>
  mutate(across(
    c(item, ID, codeID, transitivity, subj_bias, subject, verb, group, region),
    as.factor
  )) |>
  select(
    time, ID, codeID, english_prim, nonenglish, wordnum, relnum, region,
    verbpos, word, wordlength, label, item, transitivity, subj_bias, subject,
    verb, group, rt, sentence
  )

# Collapse the word-level rows into one row per participant x item x region.
#   region_rt      total reading time of the words in the region
#   region_length  total number of characters in the region
#   region_rt_avg  mean per-word reading time in the region
df_regions <- df_hareetal |>
  filter(!is.na(region)) |>
  group_by(
    ID, codeID, region, label, item, transitivity, subj_bias, subject, verb,
    group, sentence, english_prim, nonenglish
  ) |>
  summarize(
    region_rt = sum(rt),
    region_length = sum(wordlength),
    # Average over the words that are actually present. The outlier filter
    # above removes single words, so a two-word region can be left with one
    # word; dividing the sum by a fixed 2 would halve that word's time.
    region_rt_avg = mean(rt),
    .groups = "drop"
  )

# participant means
# Mean per-word regional reading time for each participant in each
# region x transitivity x subject cell.
df_imean <- df_regions |>
  group_by(ID, codeID, region, transitivity, subject, english_prim, nonenglish) |>
  summarize(mean = mean(region_rt_avg, na.rm = TRUE), .groups = "drop")

# Average the participant means per condition and show one column per region.
df_imean |>
  group_by(region, transitivity, subject) |>
  summarize(mean = mean(mean, na.rm = TRUE), .groups = "drop") |>
  pivot_wider(
    id_cols = c(subject, transitivity),
    names_from = region,
    values_from = mean,
    names_prefix = "region"
  ) |>
  mutate(
    transitivity = if_else(transitivity == "intrS", "intransitive", "transitive")
  ) |>
  gt() |>
  fmt_number(decimals = 0) |>
  cols_label(
    subject = "Subject",
    transitivity = "Transitivity",
    region0 = "Verb",
    region1 = "First Two Words",
    region2 = "Second Two Words"
  ) |>
  opt_row_striping()

| Subject | Transitivity | Verb | First Two Words | Second Two Words |
|---|---|---|---|---|
| good-cause | intransitive | 361 | 345 | 343 |
| good-theme | intransitive | 353 | 337 | 342 |
| good-cause | transitive | 359 | 331 | 327 |
| good-theme | transitive | 348 | 338 | 342 |
In the original paper, the authors appear to have averaged the reading times within each two-word phrase after the verb. These figures reproduce that analysis. This makes the reading times after the verb look surprisingly short, because some very short words occur in those phrases.
# Line colors for the two subject types in the plots
colors <- c("blue", "hotpink")

These are the sentences where subjects that are good causes are less expected, and predicted to take longer after the verb.
Our results (statistics below) do not support the hypothesis. Although the good-cause sentences on average are slightly slower, the difference is small - barely larger than the difference between the verbs themselves.

# Intransitive sentences: participant means per region and subject type,
# drawn as condition means with error bars (ggplot2's default summary).
df_regions |>
  filter(transitivity == "intrS") |>
  group_by(ID, region, subject) |>
  summarize(mean = mean(region_rt_avg, na.rm = TRUE), .groups = "drop_last") |>
  ggplot(aes(x = region, y = mean, color = subject)) +
  geom_point(
    stat = "summary",
    position = position_dodge(width = .5)
  ) +
  geom_errorbar(
    stat = "summary",
    position = position_dodge(width = .5),
    width = .5
  ) +
  geom_line(
    aes(group = subject),
    stat = "summary",
    position = position_dodge(width = .5)
  ) +
  scale_color_manual(values = colors) +
  # example words standing in for the three regions
  scale_x_discrete(labels = c("shattered", "into tiny", "bits when")) +
  labs(
    title = "Intransitive Sentences",
    x = "Region",
    y = "Reading Time (ms)",
    color = "Subject Type"
  ) +
  theme_classic()

For the transitive sentences, the good-theme subjects are expected to have slower reading times after the verb.
Our results provide some evidence for the effect in transitives, with a significant effect in region 2. Overall, though, the evidence is not compelling: the effect in the first region is unreliable (and not statistically significant). It may be that we would need many more participants to detect such a small effect using an online experiment.
# Transitive sentences: participant means per region and subject type,
# drawn as condition means with error bars (ggplot2's default summary).
df_regions |>
  filter(transitivity == "tranS") |>
  group_by(ID, region, subject) |>
  summarize(mean = mean(region_rt_avg, na.rm = TRUE), .groups = "drop_last") |>
  ggplot(aes(x = region, y = mean, color = subject)) +
  geom_point(
    stat = "summary",
    position = position_dodge(width = .5)
  ) +
  geom_errorbar(
    stat = "summary",
    position = position_dodge(width = .5),
    width = .5
  ) +
  geom_line(
    aes(group = subject),
    stat = "summary",
    position = position_dodge(width = .5)
  ) +
  scale_color_manual(values = colors) +
  # example words standing in for the three regions
  scale_x_discrete(labels = c("shattered", "the fragile", "goblet when")) +
  labs(
    title = "Transitive Sentences",
    x = "Region",
    y = "Reading Time (ms)",
    color = "Subject Type"
  ) +
  theme_classic()

# Region 1, intransitive sentences: total regional reading time predicted by
# subject type, controlling for region length, with by-participant and
# by-item random intercepts and subject slopes.
m_reg1_intran <- lmer(
  region_rt ~ subject + region_length +
    (1 + subject | ID) +
    (1 + subject | item),
  data = filter(df_regions, region == 1, transitivity == "intrS")
)
tbl_regression(m_reg1_intran, conf.int = FALSE)

| Characteristic | Beta | p-value |
|---|---|---|
| subject | ||
| good-cause | — | |
| good-theme | -16 | 0.2 |
| region_length | 9.0 | 0.025 |
# Region 1, transitive sentences: total regional reading time predicted by
# subject type, controlling for region length, with by-participant and
# by-item random intercepts and subject slopes.
m_reg1_tran <- lmer(
  region_rt ~ subject + region_length +
    (1 + subject | ID) +
    (1 + subject | item),
  data = filter(df_regions, region == 1, transitivity == "tranS")
)
tbl_regression(m_reg1_tran, conf.int = FALSE)

| Characteristic | Beta | p-value |
|---|---|---|
| subject | ||
| good-cause | — | |
| good-theme | 13 | 0.3 |
| region_length | 1.6 | 0.6 |
# Region 2, intransitive sentences: total regional reading time predicted by
# subject type, controlling for region length, with by-participant and
# by-item random intercepts and subject slopes.
m_reg2_intran <- lmer(
  region_rt ~ subject + region_length +
    (1 + subject | ID) +
    (1 + subject | item),
  data = filter(df_regions, region == 2, transitivity == "intrS")
)
tbl_regression(m_reg2_intran, conf.int = FALSE)

| Characteristic | Beta | p-value |
|---|---|---|
| subject | ||
| good-cause | — | |
| good-theme | -3.0 | 0.8 |
| region_length | 8.4 | 0.033 |
# Region 2, transitive sentences: total regional reading time predicted by
# subject type, controlling for region length, with by-participant and
# by-item random intercepts and subject slopes.
m_reg2_tran <- lmer(
  region_rt ~ subject + region_length +
    (1 + subject | ID) +
    (1 + subject | item),
  data = filter(df_regions, region == 2, transitivity == "tranS")
)
tbl_regression(m_reg2_tran, conf.int = FALSE)

| Characteristic | Beta | p-value |
|---|---|---|
| subject | ||
| good-cause | — | |
| good-theme | 30 | 0.027 |
| region_length | 14 | <0.001 |
These results are split into the intransitive and transitive groups, and focus only on the first region after the verb.
For the intransitive sentences, “good cause” subjects are predicted to lead to slower reading. Here the good cause sentence reading times are plotted to the right side of the mini-plot.
# Per-participant means for region 1 of the intransitive sentences, one
# column per subject type, ordered by participant code.
pivoted_intran <- df_imean |>
  filter(transitivity == "intrS", region == 1) |>
  pivot_wider(names_from = subject, values_from = mean) |>
  rename(goodcause = `good-cause`, goodtheme = `good-theme`) |>
  arrange(codeID) |>
  rowwise() |>
  mutate(
    # the sparkline needs a list-column holding each row's two values;
    # good-theme comes first so good-cause is drawn on the right
    plot = list(c(goodtheme, goodcause)),
    .after = codeID
  )

# Interactive table: one row per participant with a two-point sparkline.
spark_table <- pivoted_intran |>
  ungroup() |>
  select(codeID, plot, english_prim, goodtheme, goodcause) |>
  mutate(across(where(is.numeric), \(x) round(x))) |>
  gt() |>
  cols_label(
    codeID = "Code",
    plot = "Good Theme - Good Cause",
    goodtheme = "Good Theme",
    goodcause = "Good Cause",
    english_prim = "EnglishPrimary"
  ) |>
  cols_width(
    codeID ~ px(100),
    plot ~ px(200),
    everything() ~ px(100)
  ) |>
  gt_plt_sparkline(plot, type = "shaded") |>
  opt_interactive(use_filters = TRUE)
spark_table

For the transitive sentences, “good theme” subjects are predicted to lead to slower reading. Here the good theme sentence reading times are plotted to the right side of the mini-plot.
# Per-participant means for region 1 of the transitive sentences, one column
# per subject type, ordered by participant code so the rows line up with the
# intransitive table.
pivoted_tran <- df_imean |>
  filter(transitivity == "tranS", region == 1) |>
  pivot_wider(names_from = subject, values_from = mean) |>
  rename(goodcause = `good-cause`, goodtheme = `good-theme`) |>
  arrange(codeID) |>
  rowwise() |>
  mutate(
    # the sparkline needs a list-column holding each row's two values;
    # good-cause comes first so good-theme is drawn on the right
    plot = list(c(goodcause, goodtheme)),
    .after = codeID
  )

# Interactive table: one row per participant with a two-point sparkline.
spark_table <- pivoted_tran |>
  ungroup() |>
  select(codeID, plot, english_prim, goodcause, goodtheme) |>
  mutate(across(where(is.numeric), \(x) round(x, 0))) |>
  gt() |>
  cols_label(
    codeID = "Code",
    plot = "Good Cause - Good Theme",
    goodcause = "Good Cause",
    goodtheme = "Good Theme",
    english_prim = "EnglishPrimary"
  ) |>
  cols_width(
    codeID ~ px(100),
    plot ~ px(200),
    everything() ~ px(100)
  ) |>
  gt_plt_sparkline(plot, type = "shaded") |>
  opt_interactive(use_filters = TRUE)
spark_table