w4 Hare et al 2009 Class Replication (Talking Minds Winter 2024)

Author

Professor Lisa Levinson

Published

April 23, 2024

Setup

Code

here::i_am("w4_hareetal2009/w4-analysis.qmd")
library(here)
library(tidyverse) 
library(lmerTest)
library(gt)
library(gtExtras)
library(gtsummary)

Code

# Reading-time data (sprt.rds); strip any grouping stored with the object
df_sprt <- here("w4_hareetal2009", "sprt.rds") |>
  readRDS() |>
  ungroup()

# Contents of compq.rds (presumably the comprehension-question responses)
df_compq <- here("w4_hareetal2009", "compq.rds") |>
  readRDS()

# One row per critical stimulus (conditions "tranS" / "intrS" only);
# count() also adds an `n` column with the number of rows per stimulus
df_stimuli <- df_sprt |>
  filter(cond1 %in% c("tranS", "intrS")) |>
  count(stimitem, sentence, cond1, cond2, cond3, label)

# Participant background data (demo.rds) without the uniqname column
df_demo <- here("w4_hareetal2009", "demo.rds") |>
  readRDS() |>
  select(!uniqname)

Code

# The verb is usually the third word of the sentence, but not always.
# Split each sentence on spaces, look at the third word, and treat the verb as
# the fourth word whenever the third word is one of the known exceptions.
df_labels <- df_sprt |>
  count(label, sentence) |>
  separate_wider_delim(
    sentence,
    delim = " ",
    names = paste0("x", 1:30),
    too_few = "align_start"
  ) |>
  select(1:6) |>
  mutate(verbpos = if_else(x3 %in% c("beam", "eventually", "man"), 4, 3)) |>
  select(label, verbpos)

# Attach the verb position to every row and number the words relative to the
# verb (relnum == 0 is the verb itself)
df_sprt <- df_sprt |>
  left_join(df_labels, by = join_by(label)) |>
  mutate(relnum = as.numeric(wordnum) - verbpos)

# Copy each participant's language-background columns onto every row
df_sprt <- left_join(
  df_sprt,
  df_demo |> select(ID, english_prim, nonenglish),
  by = join_by(ID)
)

# Analysis regions relative to the verb:
#   region 0 = the verb, region 1 = the next two words,
#   region 2 = the two words after that; every other word gets region = NA.
# Also record the character length of each word.
df_sprt <- df_sprt |>
  mutate(
    region = case_when(
      relnum == 0 ~ 0,
      relnum %in% c(1, 2) ~ 1,
      relnum %in% c(3, 4) ~ 2
    ),
    wordlength = str_length(word)
  )

df_hareetal <- df_sprt |>
  # keep only the critical transitive / intransitive sentences
  filter(cond1 %in% c("tranS", "intrS")) |>
  # give the generic condition columns analysis-specific names
  rename(
    transitivity = cond1,
    subj_bias = cond2,
    verb = cond3,
    item = stimitem
  ) |>
  mutate(
    subject = if_else(subj_bias == "intr", "good-theme", "good-cause"),
    rt = as.numeric(rt)
  ) |>
  # drop reading times of 2 seconds or more (rows with a missing rt go too)
  filter(rt < 2000) |>
  # treat identifiers and design variables as factors
  mutate(
    across(
      c(item, ID, codeID, transitivity, subj_bias, subject, verb, group, region),
      as.factor
    )
  ) |>
  select(
    time, ID, codeID, english_prim, nonenglish,
    wordnum, relnum, region, verbpos, word, wordlength,
    label, item, transitivity, subj_bias, subject, verb, group,
    rt, sentence
  )

# Collapse the word-level rows into one row per participant x sentence x region.
#   region_rt     - summed reading time of the region's words
#   region_length - summed character length of the region's words
#   region_rt_avg - mean per-word reading time within the region
# Words outside the analysis regions (region is NA) are dropped first.
df_regions <- df_hareetal |>
  filter(!is.na(region)) |>
  group_by(
    ID, codeID, region, label, item, transitivity, subj_bias, subject,
    verb, group, sentence, english_prim, nonenglish
  ) |>
  summarize(
    # NOTE: this is the sum over the words that survived the rt < 2000 filter,
    # so a two-word region that lost a word contributes a one-word total.
    region_rt = sum(rt),
    region_length = sum(wordlength),
    # Average over the words actually present. Dividing the sum by a fixed 2
    # would halve the reading time of any two-word region that lost a word to
    # the outlier filter. For the one-word verb region this equals region_rt.
    region_rt_avg = mean(rt),
    .groups = "drop"
  )

Summaries

Code

# Per-participant mean of the per-word region reading time, computed separately
# for every region x transitivity x subject-type cell
df_imean <- df_regions |>
  group_by(
    ID, codeID, region, transitivity, subject, english_prim, nonenglish
  ) |>
  summarize(mean = mean(region_rt_avg, na.rm = TRUE), .groups = "drop")

# Condition means (the mean of the participant means), displayed with one row
# per subject type x transitivity and one column per analysis region
df_imean |>
  group_by(region, transitivity, subject) |>
  summarize(mean = mean(mean, na.rm = TRUE), .groups = "drop") |>
  pivot_wider(
    id_cols = c(subject, transitivity),
    names_from = region,
    values_from = mean,
    names_prefix = "region"
  ) |>
  # spell out the condition codes for display
  mutate(
    transitivity = if_else(transitivity == "intrS", "intransitive", "transitive")
  ) |>
  gt() |>
  fmt_number(decimals = 0) |>
  cols_label(
    subject = "Subject",
    transitivity = "Transitivity", # header was misspelled "Transivitity"
    region0 = "Verb",
    region1 = "First Two Words",
    region2 = "Second Two Words"
  ) |>
  opt_row_striping()

Subject	Transitivity	Verb	First Two Words	Second Two Words
good-cause	intransitive	361	345	343
good-theme	intransitive	353	337	342
good-cause	transitive	359	331	327
good-theme	transitive	348	338	342

Visualizations

In the original paper, they seemed to average the reading times for the two word phrases after the verb. These figures reproduce that analysis. This makes the reading times after the verb look surprisingly short because some very short words are in those phrases.

Code

# Two colours handed to scale_color_manual() in the plots in this document,
# where colour is mapped to the subject type
colors <- c("blue", "hotpink")

Intransitive Sentences

These are the sentences where subjects that are good causes are less expected, and predicted to take longer after the verb.

Good cause: The brick shattered into tiny pieces when it hit the floor.
Good theme: The glass shattered into tiny pieces when it hit the floor.

Our results (statistics below) do not support the hypothesis. Although the good-cause sentences on average are slightly slower, the difference is small - barely larger than the difference between the verbs themselves.

Code

# Intransitive sentences: average each participant's per-word region reading
# times by subject type, then plot the across-participant summary per region.
# No summary function is supplied, so ggplot2 falls back to its default
# (mean with standard-error bars).
df_regions |>
  filter(transitivity == "intrS") |>
  group_by(ID, region, subject) |>
  summarize(mean = mean(region_rt_avg, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(x = region, y = mean, color = subject)) +
  geom_point(stat = "summary", position = position_dodge(width = .5)) +
  geom_errorbar(
    stat = "summary",
    position = position_dodge(width = .5),
    width = .5
  ) +
  geom_line(
    aes(group = subject),
    stat = "summary",
    position = position_dodge(width = .5)
  ) +
  scale_color_manual(values = colors) +
  # Axis labels illustrate the regions with the example intransitive sentence
  # ("... shattered into tiny pieces when it hit the floor"). The last label
  # used to read "bits when", which does not match that sentence.
  scale_x_discrete(labels = c("shattered", "into tiny", "pieces when")) +
  labs(
    title = "Intransitive Sentences",
    x = "Region",
    y = "Reading Time (ms)",
    color = "Subject Type"
  ) +
  theme_classic()

Transitive Sentences

For the transitive sentences, the good-theme subjects are expected to have slower reading times after the verb.

Good cause: The brick shattered the fragile goblet when they bumped together.
Good theme: The glass shattered the fragile goblet when they bumped together.

Our results provide some evidence for the effect in transitives, with a significant effect in region 2. Overall, though, the evidence is not compelling: the effect in the first region is unreliable (and not statistically significant). It may be that we would need many more participants to detect such a small effect using an online experiment.

Code

# Transitive sentences: per-participant means by region and subject type,
# summarised across participants as points, error bars and connecting lines.
# Axis labels illustrate the regions with the example transitive sentence.
df_regions |>
  filter(transitivity == "tranS") |>
  group_by(ID, region, subject) |>
  summarize(mean = mean(region_rt_avg, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(x = region, y = mean, color = subject)) +
  stat_summary(geom = "point", position = position_dodge(width = .5)) +
  stat_summary(
    geom = "errorbar",
    position = position_dodge(width = .5),
    width = .5
  ) +
  stat_summary(
    aes(group = subject),
    geom = "line",
    position = position_dodge(width = .5)
  ) +
  scale_color_manual(values = colors) +
  scale_x_discrete(labels = c("shattered", "the fragile", "goblet when")) +
  labs(
    title = "Transitive Sentences",
    x = "Region",
    y = "Reading Time (ms)",
    color = "Subject Type"
  ) +
  theme_classic()

Statistics

Code

# Region 1, intransitive sentences: summed region RT predicted by subject type
# and region length, with by-participant and by-item random intercepts and
# subject-type slopes
m_reg1_intran <- lmer(
  region_rt ~ subject + region_length + (1 + subject | ID) + (1 + subject | item),
  data = filter(df_regions, region == 1, transitivity == "intrS")
)
tbl_regression(m_reg1_intran, conf.int = FALSE)

Region 1 Intransitives
Characteristic	Beta	p-value
subject
good-cause	—
good-theme	-16	0.2
region_length	9.0	0.025

Code

# Region 1, transitive sentences: summed region RT predicted by subject type
# and region length, with by-participant and by-item random intercepts and
# subject-type slopes
m_reg1_tran <- lmer(
  region_rt ~ subject + region_length + (1 + subject | ID) + (1 + subject | item),
  data = filter(df_regions, region == 1, transitivity == "tranS")
)
tbl_regression(m_reg1_tran, conf.int = FALSE)

Region 1 Transitives
Characteristic	Beta	p-value
subject
good-cause	—
good-theme	13	0.3
region_length	1.6	0.6

Code

# Region 2, intransitive sentences: summed region RT predicted by subject type
# and region length, with by-participant and by-item random intercepts and
# subject-type slopes
m_reg2_intran <- lmer(
  region_rt ~ subject + region_length + (1 + subject | ID) + (1 + subject | item),
  data = filter(df_regions, region == 2, transitivity == "intrS")
)
tbl_regression(m_reg2_intran, conf.int = FALSE)

Region 2 Intransitives
Characteristic	Beta	p-value
subject
good-cause	—
good-theme	-3.0	0.8
region_length	8.4	0.033

Code

# Region 2, transitive sentences: summed region RT predicted by subject type
# and region length, with by-participant and by-item random intercepts and
# subject-type slopes
m_reg2_tran <- lmer(
  region_rt ~ subject + region_length + (1 + subject | ID) + (1 + subject | item),
  data = filter(df_regions, region == 2, transitivity == "tranS")
)
tbl_regression(m_reg2_tran, conf.int = FALSE)

Region 2 Transitives
Characteristic	Beta	p-value
subject
good-cause	—
good-theme	30	0.027
region_length	14	<0.001

Individual Results

These results are split into the intransitive and transitive groups, and focus only on the first region after the verb.

For the intransitive sentences, “good cause” subjects are predicted to lead to slower reading. Here the good cause sentence reading times are plotted to the right side of the mini-plot.

Code

# Region-1 participant means for intransitive sentences, reshaped to one row
# per participant with a column for each subject type
pivoted_intran <- df_imean |>
  filter(transitivity == "intrS", region == 1) |>
  pivot_wider(names_from = subject, values_from = mean) |>
  rename(goodtheme = `good-theme`, goodcause = `good-cause`) |>
  arrange(codeID) |>
  rowwise() |>
  # The sparkline needs a list-column holding the values to draw, in
  # left-to-right order: good theme first, good cause second.
  mutate(plot = list(c(goodtheme, goodcause)), .after = codeID)

# Interactive table: one sparkline per participant next to the rounded means
spark_table <- pivoted_intran |>
  ungroup() |>
  select(codeID, plot, english_prim, goodtheme, goodcause) |>
  mutate(across(where(is.numeric), round)) |>
  gt() |>
  cols_label(
    codeID = "Code",
    plot = "Good Theme - Good Cause",
    goodtheme = "Good Theme",
    goodcause = "Good Cause",
    english_prim = "EnglishPrimary"
  ) |>
  cols_width(
    codeID ~ px(100),
    plot ~ px(200),
    everything() ~ px(100)
  ) |>
  gt_plt_sparkline(plot, type = "shaded") |>
  opt_interactive(use_filters = TRUE)

spark_table

For the transitive sentences, “good theme” subjects are predicted to lead to slower reading. Here the good theme sentence reading times are plotted to the right side of the mini-plot.

Code

# Region-1 participant means for transitive sentences, reshaped to one row per
# participant with a column for each subject type
pivoted_tran <- df_imean |>
  filter(transitivity == "tranS", region == 1) |>
  pivot_wider(names_from = subject, values_from = mean) |>
  rename(goodcause = `good-cause`, goodtheme = `good-theme`) |>
  # order rows by participant code so this table lists participants in the
  # same order as the intransitive table
  arrange(codeID) |>
  rowwise() |>
  # The sparkline needs a list-column holding the values to draw, in
  # left-to-right order: good cause first, good theme second.
  mutate(plot = list(c(goodcause, goodtheme)), .after = codeID)

# Interactive table: one sparkline per participant next to the rounded means
spark_table <- pivoted_tran |>
  ungroup() |>
  select(codeID, plot, english_prim, goodcause, goodtheme) |>
  mutate(across(where(is.numeric), \(x) round(x, 0))) |>
  gt() |>
  cols_label(
    codeID = "Code",
    plot = "Good Cause - Good Theme",
    goodcause = "Good Cause",
    goodtheme = "Good Theme",
    english_prim = "EnglishPrimary"
  ) |>
  cols_width(
    codeID ~ px(100),
    plot ~ px(200),
    everything() ~ px(100)
  ) |>
  gt_plt_sparkline(plot, type = "shaded") |>
  opt_interactive(use_filters = TRUE)

spark_table