w4 Hare et al 2009 Class Replication (Talking Minds Winter 2024)

Author

Professor Lisa Levinson

Published

April 23, 2024

Setup

Code
here::i_am("w4_hareetal2009/w4-analysis.qmd")
library(here)
library(tidyverse) 
library(lmerTest)
library(gt)
library(gtExtras)
library(gtsummary)
Code
df_sprt <- readRDS(here("w4_hareetal2009", "sprt.rds")) |>  ungroup()

df_compq <- readRDS(here("w4_hareetal2009", "compq.rds")) 

df_stimuli <- df_sprt |> 
  filter(cond1 == "tranS" | cond1 == "intrS") |> 
  count(stimitem, sentence, cond1, cond2, cond3, label)

df_demo <- readRDS(here("w4_hareetal2009", "demo.rds")) |> 
  select(-uniqname)
Code
# not all verbs are the third word! correct and make relative numbering
df_labels <- df_sprt |> 
  count(label, sentence) |> 
  separate_wider_delim(sentence, names = paste0("x", 1:30), 
                       delim = " ", too_few = "align_start") |> 
  select(1:6) |> 
  mutate(verbpos = 3) |> 
  mutate(verbpos = if_else(x3 %in% c("beam", "eventually", "man"), 4, verbpos)) |> 
  select(label, verbpos) 
  
df_sprt <- left_join(df_sprt, df_labels, join_by(label)) |> 
  mutate(relnum = as.numeric(wordnum) - verbpos)

# add language background info to each row
df_sprt <- df_sprt |> 
  left_join(select(df_demo, ID, english_prim, nonenglish), join_by(ID))

# create analysis regions (two word phrases after verb)
df_sprt <- df_sprt |> 
  mutate(region = case_when(relnum == 0 ~ 0, 
                            relnum == 1 | relnum == 2 ~ 1,
                            relnum == 3 | relnum == 4 ~ 2),
         wordlength = str_length(word))

df_hareetal <- df_sprt |> 
  filter(cond1 == "tranS" | cond1 == "intrS") |> 
  rename(transitivity = cond1, subj_bias = cond2, verb = cond3, item = stimitem) |> 
  mutate(subject = if_else(subj_bias == "intr", "good-theme", "good-cause"),
         rt = as.numeric(rt)) |>
  # remove outlier reading times above 2 seconds
  filter(rt < 2000) |> 
  mutate(across(c(item, ID, codeID, transitivity, subj_bias, subject, verb, group, region), as.factor)) |> 
  select(time, ID, codeID, english_prim, nonenglish, wordnum, relnum, region, verbpos, word, wordlength, label, item, transitivity, subj_bias, subject, verb, group, rt, sentence)

df_regions <- df_hareetal |> 
  filter(!is.na(region)) |> 
  group_by(ID, codeID, region, label, item, transitivity, subj_bias, subject, verb, group, sentence, english_prim, nonenglish) |>
  summarize(region_rt = sum(rt),
            region_length = sum(wordlength)) |> 
  mutate(region_rt_avg = if_else(region %in% c(1,2), region_rt/2, region_rt)) |> 
  ungroup()

Summaries

Code
# participant means
df_imean <- df_regions |> 
  group_by(ID, codeID, region, transitivity, subject, english_prim, nonenglish) |> 
  summarize(mean = mean(region_rt_avg, na.rm = TRUE)) |> 
  ungroup()

df_imean |> 
  group_by(region, transitivity, subject) |> 
  summarize(mean = mean(mean, na.rm = TRUE)) |> 
  pivot_wider(id_cols = c(subject,transitivity), names_from = region, values_from = mean, names_prefix = "region") |>  
  mutate(transitivity = if_else(transitivity == "intrS", "intransitive", "transitive")) |> 
  ungroup() |> 
  gt() |> 
  fmt_number(decimals = 0) |> 
  cols_label(subject = "Subject", transitivity = "Transitivity", region0 = "Verb", region1 = "First Two Words", region2 = "Second Two Words") |> 
  opt_row_striping()
Subject      Transitivity   Verb   First Two Words   Second Two Words
good-cause   intransitive   361    345               343
good-theme   intransitive   353    337               342
good-cause   transitive     359    331               327
good-theme   transitive     348    338               342

Visualizations

In the original paper, the authors appear to have averaged the reading times over the two-word phrases after the verb. These figures reproduce that analysis. The averaging makes the post-verb reading times look surprisingly short, because those phrases contain some very short words.
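
As a quick check on that point, a sketch along these lines (using the df_hareetal columns defined in Setup; not run as part of this report) would show the average word length in each post-verb region:

Code
df_hareetal |> 
  filter(!is.na(region)) |> 
  # region 0 = verb, regions 1 and 2 = the two-word phrases after it
  group_by(region) |> 
  summarize(mean_word_length = mean(wordlength, na.rm = TRUE))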

Code
colors <- c("blue", "hotpink")

Intransitive Sentences

These are the sentences where good-cause subjects are less expected and are therefore predicted to produce longer reading times after the verb.

  • Good cause: The brick shattered into tiny pieces when it hit the floor.
  • Good theme: The glass shattered into tiny pieces when it hit the floor.

Our results (statistics below) do not support the hypothesis. Although the good-cause sentences are slightly slower on average, the difference is small, barely larger than the difference between the verbs themselves (a rough check of that by-verb variation is sketched after the figure).

Code
df_regions |> 
  filter(transitivity == "intrS") |> 
  group_by(ID, region, subject) |> 
  summarize(mean = mean(region_rt_avg, na.rm = TRUE)) |> 
  ggplot(aes(x = region, y = mean, color = subject)) + 
  geom_point(stat = "summary", position = position_dodge(width = .5)) +
  geom_errorbar(stat = "summary", position = position_dodge(width = .5), width = .5) +
  geom_line(aes(group = subject), stat = "summary", position = position_dodge(width = .5)) +
  scale_color_manual(values = colors) +
  scale_x_discrete(labels = c("shattered", "into tiny", "pieces when")) +
  labs(title = "Intransitive Sentences", x = "Region", y = "Reading Time (ms)",  color = "Subject Type") +
  theme_classic()
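
To put the by-verb variation mentioned above in context, a rough sketch like this (again using df_regions from Setup; not run here) compares the spread of per-verb means in region 1 of the intransitives against the small subject-type difference:

Code
df_regions |> 
  filter(transitivity == "intrS", region == 1) |> 
  # mean reading time for each verb, collapsing over subject type
  group_by(verb) |> 
  summarize(verb_mean = mean(region_rt_avg, na.rm = TRUE)) |> 
  # spread of the per-verb means, to compare against the subject-type effect
  summarize(range_across_verbs = max(verb_mean) - min(verb_mean))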

Transitive Sentences

For the transitive sentences, the good-theme subjects are expected to have slower reading times after the verb.

  • Good cause: The brick shattered the fragile goblet when they bumped together.
  • Good theme: The glass shattered the fragile goblet when they bumped together.

Our results provide some evidence for the effect in transitives, with a significant effect in region 2. Overall, though, the effect in the first region is not compelling (and not statistically significant). It may be that we would need many more participants to detect such a small effect in an online experiment; a rough power sketch appears at the end of the Statistics section.

Code
df_regions |> 
  filter(transitivity == "tranS") |> 
  group_by(ID, region, subject) |> 
  summarize(mean = mean(region_rt_avg, na.rm = TRUE)) |> 
  ggplot(aes(x = region, y = mean, color = subject)) + 
  geom_point(stat = "summary", position = position_dodge(width = .5)) +
  geom_errorbar(stat = "summary", position = position_dodge(width = .5), width = .5) +
  geom_line(aes(group = subject), stat = "summary", position = position_dodge(width = .5)) +
  scale_color_manual(values = colors) +
  scale_x_discrete(labels = c("shattered", "the fragile", "goblet when")) +
  labs(title = "Transitive Sentences",  x = "Region", y = "Reading Time (ms)", color = "Subject Type") +
  theme_classic()

Statistics

Code
m_reg1_intran <- lmer(region_rt ~ subject + region_length + (1 + subject | ID) + (1 + subject |item), data = filter(df_regions, region == 1 & transitivity == "intrS"))
tbl_regression(m_reg1_intran, conf.int = FALSE)
Region 1 Intransitives
Characteristic     Beta          p-value
subject
    good-cause     (reference)
    good-theme     -16           0.2
region_length      9.0           0.025
Code
m_reg1_tran <- lmer(region_rt ~ subject + region_length + (1 + subject | ID) + (1 + subject |item), data = filter(df_regions, region == 1 & transitivity == "tranS"))
tbl_regression(m_reg1_tran, conf.int = FALSE)
Region 1 Transitives
Characteristic     Beta          p-value
subject
    good-cause     (reference)
    good-theme     13            0.3
region_length      1.6           0.6
Code
m_reg2_intran <- lmer(region_rt ~ subject + region_length + (1 + subject | ID) + (1 + subject |item), data = filter(df_regions, region == 2 & transitivity == "intrS"))
tbl_regression(m_reg2_intran, conf.int = FALSE)
Region 2 Intransitives
Characteristic     Beta          p-value
subject
    good-cause     (reference)
    good-theme     -3.0          0.8
region_length      8.4           0.033
Code
m_reg2_tran <- lmer(region_rt ~ subject + region_length + (1 + subject | ID) + (1 + subject |item), data = filter(df_regions, region == 2 & transitivity == "tranS"))
tbl_regression(m_reg2_tran, conf.int = FALSE)
Region 2 Transitives
Characteristic     Beta          p-value
subject
    good-cause     (reference)
    good-theme     30            0.027
region_length      14            <0.001
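
As a rough way to gauge how many participants an online replication might need, a power sketch with the simr package could be run on one of these models. This is only a sketch and an assumption, not part of the original analysis: simr is not loaded above, nsim is kept very small, and the test specification for the treatment-coded subject factor may need adjusting.

Code
library(simr)

# pretend the study had been run with 120 participants instead of the current sample
m_ext <- extend(m_reg1_tran, along = "ID", n = 120)

# simulated power for the subject effect at several sample sizes
# (nsim is tiny here just to show the idea; a real run needs many more simulations)
powerCurve(m_ext, test = fixed("subject"), along = "ID",
           breaks = c(30, 60, 90, 120), nsim = 50)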

Individual Results

These results are split into the intransitive and transitive groups, and focus only on the first region after the verb.

For the intransitive sentences, “good cause” subjects are predicted to lead to slower reading. Here the good cause sentence reading times are plotted to the right side of the mini-plot.

Code
pivoted_intran <- df_imean |>
  filter(transitivity == "intrS" & region == 1) |> 
  pivot_wider(names_from = c(subject), values_from = mean) |> 
  rename(goodcause = `good-cause`, goodtheme = `good-theme`) |> 
  arrange(codeID) |> 
  rowwise() |> 
  # build a list column holding the two condition means for the sparkline
  # (good-theme first, so the predicted-slower good-cause mean plots on the right)
  mutate(plot = list(c(goodtheme, goodcause)),
         .after = codeID)

spark_table <- pivoted_intran |> 
  ungroup() |> 
  select(codeID, plot, english_prim, goodtheme,goodcause) |> 
  mutate(across(where(is.numeric), \(x) round(x,0))) |> 
  gt() |> 
    cols_label(
    codeID = "Code",
    plot = "Good Theme - Good Cause",
    goodtheme = "Good Theme",
    goodcause = "Good Cause",
    english_prim = "English Primary"
  ) |>  cols_width(
    codeID ~ px(100),
    plot ~ px(200),
    everything() ~ px(100)
  ) |> 
    gt_plt_sparkline(plot, type = "shaded") |> 
   opt_interactive(use_filters = TRUE)
spark_table
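
A complementary way to read this table (a sketch only, not run here) is to count how many participants show the predicted good-cause-slower pattern in region 1 of the intransitives:

Code
pivoted_intran |> 
  ungroup() |> 
  # participants whose good-cause mean exceeds their good-theme mean
  summarize(n_predicted_direction = sum(goodcause > goodtheme, na.rm = TRUE),
            n_participants = n())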

For the transitive sentences, “good theme” subjects are predicted to lead to slower reading. Here the good theme sentence reading times are plotted to the right side of the mini-plot.

Code
pivoted_tran <- df_imean |>
  filter(transitivity == "tranS" & region == 1) |> 
  pivot_wider(names_from = c(subject), values_from = mean) |> 
  rename(goodcause = `good-cause`, goodtheme = `good-theme`) |> 
  rowwise() |> 
  # build a list column holding the two condition means for the sparkline
  # (good-cause first, so the predicted-slower good-theme mean plots on the right)
  mutate(plot = list(c(goodcause, goodtheme)),
         .after = codeID)

spark_table <- pivoted_tran |> 
  ungroup() |> 
  select(codeID, plot, english_prim, goodcause, goodtheme) |> 
  mutate(across(where(is.numeric), \(x) round(x,0))) |> 
  gt() |> 
    cols_label(
    codeID = "Code",
    plot = "Good Cause - Good Theme",
    goodcause = "Good Cause",
    goodtheme = "Good Theme",
    english_prim = "English Primary"
  ) |>  cols_width(
    codeID ~ px(100),
    plot ~ px(200),
    everything() ~ px(100)
  ) |> 
    gt_plt_sparkline(plot, type = "shaded") |> 
   opt_interactive(use_filters = TRUE)
spark_table
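
The same kind of count (again just a sketch) applies to the transitive table, where good-theme subjects are predicted to be slower:

Code
pivoted_tran |> 
  ungroup() |> 
  # participants whose good-theme mean exceeds their good-cause mean
  summarize(n_predicted_direction = sum(goodtheme > goodcause, na.rm = TRUE),
            n_participants = n())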