Supplementary material for HYPOTHESIS TESTING AND CONFIDENCE INTERVAL MISINTERPRETATIONS AMONG PSYCHOLOGY RESEARCHERS AND STUDENTS

 

Article Summary

These are the technical notes and code samples that accompany our main article on NHST and confidence interval misinterpretations among psychologists.

Files and resources


Lyu et al. (2018)

Files can be found here. These files include:

  • Lyu et al. original data in CSV format

  • The code below in a .R file

######################################################################################
# Analysis of Lyu et al. (2018)
######################################################################################

# This dataset can be found below:
# https://osf.io/qe4z8/

# Load libraries
library(tidyverse)
library(haven)

######################################################################################
# Recoding and cleaning for article presentation
######################################################################################
# Load data
data = read_csv("Lyu (2018).csv")

# Code education level
# 1 = undergraduate; 2/3 = master's student; 4 = doctoral student; 5/6 = post-doc/assist. prof.; 7 = teaching/research for years
data[["Education"]] =
  as_factor(labelled(data[["Education"]],
                     labels = c(
                       Undergraduate = 1,
                       Masters = 2,
                       Masters = 3,
                       `PhD student` = 4,
                       `Post-doc/assist prof.` = 5,
                       `Post-doc/assist prof.` = 6,
                       `Teaching/research for years` = 7))) %>%
  fct_explicit_na(na_level = "(Missing)")
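
# A sketch of an alternative to the labelled() block above that avoids repeated label
# names (some haven/forcats versions are strict about duplicates). dplyr::recode() plus
# factor() is assumed to be an acceptable substitute; left commented out so the version
# above remains the one that runs.
# data[["Education"]] =
#   dplyr::recode(data[["Education"]],
#                 `1` = "Undergraduate",
#                 `2` = "Masters", `3` = "Masters",
#                 `4` = "PhD student",
#                 `5` = "Post-doc/assist prof.", `6` = "Post-doc/assist prof.",
#                 `7` = "Teaching/research for years") %>%
#   factor() %>%
#   fct_explicit_na(na_level = "(Missing)")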

# Code sub-field
# 1. foundamental research/cognitive psychology; 2. cognitive neuroscience; 3 social/legal psych;
# 4 clinical/medical psych; 5. developmental/educational; 6. theoretical/history of psych; 7. psychometric/psycho-stats.;
# 8. biological/neuropsycho; 9. psychiatry/medical; 10. neuroscience/neuroimaging; 11. others
data[["sub_field"]] =
  as_factor(labelled(data[["sub_field"]],
                     labels = c(
                       `foundamental research/cognitive psychology` = 1,
                       `cognitive neuroscience` = 2,
                       `social/legal psych` = 3,
                       `clinical/medical psych` = 4,
                       `developmental/educational` = 5,
                       `theoretical/history of psych` = 6,
                       `psychometric/psycho-stats` = 7,
                       `biological/neuropsycho` = 8,
                       `psychiatry/medical` = 9,
                       `neuroscience/neuroimaging` = 10,
                       `others` = 11))) %>%
  fct_explicit_na(na_level = "(Missing)")

# Create pall, which equals one if the respondent got at least 1 NHST question wrong
# Create ciall, which equals one if the respondent got at least 1 confidence interval question wrong
# Note that in the original coding a wrong answer is represented by 0
cooked_data = data %>%
  mutate(psum = p_Q1 + p_Q2 + p_Q3 + p_Q4 + p_Q5 + p_Q6) %>%
  mutate(pall = ifelse(psum == 6, 0, 1)) %>% # pall = 1 if at least 1 misunderstanding
  mutate(cisum = ci_Q1 + ci_Q2 + ci_Q3 + ci_Q4 + ci_Q5 + ci_Q6) %>%
  mutate(ciall = ifelse(cisum == 6, 0, 1)) %>% # ciall = 1 if at least 1 misunderstanding
  mutate(p_or_ci_sum = p_Q1 + p_Q2 + p_Q3 + p_Q4 + p_Q5 + p_Q6 + ci_Q1 + ci_Q2 + ci_Q3 + ci_Q4 + ci_Q5 + ci_Q6) %>%
  mutate(p_or_ci_all = ifelse(p_or_ci_sum == 12, 0, 1)) # p_or_ci_all = 1 if at least 1 misunderstanding
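
# A more compact sketch of the same derivation, assuming the p_Q* and ci_Q* columns are
# the only columns with those prefixes; commented out so the explicit version above stays
# the one that runs.
# cooked_data = data %>%
#   mutate(psum        = rowSums(select(., starts_with("p_Q"))),
#          cisum       = rowSums(select(., starts_with("ci_Q"))),
#          pall        = as.integer(psum < 6),   # 1 if at least 1 NHST misunderstanding
#          ciall       = as.integer(cisum < 6),  # 1 if at least 1 CI misunderstanding
#          p_or_ci_all = as.integer(psum + cisum < 12))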


###
# Analysis of psychologists
###
# Filter down to only psychologists
psychology_fields_data = cooked_data %>%
  filter(Psych_or_not == 1)

# Sample size by education
psychology_fields_data %>%
  group_by(Education) %>%
  tally

# Sample size by subfield
psychology_fields_data %>%
  group_by(sub_field) %>%
  tally

# NHST - Proportion with at least one misunderstanding by Education
psychology_fields_data %>%
  group_by(Education) %>%
  summarize(prop_with_misunderstanding = mean(pall))

# NHST - Proportion misunderstanding by question and education
# This table is shown in the article by The Research
# For variety this block uses mutate_at to round figures
psychology_fields_data %>%
  select(Education, p_Q1, p_Q2, p_Q3, p_Q4, p_Q5, p_Q6) %>%
  group_by(Education) %>%
  summarize_all(mean) %>%
  mutate_at(.vars=vars(p_Q1, p_Q2, p_Q3, p_Q4, p_Q5, p_Q6),
            .funs=function(x) { round(100*(1-x)) }) # mean(*) gives proportion of correct answers, 1 - mean(*) converts to proportion of incorrect.
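
# The same table written with the newer across() interface (dplyr >= 1.0), as a sketch;
# the summarize_all()/mutate_at() pairing above is kept because it matches the rest of
# this script. Commented out to avoid printing the table twice.
# psychology_fields_data %>%
#   select(Education, starts_with("p_Q")) %>%
#   group_by(Education) %>%
#   summarize(across(everything(), ~ round(100 * (1 - mean(.x)))))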

# NHST - Average number of misinterpretations by education
psychology_fields_data %>%
  mutate(nhst_num_incorrect = 6 - psum) %>%
  group_by(Education) %>%
  summarize_at(.vars=vars(nhst_num_incorrect), mean) 

# NHST - Average number of misinterpretations by sub-field
psychology_fields_data %>%
  mutate(nhst_num_incorrect = 6 - psum) %>%
  group_by(sub_field) %>%
  summarize_at(.vars=vars(nhst_num_incorrect), mean) 


# CI - Proportion with at least one misunderstanding by Education
psychology_fields_data  %>%
  group_by(Education) %>%
  summarize(prop_with_misunderstanding = mean(ciall))

# CI - Proportion misunderstanding by question and education
# This table is shown in the article by The Research
# For variety this block uses mutate_at to round figures
psychology_fields_data %>%
  select(Education, ci_Q1, ci_Q2, ci_Q3, ci_Q4, ci_Q5, ci_Q6) %>%
  group_by(Education) %>%
  summarize_all(mean) %>%
  mutate_at(.vars=vars(ci_Q1, ci_Q2, ci_Q3, ci_Q4, ci_Q5, ci_Q6),
            .funs=function(x) { round(100*(1-x)) }) # mean(*) gives proportion of correct answers, 1 - mean(*) converts to proportion of incorrect.


###
# Analysis of psychology subfield
###
# NHST - Proportion with at least one misunderstanding by sub-field
# This table is shown in the article by The Research
# Only report subfields with at least 9 respondents (a somewhat arbitrary choice The Research is making)
psychology_fields_data %>%
  group_by(sub_field) %>%
  summarize(prop_with_misunderstanding = round(100*mean(pall)),
            n=n()) %>%
  filter(n >= 9)

# CI - Proportion with at least one misunderstanding by sub-field
# This table is shown in the article by The Research
# Only report subfields with at least 9 respondents (a somewhat arbitrary choice The Research is making)
psychology_fields_data%>%
  group_by(sub_field) %>%
  summarize(prop_with_misunderstanding = round(100*mean(ciall)),
            n=n()) %>%
  filter(n >= 9)

# CI - Average number of misinterpretations by education
psychology_fields_data %>%
  mutate(ci_num_incorrect = 6 - cisum) %>%
  group_by(Education) %>%
  summarize_at(.vars=vars(ci_num_incorrect), mean) 

# CI - Average number of misinterpretations by sub-field
psychology_fields_data %>%
  mutate(ci_num_incorrect = 6 - cisum) %>%
  group_by(sub_field) %>%
  summarize_at(.vars=vars(ci_num_incorrect), mean) 

###
# Analysis of medical subfields
###
# Select medical related subfields (as judged by The Research)
medical_fields_data = cooked_data %>%
  filter(sub_field == "cognitive neuroscience" |
         sub_field == "biological/neuropsycho" |
         sub_field == "psychiatry/medical" |
         sub_field == "neuroscience/neuroimaging")

# NHST - Proportion misunderstanding at least one question
medical_fields_data  %>%
  summarize(prop_with_misunderstanding = mean(pall))

# NHST - Proportion misunderstanding at least one question by sub-field
medical_fields_data  %>%
  group_by(sub_field) %>%
  summarize(prop_with_misunderstanding = mean(pall))

# Sample sizes
medical_fields_data  %>%
  group_by(sub_field) %>%
  tally

# NHST - Proportion misunderstanding by education
medical_fields_data  %>%
  group_by(Education) %>%
  summarize(prop_with_misunderstanding = mean(pall))


# NHST - Proportion misunderstanding by question and education
# This table is shown in the article by The Research
# For variety this block uses mutate_at to round figures
medical_fields_data %>%
  select(Education, p_Q1, p_Q2, p_Q3, p_Q4, p_Q5, p_Q6) %>%
  group_by(Education) %>%
  summarize_all(mean) %>%
  mutate_at(.vars=vars(p_Q1, p_Q2, p_Q3, p_Q4, p_Q5, p_Q6),
            .funs=function(x) { round(100*(1-x)) }) # mean(*) gives proportion of correct answers, 1 - mean(*) converts to proportion of incorrect.

# Sample sizes
medical_fields_data %>% group_by(Education) %>% tally()


# CI - Proportion misunderstanding at least one question
medical_fields_data  %>%
  summarize(prop_with_misunderstanding = mean(ciall))

# CI - Proportion misunderstanding at least one question by sub-field
medical_fields_data  %>%
  group_by(sub_field) %>%
  summarize(prop_with_misunderstanding = mean(ciall))

# CI - Proportion misunderstanding by question
# This table is shown in the article by The Research
# For variety this block uses mutate_all to round figures
medical_fields_data %>%
  select(ci_Q1, ci_Q2, ci_Q3, ci_Q4, ci_Q5, ci_Q6) %>%
  summarize_all(mean) %>%
  mutate_all(.funs=function(x) { round(100*(1-x)) }) # mean(*) gives proportion of correct answers, 1 - mean(*) converts to proportion of incorrect.

# Sample size
medical_fields_data  %>%
  group_by(sub_field) %>%
  tally


######################################################################################
# Recoding and cleaning for meta-analysis
######################################################################################
# Load data
data = read_csv("Lyu (2018).csv")

# Code education level (grouped differently than in the analysis version above)
# 1 = undergraduate; 2/3 = master's student; 4 = doctoral student; 5/6/7 = with PhD degree
data[["Education"]] =
  as_factor(labelled(data[["Education"]],
                     labels = c(
                       Undergraduate = 1,
                       Masters = 2,
                       Masters = 3,
                       `PhD student` = 4,
                       `With PhD degree` = 5,
                       `With PhD degree` = 6,
                       `With PhD degree` = 7))) %>%
  fct_explicit_na(na_level = "(Missing)")


# Create pall, which equals one if the respondent got at least 1 NHST question wrong
# Create ciall, which equals one if the respondent got at least 1 confidence interval question wrong
# Note that in the original coding a wrong answer is represented by 0
cooked_data = data %>%
  mutate(psum = p_Q1 + p_Q2 + p_Q3 + p_Q4 + p_Q5 + p_Q6) %>%
  mutate(pall = ifelse(psum == 6, 0, 1)) %>% # pall = 1 if at least 1 misunderstanding
  mutate(cisum = ci_Q1 + ci_Q2 + ci_Q3 + ci_Q4 + ci_Q5 + ci_Q6) %>%
  mutate(ciall = ifelse(cisum == 6, 0, 1)) %>% # ciall = 1 if at least 1 misunderstanding
  mutate(p_or_ci_sum = p_Q1 + p_Q2 + p_Q3 + p_Q4 + p_Q5 + p_Q6 + ci_Q1 + ci_Q2 + ci_Q3 + ci_Q4 + ci_Q5 + ci_Q6) %>%
  mutate(p_or_ci_all = ifelse(p_or_ci_sum == 12, 0, 1)) # p_or_ci_all = 1 if at least 1 misunderstanding


###
# NHST
###
# Get proportion with at least one misunderstanding
# Respondents could have self-identified as psychologists or not (captured in the Psych_or_not flag)
cooked_data %>%
  group_by(Education, Psych_or_not) %>%
  summarize(prop_with_misunderstanding = mean(pall)) %>%
  spread(., Psych_or_not, prop_with_misunderstanding)
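
# spread() is superseded in current tidyr; a pivot_wider() sketch of the same reshape,
# assuming tidyr >= 1.0.0 is installed. Commented out to avoid printing the table twice.
# cooked_data %>%
#   group_by(Education, Psych_or_not) %>%
#   summarize(prop_with_misunderstanding = mean(pall)) %>%
#   pivot_wider(names_from = Psych_or_not, values_from = prop_with_misunderstanding)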

# Get sample size
# Respondents could have self-identified as psychologists or not (captured in the Psych_or_not flag)
cooked_data %>%
  group_by(Education, Psych_or_not) %>%
  tally() %>%
  spread(., Psych_or_not, n)


###
# CI
###
# Get proportion with at least one misunderstanding
# Respondents could have self-identified as psychologists or not (captured in the Psych_or_not flag)
cooked_data %>%
  group_by(Education, Psych_or_not) %>%
  summarize(prop_with_misunderstanding = mean(ciall)) %>%
  spread(., Psych_or_not, prop_with_misunderstanding)

# Get sample size
# Respondents could have self-identified as psychologists or not (captured in the Psych_or_not flag)
cooked_data %>%
  group_by(Education, Psych_or_not) %>%
  tally() %>%
  spread(., Psych_or_not, n)


###
# Across all 12 NHST and CI questions
###
# Get proportion with at least one misunderstanding
# Respondents could have self-identified as psychologists or not (captured in the Psych_or_not flag)
cooked_data %>%
  group_by(Education, Psych_or_not) %>%
  summarize(prop_with_misunderstanding = mean(p_or_ci_all)) %>%
  spread(., Psych_or_not, prop_with_misunderstanding)

# Get sample size
# Respondents could have self-identified as psychologists or not (captured in the Psych_or_not flag)
cooked_data %>%
  group_by(Education, Psych_or_not) %>%
  tally() %>%
  spread(., Psych_or_not, n)


Lyu et al. (2020)

Files can be found here. These files include:

  • Lyu et al. original data in CSV format

  • The code below in a .R file

######################################################################################
# Analysis of Lyu et al. (2020)
######################################################################################

# This dataset can be found below:
# https://osf.io/j4nz3/


# Load libraries
library(tidyverse)
library(haven)

# Load data
data = read_csv("Lyu (2020).csv")


###
# Code and transform data
###
# Code respondent field
data[["subject"]] =
  as_factor(labelled(data[["subject"]],
                    labels = c(
                      Science = 1,
                      `Engineering & Agronomy` = 2,
                      Medicine = 3,
                      Economics = 5,
                      Management = 6,
                      Psychology = 7,
                      `Socilogy and Other Social science` = 8,
                      `Math and Stats` = 9)))

# Code education level
data[["education"]] =
  as_factor(labelled(data[["education"]],
                     labels = c(
                              Undergraduate = 1,
                              Masters = 2,
                              `PhD student` = 3,
                              `With PhD degree` = 4)))

# Code version of questionnaire as either the significant or nonsignificant version
data[["session"]] =
  as_factor(labelled(data[["session"]],
                     labels = c(
                       significant = 1,
                       nonsignificant = 2)))

# Add column for total number of misunderstandings
data = data %>%
  mutate(num_nhst_wrong = 4 - (p1 + p2 + p3 + p4)) %>%
  mutate(num_ci_wrong = 4 - (ci1 + ci2 + ci3 + ci4))
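
# A quick consistency-check sketch: assuming pall in the CSV is coded 1 = at least one
# NHST misinterpretation (mirroring the derived pall in the 2018 script above), every
# respondent with pall == 0 should also have num_nhst_wrong == 0. Commented out;
# uncomment to inspect.
# data %>%
#   count(pall, no_nhst_errors = num_nhst_wrong == 0)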


###
# Reproduce Table 1
###

# Get the overall percentage of respondents with at least 1 incorrect response, broken
# down by respondent field. By changing session to "nonsignificant" and "pall" to "ciall"
# you can get all four major views from Table 1 of the paper.
data %>%
  filter(session == "significant") %>%
  select(subject, pall) %>%
  group_by(subject) %>%
  summarize_all(mean)

# Get sample sizes
data %>%
  select(subject, session) %>%
  group_by(subject, session) %>%
  tally %>%
  spread(., session, n)


###
# NHST misunderstandings - Descriptive
###
# At least one misinterpretation, broken down by field
data %>%
  select(subject, pall) %>%
  group_by(subject) %>%
  summarize_all(mean) %>%
  arrange(desc(pall))

# Sample sizes
data %>%
  select(subject, pall) %>%
  group_by(subject) %>%
  tally


# At least one misinterpretation, broken down by field and instrument
# version (significant or nonsignificant)
data %>%
  select(session, subject, pall) %>%
  group_by(session, subject) %>%
  summarize_all(mean) %>%
  spread(., session, pall)

# Sample sizes
data %>%
  select(session, subject, pall) %>%
  group_by(session, subject) %>%
  tally %>%
  spread(., session, n)


# Get the average number of NHST misinterpretations by field.
# Use sample sizes from calculation above.
data %>%
  select(subject, num_nhst_wrong) %>%
  group_by(subject) %>%
  summarize_all(mean) %>%
  arrange(desc(num_nhst_wrong))


# Get the average number of NHST misinterpretations by field and version.
# Use sample sizes from calculation above.
data %>%
  select(subject, session, num_nhst_wrong) %>%
  group_by(subject, session) %>%
  summarize_all(mean) %>%
  spread(., session, num_nhst_wrong)


# Get percentage of respondents with at least 1 incorrect response, broken down by
# respondent field and education level
data %>%
  select(No, subject, education, pall) %>%
  spread(., education, pall) %>%
  select(-No) %>%
  group_by(subject) %>%
  summarize_all(mean, na.rm = TRUE)

# Could also use reshape2::dcast if desired (requires library(reshape2)).
#data %>%
#  select(subject, education, pall) %>%
#  dcast(., subject ~ education, value.var="pall", fun.aggregate=mean)

# Get sample sizes
data %>%
  select(subject, education) %>%
  group_by(subject, education) %>%
  tally() %>%
  spread(., education, n)


# Get the average number of NHST misinterpretations by education
# Use sample sizes from calculation above.
data %>%
  select(No, subject, education, num_nhst_wrong) %>%
  spread(., education, num_nhst_wrong) %>%
  select(-No) %>%
  group_by(subject) %>%
  summarize_all(mean, na.rm = TRUE)


# Get proportion of incorrect responses by question
# By changing session from "significant" to "nonsignificant" it is possible to break down
# the proportion of incorrect responses by question, field, and statement version.
data %>%
  filter(session == "significant") %>%
  select(subject, p1, p2, p3, p4) %>%
  group_by(subject) %>%
  summarize_at(vars(p1, p2, p3, p4),
               .funs=function(x) { round(mean(x) * 100) })

# Sample size (same for each question)
data %>%
  filter(session == "nonsignificant") %>%
  select(subject, p1, p2, p3, p4) %>%
  group_by(subject) %>%
  summarize_at(vars(p1, p2, p3, p4),
               .funs=function(x) { length(x) })
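
# A shorter sketch of the same sample-size table, assuming one row per respondent;
# count() collapses to a single n per field (the per-question length() above returns
# the same number for every question).
# data %>%
#   filter(session == "nonsignificant") %>%
#   count(subject)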


###
# NHST misunderstandings - Statistical
###
x = data %>%
  filter(subject == "Psychology") %>%
  filter(session == "nonsignificant") %>%
  select(num_nhst_wrong) %>%
  deframe

y = data %>%
  filter(subject == "Psychology") %>%
  filter(session == "significant") %>%
  select(num_nhst_wrong) %>%
  deframe

t.test(x,y)
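
# An equivalent sketch using the formula interface (the same Welch two-sample t-test);
# note the sign of the mean difference depends on the factor level order of session.
# t.test(num_nhst_wrong ~ session,
#        data = filter(data, subject == "Psychology"))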


###
# Confidence Interval misunderstandings
###
# At least one misinterpretation, broken down by field
data %>%
  select(subject, ciall) %>%
  group_by(subject) %>%
  summarize_all(mean) %>%
  arrange(desc(ciall))

# Sample sizes
data %>%
  select(subject, ciall) %>%
  group_by(subject) %>%
  tally


# At least one misinterpretation, broken down by field and instrument
# version (significant or nonsignificant)
data %>%
  select(session, subject, ciall) %>%
  group_by(session, subject) %>%
  summarize_all(mean) %>%
  spread(., session, ciall)

# Sample sizes
data %>%
  select(session, subject, ciall) %>%
  group_by(session, subject) %>%
  tally %>%
  spread(., session, n)


# Get the average number of CI misinterpretations by field
# Use sample sizes from calculation above.
data %>%
  select(subject, num_ci_wrong) %>%
  group_by(subject) %>%
  summarize_all(mean) %>%
  arrange(desc(num_ci_wrong))

# Get the average number of CI misinterpretations by field and version
# Use sample sizes from calculation above.
data %>%
  select(subject, session, num_ci_wrong) %>%
  group_by(subject, session) %>%
  summarize_all(mean) %>%
  spread(., session, num_ci_wrong)


# Get percentage of respondents with at least 1 incorrect response, broken down by
# respondent field and education level
data %>%
  select(No, subject, education, ciall) %>%
  spread(., education, ciall) %>%
  select(-No) %>%
  group_by(subject) %>%
  summarize_all(mean, na.rm = TRUE)

# Could also use reshape2::dcast if desired (requires library(reshape2)).
#data %>%
#  select(subject, education, ciall) %>%
#  dcast(., subject ~ education, value.var="ciall", fun.aggregate=mean)

# Get sample sizes
data %>%
  select(subject, education) %>%
  group_by(subject, education) %>%
  tally() %>%
  spread(., education, n)


# Get the average number of CI misinterpretations by education
# Use sample sizes from calculation above.
data %>%
  select(No, subject, education, num_ci_wrong) %>%
  spread(., education, num_ci_wrong) %>%
  select(-No) %>%
  group_by(subject) %>%
  summarize_all(mean, na.rm = TRUE)


# Get proportion of incorrect responses by question
# By changing the session filter between "significant" and "nonsignificant" it is possible
# to break down the proportion of incorrect responses by question, field, and statement version.
data %>%
  filter(session == "nonsignificant") %>%
  select(subject, ci1, ci2, ci3, ci4) %>%
  group_by(subject) %>%
  summarize_all(mean, na.rm = TRUE)

# Sample size (same for each question)
data %>%
  filter(session == "nonsignificant") %>%
  select(subject, ci1, ci2, ci3, ci4) %>%
  group_by(subject) %>%
  summarize_at(vars(ci1, ci2, ci3, ci4),
               .funs=function(x) { length(x) })

###
# CI misunderstandings - Statistical
###
x = data %>%
  filter(subject == "Psychology") %>%
  filter(session == "nonsignificant") %>%
  select(num_ci_wrong) %>%
  deframe

y = data %>%
  filter(subject == "Psychology") %>%
  filter(session == "significant") %>%
  select(num_ci_wrong) %>%
  deframe

t.test(x,y)