library(lme4)
library(lmerTest)

# Location of the results table. The default is the original hard-coded
# location; set the BNC_RESULTS_CSV environment variable to run the analysis
# against a copy stored somewhere else.
bnc_path <- Sys.getenv(
  "BNC_RESULTS_CSV",
  unset = '/Volumes/Disk1/erp/results_conll/bnc_fiction_gpt2-ft.csv'
)

# Fail early with a readable message instead of read.csv's connection error.
if (!file.exists(bnc_path)) {
  stop("Results file not found: ", bnc_path, call. = FALSE)
}

bnc <- read.csv(bnc_path)

# Log transform variables
# NOTE(review): log() silently yields -Inf for zeros and NaN (with a warning)
# for negative inputs; presumably all three columns are strictly positive
# wherever they are not missing -- confirm against the data.
bnc$logh <- log(bnc$xu_h)
bnc$loghdoc <- log(bnc$xu_h_document)
bnc$logp <- log(bnc$position)


# ================ Decontextualised information content ================

# -------------- Document --------------
# Regress logh on log position, with a random intercept and a random
# log-position slope for each path.
m <- lmer(
  formula = logh ~ 1 + logp + (1 + logp | path),
  data = bnc
)
summary(m)
# Random effects:
#  Groups   Name        Variance  Std.Dev. Corr
#  path     (Intercept) 0.0147535 0.1215
#           logp        0.0001188 0.0109   -0.65
#  Residual             0.0520669 0.2282
# Number of obs: 415110, groups:  path, 138

# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)
# (Intercept)   0.012933   0.010677 132.944862   1.211    0.228
# logp         -0.004434   0.001009 128.788578  -4.395  2.3e-05 ***


# ================ Contextualised information content ================

# -------------- Document --------------
# Regress loghdoc on log position, with a random intercept and a random
# log-position slope for each path.
m <- lmer(
  formula = loghdoc ~ 1 + logp + (1 + logp | path),
  data = bnc
)
summary(m)
# Random effects:
#  Groups   Name        Variance  Std.Dev. Corr
#  path     (Intercept) 0.0498576 0.22329
#           logp        0.0005116 0.02262  -0.54
#  Residual             0.2675580 0.51726
# Number of obs: 415081, groups:  path, 138

# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)
# (Intercept)   0.066773   0.019941 129.914633   3.349  0.00106 **
# logp         -0.023401   0.002123 112.710037 -11.020  < 2e-16 ***


# ==================== Mutual information ====================

# -------------- Document --------------
# Regress xu_mi_document (untransformed) on log position, with a random
# intercept and a random log-position slope for each path.
m <- lmer(
  formula = xu_mi_document ~ 1 + logp + (1 + logp | path),
  data = bnc
)
summary(m)
# Random effects:
#  Groups   Name        Variance Std.Dev. Corr
#  path     (Intercept) 0.075617 0.27499
#           logp        0.001348 0.03672  -0.63
#  Residual             0.994389 0.99719
# Number of obs: 415081, groups:  path, 138

# Fixed effects:
#              Estimate Std. Error        df t value Pr(>|t|)
# (Intercept) 7.812e-01  2.610e-02 1.241e+02   29.93  < 2e-16 ***
# logp        3.437e-02  3.562e-03 1.098e+02    9.65 2.53e-16 ***
