library(lme4)
library(lmerTest)

# Load the per-row results table used by every model fitted in this script.
bnc <- read.csv(
  file = "/Volumes/Disk1/erp/results_conll/bnc_unpub_gpt2-ft.csv"
)

# Add natural-log versions of the two information measures and of position.
# New columns are appended in this order: logh, loghdoc, logp.
#   logh    = log(xu_h)
#   loghdoc = log(xu_h_document)
#   logp    = log(position)
bnc[c("logh", "loghdoc", "logp")] <- lapply(
  bnc[c("xu_h", "xu_h_document", "position")],
  log
)


# ================ Decontextualised information content ================

# -------------- Document --------------
# Mixed-effects regression of logh on logp, with a random intercept and a
# random logp slope for each level of `path`. The result is stored in `m`,
# which later model fits in this script reuse.
m <- lmer(
  formula = logh ~ 1 + logp + (1 + logp | path),
  data = bnc
)
summary(m)
# Random effects:
 # Groups   Name        Variance  Std.Dev. Corr 
 # path     (Intercept) 0.0088776 0.09422       
          # logp        0.0002045 0.01430  -0.67
 # Residual             0.0669332 0.25871       
# Number of obs: 109968, groups:  path, 79

# Fixed effects:
             # Estimate Std. Error        df t value Pr(>|t|)    
# (Intercept) -0.050638   0.012396 73.460303  -4.085 0.000111 ***
# logp         0.005397   0.002057 46.215024   2.623 0.011767 *  


# ================ Contextualised information content ================

# -------------- Document --------------
# Mixed-effects regression of loghdoc on logp, with a random intercept and a
# random logp slope for each level of `path`. Overwrites any model previously
# stored in `m`.
m <- lmer(
  formula = loghdoc ~ 1 + logp + (1 + logp | path),
  data = bnc
)
summary(m)
# Random effects:
 # Groups   Name        Variance Std.Dev. Corr 
 # path     (Intercept) 0.131887 0.36316       
          # logp        0.003089 0.05558  -0.92
 # Residual             1.043361 1.02145       
# Number of obs: 109963, groups:  path, 79

# Fixed effects:
             # Estimate Std. Error        df t value Pr(>|t|)    
# (Intercept)  0.110701   0.047663 90.209130   2.323 0.022454 *  
# logp        -0.030419   0.007798 92.131807  -3.901 0.000182 ***


# ==================== Mutual information ====================

# -------------- Document --------------
# Mixed-effects regression of xu_mi_document (used untransformed) on logp,
# with a random intercept and a random logp slope for each level of `path`.
# Overwrites any model previously stored in `m`.
m <- lmer(
  formula = xu_mi_document ~ 1 + logp + (1 + logp | path),
  data = bnc
)
summary(m)
# Random effects:
 # Groups   Name        Variance Std.Dev. Corr 
 # path     (Intercept) 0.088835 0.29805       
          # logp        0.003873 0.06223  -0.80
 # Residual             0.740076 0.86028       
# Number of obs: 109963, groups:  path, 79

# Fixed effects:
             # Estimate Std. Error        df t value Pr(>|t|)    
# (Intercept)  0.482858   0.040198 75.180827  12.012  < 2e-16 ***
# logp         0.068881   0.008354 63.617695   8.245 1.24e-11 ***