library(lme4)
library(lmerTest)

# Load the per-token results table from disk.
bnc <- read.csv("/Volumes/Disk1/erp/results_conll/bnc_acprose_gpt2-ft.csv")

# Add natural-log versions of the two xu_h measures and of position,
# stored as the new columns logh, loghdoc and logp (in that order).
bnc[c("logh", "loghdoc", "logp")] <- lapply(
  bnc[c("xu_h", "xu_h_document", "position")],
  log
)


# ================ Decontextualised information content ================

# -------------- Document --------------
# Regress logh on logp, with a random intercept and a random logp slope
# for every level of `path`; then display the fitted model summary.
m <- lmer(
  formula = logh ~ 1 + logp + (1 + logp | path),
  data = bnc
)
summary(m)
# Random effects:
 # Groups   Name        Variance  Std.Dev. Corr 
 # path     (Intercept) 0.0249882 0.15808       
          # logp        0.0003754 0.01937  -0.91
 # Residual             0.0332677 0.18239       
# Number of obs: 197278, groups:  path, 152

# Fixed effects:
              # Estimate Std. Error         df t value Pr(>|t|)    
# (Intercept)  -0.104879   0.013152 145.669795  -7.975 4.10e-13 ***
# logp          0.014371   0.001665 134.458741   8.629 1.55e-14 ***



# ================ Contextualised information content ================

# -------------- Document --------------
# Regress loghdoc on logp, with a random intercept and a random logp slope
# for every level of `path`; then display the fitted model summary.
m <- lmer(
  formula = loghdoc ~ 1 + logp + (1 + logp | path),
  data = bnc
)
summary(m)
# Random effects:
 # Groups   Name        Variance Std.Dev. Corr 
 # path     (Intercept) 0.085780 0.29288       
          # logp        0.001643 0.04053  -0.94
 # Residual             0.123951 0.35207       
# Number of obs: 197258, groups:  path, 152

# Fixed effects:
              # Estimate Std. Error         df t value Pr(>|t|)    
# (Intercept)  -0.093973   0.024425 147.805084  -3.847 0.000177 ***
# logp          0.007610   0.003457 144.547297   2.201 0.029308 * 


# ==================== Mutual information ====================

# -------------- Document --------------
# Regress the untransformed xu_mi_document on logp, with a random intercept
# and a random logp slope for every level of `path`; then display the summary.
m <- lmer(
  formula = xu_mi_document ~ 1 + logp + (1 + logp | path),
  data = bnc
)
summary(m)
# Random effects:
 # Groups   Name        Variance Std.Dev. Corr 
 # path     (Intercept) 0.166460 0.40800       
          # logp        0.003948 0.06283  -0.80
 # Residual             1.148890 1.07186       
# Number of obs: 197256, groups:  path, 152

# Fixed effects:
             # Estimate Std. Error        df t value Pr(>|t|)    
# (Intercept) 7.190e-01  3.709e-02 1.493e+02  19.385  < 2e-16 ***
# logp        4.604e-02  5.949e-03 1.228e+02   7.739 3.16e-12 ***