library(lme4)
library(lmerTest)

# Location of the per-token results table. The original absolute path stays
# the default; set the BNC_NEWS_CSV environment variable to read the file from
# another location without editing this script.
default_bnc_csv <- "/Volumes/Disk1/erp/results_conll/bnc_news_gpt2-ft.csv"
bnc_csv <- Sys.getenv("BNC_NEWS_CSV", unset = default_bnc_csv)
if (!nzchar(bnc_csv)) {
  # An empty override is treated as "not set" so read.csv() never gets "".
  bnc_csv <- default_bnc_csv
}
bnc <- read.csv(bnc_csv)

# Log transform variables
# Columns are extracted with [[ ]] instead of $ because $ on a data frame
# partially matches names: if `xu_h` were absent, bnc$xu_h could silently
# resolve to `xu_h_document`. [[ ]] only matches the exact name.
# NOTE(review): log() yields NaN / -Inf for non-positive inputs -- confirm
# that these three columns are strictly positive in the CSV.
bnc$logh <- log(bnc[["xu_h"]])
bnc$loghdoc <- log(bnc[["xu_h_document"]])
bnc$logp <- log(bnc[["position"]])


# ================ Decontextualised information content ================

# -------------- Document --------------
# Linear mixed-effects model of logh on logp, with an intercept and a logp
# slope that both vary by `path`.
m <- lmer(
  formula = logh ~ 1 + logp + (1 + logp | path),
  data = bnc
)
# Display the fitted model's summary (output recorded below).
summary(m)
# Random effects:
 # Groups   Name        Variance  Std.Dev. Corr 
 # path     (Intercept) 0.0100683 0.10034       
          # logp        0.0003024 0.01739  -0.82
 # Residual             0.0447659 0.21158       
# Number of obs: 161088, groups:  path, 153

# Fixed effects:
              # Estimate Std. Error         df t value Pr(>|t|)  
# (Intercept)  -0.017047   0.009235 148.858823  -1.846   0.0669 .
# logp          0.003131   0.001710 118.529843   1.831   0.0696 .



# ================ Contextualised information content ================

# -------------- Document --------------
# Linear mixed-effects model of loghdoc on logp, with an intercept and a logp
# slope that both vary by `path`.
m <- lmer(
  formula = loghdoc ~ 1 + logp + (1 + logp | path),
  data = bnc
)
# Display the fitted model's summary (output recorded below).
summary(m)
# Random effects:
 # Groups   Name        Variance Std.Dev. Corr 
 # path     (Intercept) 0.062140 0.24928       
          # logp        0.002405 0.04904  -0.91
 # Residual             0.297372 0.54532       
# Number of obs: 161082, groups:  path, 153

# Fixed effects:
              # Estimate Std. Error         df t value Pr(>|t|)    
# (Intercept)   0.057040   0.023170 154.392206   2.462   0.0149 *  
# logp         -0.022054   0.004687 146.705345  -4.705  5.8e-06 ***


# ==================== Mutual information ====================

# -------------- Document --------------
# Linear mixed-effects model of xu_mi_document (used untransformed) on logp,
# with an intercept and a logp slope that both vary by `path`.
m <- lmer(
  formula = xu_mi_document ~ 1 + logp + (1 + logp | path),
  data = bnc
)
# Display the fitted model's summary (output recorded below).
summary(m)
# Random effects:
 # Groups   Name        Variance Std.Dev. Corr 
 # path     (Intercept) 0.043574 0.20874       
          # logp        0.001526 0.03907  -0.87
 # Residual             0.843076 0.91819       
# Number of obs: 161082, groups:  path, 153

# Fixed effects:
             # Estimate Std. Error        df t value Pr(>|t|)    
# (Intercept) 7.085e-01  2.418e-02 1.500e+02    29.3   <2e-16 ***
# logp        5.138e-02  4.712e-03 9.352e+01    10.9   <2e-16 ***
