library(lmerTest)
library(lme4)

# Read the two result tables used by every model below: `pb` from
# pb_gpt2-ft.csv and `ch` from pbchains_gpt2-ft.csv.
#
# The directory defaults to the location the analysis was originally run from.
# Set the ERP_RESULTS_DIR environment variable to point the script at a copy
# of the data stored elsewhere; an unset or empty value keeps the default.
default_results_dir <- "/Volumes/Disk1/erp/results_conll"
results_dir <- Sys.getenv("ERP_RESULTS_DIR", unset = default_results_dir)
if (!nzchar(results_dir)) {
  results_dir <- default_results_dir
}

pb <- read.csv(file.path(results_dir, "pb_gpt2-ft.csv"))
ch <- read.csv(file.path(results_dir, "pbchains_gpt2-ft.csv"))


# Log transform variables
# Natural log of one column of `data`, looked up by its exact name.
#
# `$` extraction on a data frame falls back to partial name matching, so
# `ch$xu_h` would silently return `xu_h_chain` if `xu_h` were missing and that
# were the only column whose name starts with "xu_h". `[[` matches names
# exactly, and the explicit check turns a missing column into a clear error.
log_column <- function(data, column) {
  if (!column %in% names(data)) {
    stop("column '", column, "' not found in data", call. = FALSE)
  }
  log(data[[column]])
}

# Decontextualised information content
pb$logh <- log_column(pb, "xu_h")
ch$logh <- log_column(ch, "xu_h")

# Contextualised information content (dialogue, round and chain context)
pb$loghdial <- log_column(pb, "xu_h_dialogue")
pb$loghround <- log_column(pb, "xu_h_round")
ch$loghchain <- log_column(ch, "xu_h_chain")

# Position predictors; for `ch`, logp is the log of the round number
pb$logp <- log_column(pb, "position_in_dialogue")
pb$logr <- log_column(pb, "position_in_round")
ch$logp <- log_column(ch, "round_number")


# ================ Decontextualised information content ================

# -------------- Dialogue --------------
# Log decontextualised information content as a function of log position in
# the dialogue, with a random intercept and random logp slope per dialogue.
m <- lmer(
  formula = logh ~ 1 + logp + (1 + logp | dialogue_id),
  data = pb
)
summary(m)
# Random effects:
#  Groups      Name        Variance Std.Dev. Corr 
#  dialogue_id (Intercept) 0.031163 0.17653       
#              logp        0.001527 0.03907  -0.68
#  Residual                0.111653 0.33415       
# Number of obs: 49012, groups:  dialogue_id, 750
# 
# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)    
# (Intercept)  -0.202967   0.008555 748.636772  -23.73   <2e-16 ***
# logp          0.042234   0.002215 724.046283   19.07   <2e-16 ***

# -------------- Round --------------
# Log decontextualised information content as a function of log position in
# the round, with a random intercept and random logr slope per dialogue.
m <- lmer(
  formula = logh ~ 1 + logr + (1 + logr | dialogue_id),
  data = pb
)
summary(m)
# Random effects:
#  Groups      Name        Variance Std.Dev. Corr
#  dialogue_id (Intercept) 0.021800 0.14765
#              logr        0.001222 0.03496  -0.46
#  Residual                0.113553 0.33698
# Number of obs: 49012, groups:  dialogue_id, 750
#
# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)
# (Intercept)  -0.062097   0.006602 756.327102  -9.405   <2e-16 ***
# logr         -0.003676   0.002375 738.140090  -1.547    0.122

# -------------- Chain --------------
# Chain data: log decontextualised information content as a function of logp
# (log round number in `ch`), with a random intercept and random logp slope
# per dialogue.
m <- lmer(
  formula = logh ~ 1 + logp + (1 + logp | dialogue_id),
  data = ch
)
summary(m)
# Random effects:
#  Groups      Name        Variance Std.Dev. Corr
#  dialogue_id (Intercept) 0.018812 0.13716
#              logp        0.002437 0.04937  -0.34
#  Residual                0.081242 0.28503
# Number of obs: 15347, groups:  dialogue_id, 750
#
# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)
# (Intercept)  -0.094911   0.006744 737.832825  -14.07   <2e-16 ***
# logp          0.049886   0.004455 749.429514   11.20   <2e-16 ***



# ================ Contextualised information content ================

# -------------- Dialogue --------------
# Log information content contextualised on the dialogue, as a function of
# log position in the dialogue; random intercept and logp slope per dialogue.
m <- lmer(
  formula = loghdial ~ 1 + logp + (1 + logp | dialogue_id),
  data = pb
)
summary(m)
# Random effects:
#  Groups      Name        Variance Std.Dev. Corr
#  dialogue_id (Intercept) 0.034546 0.1859
#              logp        0.003684 0.0607   -0.51
#  Residual                0.281050 0.5301
# Number of obs: 48952, groups:  dialogue_id, 750
#
# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)
# (Intercept)   0.097826   0.011280 744.620997   8.672   <2e-16 ***
# logp         -0.081786   0.003504 737.045890 -23.339   <2e-16 ***
 
# -------------- Round --------------
# Log information content contextualised on the round, as a function of log
# position in the round; random intercept and logr slope per dialogue.
m <- lmer(
  formula = loghround ~ 1 + logr + (1 + logr | dialogue_id),
  data = pb
)
summary(m)
# Random effects:
#  Groups      Name        Variance Std.Dev. Corr
#  dialogue_id (Intercept) 0.027363 0.16542
#              logr        0.002691 0.05188  -0.33
#  Residual                0.235962 0.48576
# Number of obs: 48919, groups:  dialogue_id, 750
#
# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)
# (Intercept)   0.008468   0.008191 753.104037   1.034    0.302
# logr         -0.080718   0.003471 719.975933 -23.257   <2e-16 ***


# -------------- Chain --------------
# Chain data: log information content contextualised on the chain, as a
# function of logp (log round number in `ch`); random intercept and logp
# slope per dialogue.
m <- lmer(
  formula = loghchain ~ 1 + logp + (1 + logp | dialogue_id),
  data = ch
)
summary(m)
# Random effects:
#  Groups      Name        Variance Std.Dev. Corr
#  dialogue_id (Intercept) 0.014083 0.11867
#              logp        0.007422 0.08615  0.10
#  Residual                0.141833 0.37661
# Number of obs: 15325, groups:  dialogue_id, 750
#
# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)
# (Intercept)   0.043306   0.007382 733.158537   5.866 6.74e-09 ***
# logp         -0.127548   0.006242 751.290919 -20.435  < 2e-16 ***


# ==================== Mutual information ====================

# -------------- Dialogue --------------
# Dialogue-level mutual information (untransformed response) as a function of
# log position in the dialogue; random intercept and logp slope per dialogue.
# NOTE(review): the recorded output below reports a random-effect correlation
# of -1.00, i.e. a singular fit; consider simplifying the random-effects
# structure before interpreting the variance components.
m <- lmer(
  formula = xu_mi_dialogue ~ 1 + logp + (1 + logp | dialogue_id),
  data = pb
)
summary(m)
# Random effects:
#  Groups      Name        Variance  Std.Dev. Corr
#  dialogue_id (Intercept) 0.0006757 0.02599
#              logp        0.0076691 0.08757  -1.00
#  Residual                1.0426303 1.02109
# Number of obs: 48952, groups:  dialogue_id, 750
#
# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)
# (Intercept) -1.396e-01  1.734e-02  3.020e+04  -8.053 8.36e-16 ***
# logp         3.606e-01  6.111e-03  1.273e+03  59.011  < 2e-16 ***

# -------------- Round --------------
# Round-level mutual information (untransformed response) as a function of
# log position in the round; random intercept and logr slope per dialogue.
m <- lmer(
  formula = xu_mi_round ~ 1 + logr + (1 + logr | dialogue_id),
  data = pb
)
summary(m)
# Random effects:
#  Groups      Name        Variance Std.Dev. Corr
#  dialogue_id (Intercept) 0.06947  0.2636
#              logr        0.02719  0.1649   -0.38
#  Residual                2.81316  1.6772
# Number of obs: 48919, groups:  dialogue_id, 750
#
# Fixed effects:
#              Estimate Std. Error        df t value Pr(>|t|)
# (Intercept)   0.09320    0.02137 740.17594    4.36 1.48e-05 ***
# logr          0.51813    0.01169 695.46874   44.33  < 2e-16 ***

# -------------- Chain --------------
# Chain-level mutual information (untransformed response) as a function of
# logp (log round number in `ch`); random intercept and logp slope per
# dialogue.
# NOTE(review): the recorded output below reports a random-intercept variance
# of 0 and a NaN correlation, i.e. a singular fit; consider simplifying the
# random-effects structure before interpreting the variance components.
m <- lmer(
  formula = xu_mi_chain ~ 1 + logp + (1 + logp | dialogue_id),
  data = ch
)
summary(m)
# Random effects:
#  Groups      Name        Variance Std.Dev. Corr
#  dialogue_id (Intercept) 0.0000   0.0000
#              logp        0.1807   0.4251    NaN
#  Residual                3.6149   1.9013
# Number of obs: 15325, groups:  dialogue_id, 750
#
# Fixed effects:
#               Estimate Std. Error         df t value Pr(>|t|)
# (Intercept) -2.273e-01  3.007e-02  1.461e+04  -7.559 4.31e-14 ***
# logp         1.283e+00  3.127e-02  1.346e+03  41.032  < 2e-16 ***
