######################################################################
######################################################################
# Load in data
######################################################################
######################################################################
# Switch between the two ways of obtaining the analysis data:
#   TRUE  - source util.R and read the CSV files below
#   FALSE - source data_construction.R instead (presumably it builds the same
#           objects from raw data; it is not visible from this file)
REPLICATION <- TRUE

if (REPLICATION) {
  source("util.R")
  final_data <- fread("data/anon_final_data.csv")
  all_ann <- fread("data/all_annotated_info.csv")
  # NOTE(review): unlike the two files above, this one is read from the
  # working directory rather than from data/ -- confirm the path is intended.
  survey_wave_data <- fread("interwave_survey_data.csv")
} else {
  source("data_construction.R")
}


######################################################################
######################################################################
# Results
######################################################################
######################################################################

#######################################################################################
## Overall Results
######################################################################################

# For every (target, survey response) cell: the share of rows whose final
# annotation equals the survey response, with the interval returned by
# binom.agresti.coull(). Only rows with any_notatall_conf == FALSE are used.
rec_plt <- ggplot(
  data = final_data[
    any_notatall_conf == FALSE,
    as.list(binom.agresti.coull(sum(surv == fin_ann), .N)),
    by = .(target, surv)
  ],
  mapping = aes(
    x = surv, y = mean, ymin = lower, ymax = upper,
    color = target, group = target, shape = target
  )
) +
  # Small dodge so the per-target intervals do not overplot each other.
  geom_pointrange(position = position_dodge(width = 0.1)) +
  geom_line(position = position_dodge(width = 0.1)) +
  scale_y_continuous(
    "% All Survey\nResponses Annotated\nw/ Correct Stance",
    labels = percent
  ) +
  xlab("Survey Response") +
  scale_color_discrete("Target") +
  scale_shape_discrete("Target") +
  theme(legend.position = "top")

# Spell out height/width instead of relying on partial argument matching.
ggsave("img/recall.pdf", rec_plt, height = 3, width = 6)

# For every (target, final annotation) cell: the share of rows whose survey
# response equals the annotation, with the interval returned by
# binom.agresti.coull(). Rows where either the survey response or the
# annotation is "Neutral" are dropped, as are rows with
# any_notatall_conf != FALSE; cells with n <= 5 are not plotted.
prec_plt <- ggplot(
  data = final_data[
    any_notatall_conf == FALSE & surv != "Neutral" & fin_ann != "Neutral",
    as.list(binom.agresti.coull(sum(surv == fin_ann), .N)),
    by = .(target, fin_ann)
  ][n > 5],
  mapping = aes(
    x = fin_ann, y = mean, ymin = lower, ymax = upper,
    color = target, shape = target, group = target
  )
) +
  # Small dodge so the per-target intervals do not overplot each other.
  geom_pointrange(position = position_dodge(width = 0.1)) +
  geom_line(position = position_dodge(width = 0.1)) +
  scale_y_continuous(
    "% Pro/Anti\nAnnotations Matching\nPro/Anti Survey",
    labels = percent
  ) +
  xlab("Final Tweet Annotation") +
  scale_color_discrete("Target") +
  theme(legend.position = "none") +
  scale_shape_discrete("Target")

# Spell out height/width instead of relying on partial argument matching.
ggsave("img/precision.pdf", prec_plt, height = 2.5, width = 6)
######################################
########## RQ 1 ######################
######################################

# Apply run_alpha() to every (a1_conf, a2_conf, target) subset of all_ann.
# The code below reads the columns `val` and `subj` from its result
# (presumably the agreement statistic and the number of annotated items --
# run_alpha() is defined outside this file, TODO confirm).
v <- all_ann[, run_alpha(.SD), by = .(a1_conf, a2_conf, target)]

# Share of each target's total `subj` that falls into the cell.
v[, perc := subj / sum(subj), by = target]

# One tile per confidence pair, one panel per target; the fill encodes `perc`
# and the text shows both `val` and `perc`.
ka_plt <- ggplot(
  data = v,
  mapping = aes(
    x = a1_conf,
    y = a2_conf,
    fill = perc,
    label = paste0(
      "Agree: ", format(round(val, 2), nsmall = 2),
      "\n", format(round(perc, 2) * 100, nsmall = 0), "% of Obs"
    )
  )
) +
  geom_tile() +
  geom_text() +
  facet_wrap(~target, nrow = 1) +
  scale_fill_gradientn(colors = c("white", "orange")) +
  theme(legend.position = "none")

# Spell out width/height instead of relying on partial argument matching.
ggsave("img/kripp_alph.pdf", ka_plt, width = 14, height = 4)

# Same statistic for the Lockdowns rows where neither a1_conf nor a2_conf is
# "Very". The value is only auto-printed when run interactively.
run_alpha(all_ann[target == "Lockdowns" & a1_conf != "Very" & a2_conf != "Very"])

######################################
########## RQ 2 ######################
######################################


############## FIRST TIME PLOT ######################################
############## NOT FOR REPLICATION ######################################

if (!REPLICATION) {
  # Uses tweets_to_sample and fin_survey, which are not loaded in the
  # replication branch (presumably created by data_construction.R).

  # Parse the tweet timestamps as GMT and the survey dates as POSIXct.
  tweets_to_sample[, createdat_date := as.POSIXct(
    created_at,
    format = "%a %b %d %H:%M:%S +0000 %Y",
    tz = "GMT"
  )]
  fin_survey[, survey_date_pt := as.POSIXct(survey_date)]

  # Long format: one row per (user, survey date, stance question).
  surv <- data.table(melt(
    fin_survey,
    id.vars = c("uid", "survey_date_pt"),
    measure.vars = c(
      "vaccine_stance", "trump_stance", "mask_stance", "lockdown_stance"
    )
  ))
  surv <- surv[!is.na(value)]

  # Map each stance column to its display name. `variable` can only take the
  # four measure.vars above, so a lookup covers every case.
  target_labels <- c(
    trump_stance = "Trump",
    mask_stance = "Masks",
    lockdown_stance = "Lockdowns",
    vaccine_stance = "Vaccines"
  )
  surv[, target := unname(target_labels[as.character(variable)])]
  surv[, surv := reverse_get_stance(value)]

  # Stack survey responses and tweets. Columns are bound by position, so the
  # tweet date ends up in survey_date_pt and "Tweets" in surv.
  df <- rbind(
    surv[, .(survey_date_pt, target, surv)],
    tweets_to_sample[, .(createdat_date, target)][, surv := "Tweets"],
    use.names = FALSE
  )
  df[, surv := factor(surv, levels = c("Anti", "Pro", "Neutral", "Tweets"))]

  # Ridge densities over time, one row per survey stance plus the tweet
  # timeline, one panel per target.
  time_plt <- ggplot() +
    geom_density_ridges2(
      data = df,
      mapping = aes(y = surv, x = survey_date_pt, fill = surv),
      alpha = 0.6
    ) +
    facet_wrap(~target, ncol = 2) +
    scale_y_discrete(
      "",
      breaks = c("Tweets", "Neutral", "Pro", "Anti"),
      labels = c(
        "Tweet\nTimeline",
        "Survey: Neutral",
        "Survey: Pro",
        "Survey: Anti"
      )
    ) +
    theme_ridges() +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 45, hjust = 1)
    ) +
    scale_x_datetime(
      "",
      limits = as_datetime(c("2020-02-01", "2021-04-30"))
    )

  # Spell out height/width instead of relying on partial argument matching.
  ggsave("img/time2.pdf", time_plt, height = 6, width = 6)

}
# Among Trump rows with a "Neutral" survey response (and
# any_notatall_conf == FALSE): share of rows annotated "Anti" with
# confidence_integer == 4, per ideology value, sorted by ideology.
# The table is only auto-printed when run interactively.
final_data[
  target == "Trump" & any_notatall_conf == FALSE & surv == "Neutral",
  sum(fin_ann == "Anti" & confidence_integer == 4) / .N,
  by = ideology
][order(ideology)]

############## SECOND TIME PLOT ######################################
############## NOT FOR REPLICATION ######################################
# Express the survey date and the tweet date as days since 2020-01-01, then
# take their difference (positive = tweet created after the survey).
# Columns are referenced directly; inside `[.data.table` there is no need to
# go through `final_data$`.
# NOTE(review): difftime() converts both arguments to POSIXct; if the date
# columns are stored as character they are parsed in the session time zone
# while ymd() yields a Date -- confirm this cannot shift values by part of a day.
final_data[, survdate_int := as.numeric(
  difftime(survey_date, ymd("2020-01-01")),
  units = "days"
)]
final_data[, twdate_int := as.numeric(
  difftime(createdat_date, ymd("2020-01-01")),
  units = "days"
)]
final_data[, twdate_minus_survdate := twdate_int - survdate_int]


# Stance column in survey_wave_data -> target label used in the plots.
wave_stance_targets <- c(
  trump_stance = "Trump",
  lockdown_stance = "Lockdowns",
  mask_stance = "Masks",
  vaccine_stance = "Vaccines"
)

# For every stance column, run survey_differences() once per user and tag the
# rows with the target label; then stack the four results in the order above.
# The code below reads the columns d1, d2, s1 and s2 from the result
# (presumably the dates and stances of two survey waves -- survey_differences()
# is defined outside this file, TODO confirm).
survey_diffs <- do.call(
  rbind,
  lapply(names(wave_stance_targets), function(stance_col) {
    per_user <- survey_wave_data[
      ,
      survey_differences(.SD, stance_col),
      by = uid
    ]
    per_user[, target := wave_stance_targets[[stance_col]]]
  })
)

survey_diffs[, `:=`(
  # Days from d1 to d2.
  time_diff = as.numeric(difftime(d2, d1, units = "days")),
  # TRUE when s1 and s2 are equal.
  agree = s1 == s2,
  Comparison = "Across Survey Waves"
)]

# Annotation-vs-survey comparison rows: surv_ann_agree is carried over under
# the name `agree`, and the tweet-minus-survey day offset is duplicated as
# `time_diff`.
ann_diffs <- final_data[
  ,
  .(twdate_minus_survdate, agree = surv_ann_agree, target)
]
ann_diffs[, `:=`(
  time_diff = twdate_minus_survdate,
  Comparison = "Annotation vs. Survey"
)]

# Stack both comparison types on their shared columns.
diff_df <- rbind(
  survey_diffs[, .(time_diff, agree, Comparison, target)],
  ann_diffs[, .(time_diff, agree, Comparison, target)]
)

# Bin the day offset into labelled bands. Each cut point is the inclusive
# lower edge of the next band, i.e. a value belongs to the first band whose
# upper cut point it is strictly below. NA offsets stay NA.
# NOTE(review): the second band covers -90 <= time_diff < -30 days (roughly
# one to three months before) but is labelled "2-3 Before" -- confirm the label.
diff_df[, months := {
  cutpoints <- c(-90, -30, 30, 60, 90, 120, 150)
  band_labels <- c(
    "3+\nBefore",
    "2-3\nBefore",
    "Within\na Month",
    "1\nAfter",
    "2\nAfter",
    "3\nAfter",
    "4\nAfter",
    "5+\nAfter"
  )
  band_labels[findInterval(time_diff, cutpoints) + 1L]
}]

# Order the factor levels chronologically: take the labels in order of
# increasing time_diff and keep the first occurrence of each.
diff_df[, months := factor(months, levels = unique(months[order(time_diff)]))]
# Agreement rate per (comparison type, month band) with the interval returned
# by binom.agresti.coull(); rows with a missing `agree` are dropped first.
# NOTE(review): the `x > 10` filter keeps cells by the `x` column of the
# binom.agresti.coull() result (presumably the number of agreements), whereas
# the precision plot filters on `n` -- confirm which one is intended.
time2_plt <- ggplot(
  data = diff_df[
    !is.na(agree),
    as.list(binom.agresti.coull(sum(agree), .N)),
    by = .(Comparison, months)
  ][x > 10],
  mapping = aes(
    x = months, y = mean, ymin = lower, ymax = upper,
    color = Comparison, group = Comparison, shape = Comparison
  )
) +
  geom_pointrange(size = 0.8) +
  geom_line() +
  scale_y_continuous("Probability of Agreement", labels = percent) +
  scale_x_discrete(
    "Months difference between annotation/survey\nor the two survey waves"
  ) +
  theme(legend.position = "top")

# Displays the plot when run interactively.
time2_plt

# Spell out height/width instead of relying on partial argument matching.
ggsave("img/time_comp_surv.pdf", time2_plt, height = 4, width = 6)
