library(data.table)
library(tidyverse)
library(bit64)
library(binom)
library(irr)
library(scales)
# Make ggplot2's black-and-white theme (base font size 15) the default for
# every plot drawn after this file is sourced.
theme_set(theme_bw(15))
# Read a tab-separated user-info file with fread() and label its columns
# with the 19 fixed names listed below.
#
# filename: input handed straight to fread() (typically a file path).
# Returns the data.table produced by fread().
read_simple_user_info <- function(filename) {
  user_columns <- c(
    "uid", "name", "user_screenname", "url", "protected", "location",
    "description", "followers_count", "friends_count", "created_at",
    "utc_offset", "time_zone", "statuses_count", "lang",
    "status_created_at", "status_coordinates", "status_lang",
    "profile_image_url_https", "verified"
  )
  fread(filename, sep = "\t", col.names = user_columns)
}


# Read a user-info TSV that may contain embedded NUL bytes.
#
# The file is loaded as raw bytes, every 0x00 byte is replaced by a space
# (0x20), the cleaned bytes are written to a temporary ".txt" file, and that
# file is parsed with read_simple_user_info().
#
# inFile: path of the file to read; the original file is left untouched.
# Returns whatever read_simple_user_info() returns for the cleaned copy.
read_simple_user_info_w_nulls <- function(inFile) {
  bytes <- readBin(inFile, raw(), file.info(inFile)$size)
  bytes[bytes == as.raw(0)] <- as.raw(0x20)

  tfile <- tempfile(fileext = ".txt")
  # Remove the cleaned copy once parsing is done (or has failed); previously
  # every call left a full-size copy of the input in the session temp dir.
  on.exit(unlink(tfile), add = TRUE)
  writeBin(bytes, tfile)
  # Release the raw copy before parsing so both are not held at once.
  rm(bytes)

  read_simple_user_info(tfile)
}

# Derive a three-level Trump/Biden stance factor from the survey columns
# `cand20` and `voted20`.
#
# `cand20` is used when present, otherwise `voted20`. Code 1 becomes
# "Pro-Biden", 2 becomes "Pro-Trump", 5 becomes "Neutral"; anything else
# becomes NA.
#
# surv_tweet:   data.table with columns `cand20` and `voted20`. The raw
#               `survey_stance` column is added with `:=`, i.e. on the table
#               that was passed in.
# remove_users: when TRUE, rows whose code is not 1, 2 or 5 (including NA)
#               are dropped before labelling.
# Returns the (possibly filtered) table with `survey_stance` as a factor with
# levels "Pro-Biden", "Neutral", "Pro-Trump".
get_trump_stance <- function(surv_tweet, remove_users = TRUE) {
  stance_codes <- c(1, 5, 2)
  stance_labels <- c("Pro-Biden", "Neutral", "Pro-Trump")

  surv_tweet[, survey_stance := ifelse(is.na(cand20), voted20, cand20)]
  if (remove_users) {
    # %in% is never NA, so missing codes are dropped here as well.
    surv_tweet <- surv_tweet[survey_stance %in% c(1, 2, 5)]
  }

  # match() yields NA for every code outside 1/5/2, which turns into an NA
  # label and therefore an NA factor value.
  surv_tweet[, survey_stance := factor(
    stance_labels[match(survey_stance, stance_codes)],
    levels = stance_labels
  )]
  surv_tweet
}

# Translate the `vax_stance` column into a three-level vaccine stance factor.
#
# "resistant" becomes "Anti-Vax", "hesitant" becomes "Neutral" and
# "already_or_enthusiastic" becomes "Pro-Vax"; any other value (or NA)
# becomes NA.
#
# surv_tweet: data.table with a `vax_stance` column.
# Returns the table with `survey_stance` as a factor with levels
# "Anti-Vax", "Neutral", "Pro-Vax".
get_vax_stance <- function(surv_tweet) {
  label_for <- c(
    resistant = "Anti-Vax",
    hesitant = "Neutral",
    already_or_enthusiastic = "Pro-Vax"
  )
  # Character indexing matches names exactly; unknown or missing values
  # come back as NA.
  surv_tweet$survey_stance <- unname(
    label_for[as.character(surv_tweet$vax_stance)]
  )
  surv_tweet[, survey_stance := factor(
    survey_stance,
    levels = c("Anti-Vax", "Neutral", "Pro-Vax")
  )]
  surv_tweet
}

# Build a three-level lockdown stance factor from four survey items.
#
# The score is cov_stop_1 + cov_stop_2 + cov_stop_3 + cov_stop_5. A score
# below 7 is "Anti-Lockdown", above 13 is "Pro-Lockdown", anything in between
# is "Neutral"; a missing score (any item NA) stays NA.
#
# surv_tweet: data.table holding the four `cov_stop_*` columns; it is updated
#             with `:=`.
# Returns the table with `survey_stance` as a factor with levels
# "Anti-Lockdown", "Neutral", "Pro-Lockdown".
get_lockdown_stance <- function(surv_tweet) {
  stance_levels <- c("Anti-Lockdown", "Neutral", "Pro-Lockdown")
  surv_tweet[, survey_stance := cov_stop_1 + cov_stop_2 + cov_stop_3 + cov_stop_5]
  # 1 + (score >= 7) + (score > 13) is 1 below 7, 2 for 7..13 and 3 above 13;
  # an NA score gives an NA index and hence an NA label.
  surv_tweet[, survey_stance := factor(
    stance_levels[1L + (survey_stance >= 7) + (survey_stance > 13)],
    levels = stance_levels
  )]
  surv_tweet
}

# Build a three-level mask stance factor from `prevent_7` and `cov_beh_5`.
#
# Rules, applied in order:
#   * prevent_7 is 2 or cov_beh_5 is 1      -> "Anti-Mask"
#   * cov_beh_5 is 4                        -> "Pro-Mask"
#   * both prevent_7 and cov_beh_5 missing  -> NA
#   * everything else                       -> "Neutral"
#
# The comparisons use %in% rather than ==. With ==, a missing value in either
# item made the first test NA (unless the other item already said Anti-Mask),
# so respondents with just one missing item were labelled NA and the
# "both missing" rule could never fire. %in% treats a missing answer as
# "no match", so only respondents missing both items end up NA.
#
# surv_tweet: data.table with columns `prevent_7` and `cov_beh_5`; it is
#             updated with `:=`.
# Returns the table with `survey_stance` as a factor with levels
# "Anti-Mask", "Neutral", "Pro-Mask".
get_mask_stance <- function(surv_tweet) {
  surv_tweet[, survey_stance := ifelse(
    prevent_7 %in% 2 | cov_beh_5 %in% 1,
    "Anti-Mask",
    ifelse(
      cov_beh_5 %in% 4,
      "Pro-Mask",
      ifelse(is.na(cov_beh_5) & is.na(prevent_7), NA, "Neutral")
    )
  )]
  surv_tweet[, survey_stance := factor(
    survey_stance,
    levels = c("Anti-Mask", "Neutral", "Pro-Mask")
  )]
  # Explicit visible return, matching the other get_*_stance helpers.
  return(surv_tweet)
}

# Attach per-user on-topic tweet counts, a stance label and an activity level
# to the survey table.
#
# fin_survey: data.table of survey respondents with a `uid` column.
# config:     list with `file` (tweet file read by fread, must contain a
#             `userid` column), `target` (label copied into a `target`
#             column) and `compute_stance_fn` (function that adds
#             `survey_stance` to the merged table).
#
# Prints the tweet counts before and after restricting to surveyed users, and
# the quantiles used for the activity cut-offs.
# Returns the merged table with the added columns `n_ontopic_tweets`,
# `survey_stance`, `target` and `activity_level`
# ("None", "Low", "Moderate", "High").
get_surv_tweet <- function(fin_survey, config) {
  topic_tweets <- fread(config$file)
  print(paste("N Tweets in", config$file, nrow(topic_tweets)))
  topic_tweets <- topic_tweets[userid %in% fin_survey$uid]
  print(paste("N Tweets left after link to survey", nrow(topic_tweets)))

  # Count on-topic tweets per user; respondents without any tweet keep their
  # row through the left join and get a count of zero.
  tweets_per_user <- topic_tweets[, list(n_ontopic_tweets = .N), by = userid]
  surv_tweet <- merge(
    fin_survey,
    tweets_per_user,
    by.x = "uid",
    by.y = "userid",
    all.x = TRUE
  )
  surv_tweet[is.na(n_ontopic_tweets), n_ontopic_tweets := 0L]

  surv_tweet <- config$compute_stance_fn(surv_tweet)
  surv_tweet$target <- config$target

  # Cut-offs come from the 33% and 66% quantiles of the non-zero counts.
  active_counts <- surv_tweet[n_ontopic_tweets > 0]$n_ontopic_tweets
  qnt <- quantile(active_counts, seq(0, 1, by = .33))
  print(qnt)
  surv_tweet[, activity_level := ifelse(
    n_ontopic_tweets == 0,
    "None",
    ifelse(
      n_ontopic_tweets < qnt[2],
      "Low",
      ifelse(n_ontopic_tweets < qnt[3], "Moderate", "High")
    )
  )]

  surv_tweet
}


# Collapse target-specific stance labels to "Pro" / "Anti" / "Neutral".
#
# Labels containing "Pro-" become "Pro", labels containing "Anti-" become
# "Anti" (a "Pro-" match wins if both occur), and everything else becomes
# "Neutral". Two overrides follow: "Pro-Biden" is recoded to "Anti", and
# missing inputs stay NA.
#
# survey_stance: character or factor vector of stance labels.
# Returns a character vector of the same length.
reverse_get_stance <- function(survey_stance) {
  collapsed <- rep("Neutral", length(survey_stance))
  collapsed[grepl("Anti-", survey_stance)] <- "Anti"
  collapsed[grepl("Pro-", survey_stance)] <- "Pro"
  # NA entries in the comparison are skipped by the assignment.
  collapsed[survey_stance == "Pro-Biden"] <- "Anti"
  collapsed[is.na(survey_stance)] <- NA
  collapsed
}

# Return the on-topic tweets of surveyed users, annotated with one survey
# record per user.
#
# fin_survey, config: passed on to get_surv_tweet(); config$file is also read
#                     again here to obtain the tweets themselves.
#
# Users without a stance or with activity level "None" are dropped. The
# remaining survey rows are shuffled (RNG seed fixed at 42) and the first row
# per `uid` is kept, so a user with several survey rows contributes one
# randomly chosen row.
# Returns the tweets inner-joined (userid = uid) with `survey_stance`,
# `activity_level` and `target`.
get_tweets <- function(fin_survey, config) {
  survey_users <- get_surv_tweet(fin_survey, config)
  all_tweets <- fread(config$file)

  # take random wave
  set.seed(42)
  eligible <- survey_users[!is.na(survey_stance) & activity_level != "None"]
  shuffled <- eligible[sample(nrow(eligible)), ]
  one_row_per_user <- shuffled[
    !duplicated(uid),
    .(survey_stance, activity_level, target, uid)
  ]

  merge(all_tweets, one_row_per_user, by.x = "userid", by.y = "uid")
}


# Parse one annotator's export file into one row per annotated item.
#
# annotator: identifier stored in the `annotator` column of the result.
# filename:  file read with fread(). Only the third data row and the columns
#            from position 18 onward are used; those columns are expected to
#            alternate between an answer column and its confidence column,
#            with names of the form "<uid>_<tid>" and "<uid>_<tid>_<suffix>"
#            (NOTE(review): layout inferred from the parsing below; confirm
#            against the actual export format).
#
# Returns a data.table with columns annotator, tid, uid, response,
# confidence, target ("Lockdowns", "Masks", "Vaccines" or "Trump") and
# stance ("Anti", "Neutral" or "Pro").
get_responses <- function(annotator, filename) {
  export <- fread(filename)
  export <- export[3, ]
  export <- export[, 18:ncol(export)]
  export$annotator <- annotator
  long <- data.table(melt(export, "annotator"))

  # Odd rows hold the answers, even rows the matching confidence ratings.
  answers <- long[seq(1, nrow(long), by = 2)]
  confidences <- long[seq(2, nrow(long), by = 2)]

  # Answer names split into uid and "everything after the first underscore";
  # confidence names carry a third piece that is discarded.
  answer_ids <- str_split_fixed(answers$variable, "_", 2)
  answers[, uid := answer_ids[, 1]]
  answers[, tid := answer_ids[, 2]]
  confidence_ids <- str_split_fixed(confidences$variable, "_", 3)
  confidences[, uid := confidence_ids[, 1]]
  confidences[, tid := confidence_ids[, 2]]

  answers[, variable := NULL]
  confidences[, variable := NULL]
  # A blank confidence is recorded as "Somewhat".
  confidences[value == "", value := "Somewhat"]

  responses <- merge(answers, confidences, by = c("annotator", "tid", "uid"))
  setnames(responses, c("value.x", "value.y"), c("response", "confidence"))

  # Later assignments override earlier ones, so precedence is
  # Lockdown > Mask > Vaccine > (default) Trump.
  responses[, target := "Trump"]
  responses[grepl("Vaccine", response), target := "Vaccines"]
  responses[grepl("Mask", response), target := "Masks"]
  responses[grepl("Lockdown", response), target := "Lockdowns"]

  # Default "Pro"; answers starting with "Un" are "Neutral"; any answer
  # containing "Anti" or "Biden" is "Anti".
  responses[, stance := "Pro"]
  responses[grepl("^Un", response), stance := "Neutral"]
  responses[grepl("Anti", response), stance := "Anti"]
  responses[grepl("Biden", response), stance := "Anti"]

  responses
}


# Compute Krippendorff's alpha (kripp.alpha defaults) over the five
# annotator columns a1..a5.
#
# df: table with columns a1, a2, a3, a4, a5 holding "Anti" / "Neutral" /
#     "Pro"; any other value is treated as missing.
# Returns a one-row data.table with `subj` (number of subjects reported by
# kripp.alpha) and `val` (the alpha value).
run_alpha <- function(df) {
  stance_levels <- c("Anti", "Neutral", "Pro")
  # One row per annotator, one column per rated item, coded 1/2/3.
  coded <- lapply(paste0("a", 1:5), function(col) {
    as.integer(factor(df[[col]], levels = stance_levels))
  })
  ratings <- do.call(rbind, coded)

  k <- kripp.alpha(ratings)
  data.table(subj = k$subjects, val = k$value)
}

# Return the two smallest non-missing values of `vec`.
#
# vec: atomic vector; NA entries are ignored.
# Returns list(a1_conf = smallest, a2_conf = second smallest); an entry is NA
# when fewer than one or two non-missing values exist.
get_c <- function(vec) {
  # na.last = NA makes sort() drop missing values.
  ascending <- sort(vec, na.last = NA)
  list(
    a1_conf = ascending[1],
    a2_conf = ascending[2]
  )
}


# Pair every survey row with every later survey row.
#
# dat:        data.table with a `survey_date` column and the column named by
#             `stance_col`.
# stance_col: name (string) of the stance column to compare.
#
# Rows are ordered by `survey_date`; for each pair of positions i < j the
# result holds the two dates (d1, d2) and the two stance values (s1, s2).
# Pairs are emitted in the order (1,2), (1,3), ..., (1,n), (2,3), ...
# A table with fewer than two rows yields a zero-row result with the same
# four columns. The previous `1:(nrow(dat) - 1)` loop counted downwards in
# that case and produced bogus rows.
survey_differences <- function(dat, stance_col) {
  if (!stance_col %in% names(dat)) {
    stop("Column '", stance_col, "' not found in `dat`", call. = FALSE)
  }
  dat <- dat[order(survey_date)]
  dates <- dat[["survey_date"]]
  stances <- dat[[stance_col]]

  n_rows <- nrow(dat)
  if (n_rows < 2L) {
    first <- integer(0)
    second <- integer(0)
  } else {
    # combn() lists the index pairs column by column in lexicographic order,
    # so all pairs are built at once instead of rbind-ing one row per pair.
    index_pairs <- combn(n_rows, 2L)
    first <- index_pairs[1L, ]
    second <- index_pairs[2L, ]
  }

  data.table(
    d1 = dates[first],
    d2 = dates[second],
    s1 = stances[first],
    s2 = stances[second]
  )
}

# Per-target configuration: `target` is the label written to the `target`
# column, `file` is the tweet file read with fread(), and `compute_stance_fn`
# is the function that derives `survey_stance` for that target.
VAX_CONFIG <- list(
  target = "Vaccines",
  file = "vaccine_tweets.tsv",
  compute_stance_fn = get_vax_stance
)

TRUMP_CONFIG <- list(
  target = "Trump",
  file = "trump_tweets.tsv",
  compute_stance_fn = get_trump_stance
)

LOCKDOWN_CONFIG <- list(
  target = "Lockdowns",
  file = "lockdown_tweets.tsv",
  compute_stance_fn = get_lockdown_stance
)

MASK_CONFIG <- list(
  target = "Masks",
  file = "mask_tweets.tsv",
  compute_stance_fn = get_mask_stance
)
