Our method involves comparing the maximum MSE reductions against the baseline (regressing from features drawn from a Gaussian distribution). How do different random seeds affect the experimental results? This notebook performs the error analysis.

Load and prepare data

library(tictoc)
library(caret)

# GLUE task names; each one is later used as a column name of `df`.
all_glue_tasks <- c(
  "rte", "cola", "mrpc",
  "sst2", "qnli", "qqp"
)
# Load the probing results table and print its dimensions as a sanity check.
df <- read.csv(
  file = "../reports/probing_results_1200_per_class/task1_predict_task_performance.csv"
)
dim(df)
[1] 25 98

1. Variance of the MSE_ctrl

For each random seed (rs), compute the MSE_ctrl.
Report the resulting MSE_ctrl with respect to the number of features.

# Simulate the control baseline (regression on random Gaussian features).
#
# For each of N draws, builds a feature matrix of Gaussian noise (mean 0,
# sd 0.1) with one row per row of the global data frame `df` and `nfeat`
# columns, fits `gt ~ .` with caret's `train(method = "lm")`, and records the
# root-mean-square of the fitted model's residuals.
#
# Args:
#   N:     number of random feature draws to simulate.
#   nfeat: number of random Gaussian features per draw.
#   gt:    name of the column of `df` used as the regression target.
#
# Returns:
#   A list with elements `mean` and `std`: the mean and standard deviation of
#   the N recorded RMSE values (NaN / NA when N is 0).
#
# Note: despite the "mse" in the name, the recorded quantity is an RMSE
# (square root of the mean squared residual).
simulate_mse_ctrl <- function(N, nfeat, gt) {
  labels <- df[gt]
  n_obs <- nrow(df)
  # The formula does not depend on the draw, so build it once.
  model_formula <- as.formula(sprintf("%s ~ .", gt))
  # The seed is reset on every call, so each call replays the same random
  # stream regardless of what ran before it.
  set.seed(1234)

  # Preallocate instead of growing the vector with c() on each iteration.
  results <- numeric(N)
  # seq_len() (unlike 1:N) performs zero iterations when N is 0.
  for (i in seq_len(N)) {
    x <- matrix(rnorm(n_obs * nfeat, 0, 0.1), nrow = n_obs, ncol = nfeat)
    ctrl_xydata <- cbind(x, labels)

    ctrl_model <- train(
      model_formula,
      data = ctrl_xydata,
      method = "lm",
      trControl = trainControl(method = "cv", number = 5)
    )
    # Use the full component name; `$residual` only worked through partial
    # matching.
    # NOTE(review): this looks like the residual RMSE of the final fit rather
    # than the cross-validated RMSE -- confirm that this is intended.
    ctrl_RMSE <- sqrt(mean(summary(ctrl_model)$residuals^2))
    results[i] <- ctrl_RMSE
  }

  list(
    "mean" = mean(results),
    "std" = sd(results)
  )
}


# Run the control simulation for every GLUE task and every feature count.
# `means` and `stds` hold one row per task and one column per feature count;
# `ratios` is the element-wise std / mean (coefficient of variation).
tic("Run simulation")
n_sim <- 100
# Numbers of random features to simulate; one matrix column per entry.
feature_counts <- c(3, 7, 12)
n_tasks <- length(all_glue_tasks)
means <- matrix(NA_real_, nrow = n_tasks, ncol = length(feature_counts))
stds <- matrix(NA_real_, nrow = n_tasks, ncol = length(feature_counts))
for (i in seq_along(all_glue_tasks)) {
  gt <- all_glue_tasks[i]
  for (j in seq_along(feature_counts)) {
    sim <- simulate_mse_ctrl(n_sim, feature_counts[j], gt)
    means[i, j] <- sim$mean
    stds[i, j] <- sim$std
  }
}
# No preallocation needed for `ratios`: it is computed in one vectorized step.
ratios <- stds / means
ratios
           [,1]       [,2]      [,3]
[1,] 0.05711955 0.08997411 0.1516493
[2,] 0.05463718 0.09997548 0.1360310
[3,] 0.05214015 0.09466286 0.1563159
[4,] 0.05036794 0.09408959 0.1470137
[5,] 0.05367182 0.10019036 0.1407084
[6,] 0.05804047 0.09293523 0.1497007
toc()
Run simulation: 832.49 sec elapsed
# Average ratio per row of `ratios` (MARGIN = 1 applies the function row-wise).
apply(X = ratios, MARGIN = 1, FUN = mean)
[1] 0.09958097 0.09688122 0.10103963 0.09715706 0.09819018 0.10022546
# Wrap the ratio matrix in a data frame with one row per task, label the
# columns "3", "7" and "12", and export it as CSV.
plotdata <- setNames(
  data.frame(ratios, row.names = all_glue_tasks),
  c("3", "7", "12")
)
write.csv(plotdata, file = "random_seeds_ratios.csv", row.names = TRUE)
LS0tDQp0aXRsZTogIlJhbmRvbSBzZWVkcyBlZmZlY3RzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KT3VyIG1ldGhvZCBpbnZvbHZlIGNvbXBhcmluZyB0aGUgbWF4aW11bSBNU0UgcmVkdWN0aW9ucyBhZ2FpbnN0IHRoZSBiYXNlbGluZSAocmVncmVzc2luZyBmcm9tIGZlYXR1cmVzIGRyYXduIGZyb20gR2F1c3NpYW4pLiBIb3cgZG9lcyBkaWZmZXJlbnQgcmFuZG9tIHNlZWRzIGFmZmVjdCB0aGUgZXhwZXJpbWVudCByZXN1bHRzPyBEbyB0aGUgZXJyb3IgYW5hbHlzaXMgaW4gdGhpcyBub3RlYm9vay4NCg0KIyMgTG9hZCBhbmQgcHJlcGFyZSBkYXRhDQpgYGB7cn0NCmxpYnJhcnkodGljdG9jKQ0KbGlicmFyeShjYXJldCkNCg0KYWxsX2dsdWVfdGFza3MgPSBjKCJydGUiLCAiY29sYSIsICJtcnBjIiwgInNzdDIiLCAicW5saSIsICJxcXAiKQ0KZGYgPC0gcmVhZC5jc3YoIi4uL3JlcG9ydHMvcHJvYmluZ19yZXN1bHRzXzEyMDBfcGVyX2NsYXNzL3Rhc2sxX3ByZWRpY3RfdGFza19wZXJmb3JtYW5jZS5jc3YiKQ0KZGltKGRmKQ0KYGBgDQoNCiMjIDEuIFZhcmlhbmNlIG9mIHRoZSBNU0VfY3RybA0KDQpGb3IgZWFjaCByczogY29tcHV0ZSB0aGUgTVNFX2N0cmwuICANClJldHVybiB0aGUgcmVzdWx0ZWQgTVNFX2N0cmwgdy5yLnQgdGhlIG51bWJlciBvZiBmZWF0dXJlcy4NCg0KYGBge3J9DQpzaW11bGF0ZV9tc2VfY3RybCA8LSBmdW5jdGlvbihOLCBuZmVhdCwgZ3QpIHsNCiAgbGFiZWxzIDwtIGRmW2d0XQ0KICBzZXQuc2VlZCgxMjM0KQ0KICANCiAgcmVzdWx0cyA9IGMoKQ0KICBmb3IgKGkgaW4gMTpOKSB7DQogICAgeCA8LSBtYXRyaXgocm5vcm0obnJvdyhkZikgKiBuZmVhdCwgMCwgMC4xKSwgDQogICAgICAgICAgICAgICAgbnJvdz1ucm93KGRmKSwgbmNvbD1uZmVhdCkNCiAgICBjdHJsX3h5ZGF0YSA8LSBjYmluZCh4LCBsYWJlbHMpDQogICAgI3ByaW50KHNwcmludGYoImN0cmxfeHlkYXRhIHNoYXBlOiAlZCwgJWQiLCBucm93KGN0cmxfeHlkYXRhKSwgbmNvbChjdHJsX3h5ZGF0YSkpKQ0KICAgIA0KICAgIGN0cmxfbW9kZWwgPC0gdHJhaW4oDQogICAgICBhcy5mb3JtdWxhKHNwcmludGYoIiVzIH4gLiIsIGd0KSksIA0KICAgICAgZGF0YT1jdHJsX3h5ZGF0YSwNCiAgICAgIG1ldGhvZD0ibG0iLA0KICAgICAgdHJDb250cm9sPXRyYWluQ29udHJvbChtZXRob2Q9ImN2IiwgbnVtYmVyPTUpKQ0KICAgIGN0cmxfUk1TRSA8LSBzcXJ0KG1lYW4oc3VtbWFyeShjdHJsX21vZGVsKSRyZXNpZHVhbF4yKSkNCiAgICByZXN1bHRzID0gYyhyZXN1bHRzLCBjdHJsX1JNU0UpDQogIH0NCiAgcmV0dXJuKGxpc3QoDQogICAgIm1lYW4iPW1lYW4ocmVzdWx0cyksDQogICAgInN0ZCI9c2QocmVzdWx0cykNCiAgKSkNCn0NCg0KDQp0aWMoIlJ1biBzaW11bGF0aW9uIikNCm1lYW5zIDwtIG1hdHJpeChyZXAoTkEsIGxlbmd0aChhbGxfZ2x1ZV90YXNrcykqMyksIGxlbmd0aChhbGxfZ2x1ZV90YXNrcyksIDMpDQpzdGRzIDwtIG1h
dHJpeChyZXAoTkEsIGxlbmd0aChhbGxfZ2x1ZV90YXNrcykqMyksIGxlbmd0aChhbGxfZ2x1ZV90YXNrcyksIDMpDQpyYXRpb3MgPC0gbWF0cml4KHJlcChOQSwgbGVuZ3RoKGFsbF9nbHVlX3Rhc2tzKSozKSwgbGVuZ3RoKGFsbF9nbHVlX3Rhc2tzKSwgMykNCm5fc2ltID0gMTAwDQpmb3IgKGkgaW4gMTpsZW5ndGgoYWxsX2dsdWVfdGFza3MpKSB7DQogIGd0ID0gYWxsX2dsdWVfdGFza3NbaV0NCiAgcmVzdWx0c18zID0gc2ltdWxhdGVfbXNlX2N0cmwobl9zaW0sIDMsIGd0KQ0KICByZXN1bHRzXzcgPSBzaW11bGF0ZV9tc2VfY3RybChuX3NpbSwgNywgZ3QpDQogIHJlc3VsdHNfMTIgPSBzaW11bGF0ZV9tc2VfY3RybChuX3NpbSwgMTIsIGd0KQ0KICBtZWFuc1tpLF0gPSBjKHJlc3VsdHNfMyRtZWFuLCByZXN1bHRzXzckbWVhbiwgcmVzdWx0c18xMiRtZWFuKQ0KICBzdGRzW2ksXSA9IGMocmVzdWx0c18zJHN0ZCwgcmVzdWx0c183JHN0ZCwgcmVzdWx0c18xMiRzdGQpDQp9DQpyYXRpb3MgPSBzdGRzIC8gbWVhbnMNCnJhdGlvcw0KdG9jKCkNCg0KYGBgDQpgYGB7cn0NCmFwcGx5KHJhdGlvcywgMSwgbWVhbikNCmBgYA0KYGBge3J9DQpwbG90ZGF0YSA9IGRhdGEuZnJhbWUocmF0aW9zLCByb3cubmFtZXM9YWxsX2dsdWVfdGFza3MpDQpuYW1lcyhwbG90ZGF0YSk9YygiMyIsICI3IiwgIjEyIikNCndyaXRlLmNzdihwbG90ZGF0YSwgInJhbmRvbV9zZWVkc19yYXRpb3MuY3N2Iiwgcm93Lm5hbWVzPVRSVUUpDQpgYGANCg0KDQo=