# Code in R language:

# load dataset
# 	each row represents the whole set of features (variables) of a fine-tuned model with its corresponding dependent variable (Pearson score)
#	and thus each column represents a feature
# 	example: features from a model after domain adaptation and fine-tuning 
#	Pearson_score	sample_name	TEXT_complexity_global	...	AVG_word_len	AVG_abstr_len	BIOMEDICAL_info	DAPT_1	DAPT_2	HCF_1	HCF_2	HCF_3	HCF_4	HCF_5	HCF_6	HCF_7	HCF_8
#     	       0.9485	  sample1		       0.338498956			 5.586909119	     104.4453	    0.439294636	     1	     0	    1	    0	    0	    0	    0	    0	    0	    0

# Read the feature table: one row per fine-tuned model, one column per feature
# (plus the Pearson_score dependent variable and the sample_name identifier).
dataset <- read.csv(file = 'dataset.csv')

# load required libraries
# to install libraries: install.packages("lmerTest"), install.packages("r2mlm")
library(lmerTest)
library(r2mlm)

# fit MLM
# 	inside the function 'lmer': left to the operator ~ is reserved for dependent variable and right to the operator is reserved for independent variables
# 	example:
	# Fixed: the original closed lmer()'s parenthesis right after HCF_7, which cut
	# the formula short and left the next line (`+ (1 + ... | sample_name), data = dataset)`)
	# as a syntax error. The random-effects term and the `data` argument must stay
	# inside a single lmer() call, so the whole formula is one expression here.
	# DAPT_2 and HCF_8 are deliberately omitted as reference levels (see note 1 below).
	MLM_1 <- lmer(Pearson_score ~ 1 + AVG_word_len + AVG_abstr_len + BIOMEDICAL_info + DAPT_1 + HCF_1 + HCF_2 + HCF_3 + HCF_4 + HCF_5 + HCF_6 + HCF_7
	              + (1 + DAPT_1 + HCF_1 + HCF_2 + HCF_3 + HCF_4 + HCF_5 + HCF_6 + HCF_7 | sample_name),
	              data = dataset)
# note 1: it is necessary to leave out from the model the variables DAPT_2 and HCF_8 to avoid perfect collinearity (other variables could be chosen instead, not necessarily these ones — for example DAPT_1 and any one of the HCFs). Coefficients for DAPT_1 and the remaining HCFs are automatically compared against these omitted variables; i.e., the omitted variables serve as reference levels
# note 2: the term "(1 + DAPT_1 + HCF_1 + HCF_2 + HCF_3 + HCF_4 + HCF_5 + HCF_6 + HCF_7 | sample_name)" indicates to add random intercepts and random coefficients for these level-1 variables; these coefficients will vary for each sample
# note 3: in this example, we are modeling the possible effects only of the independent variables displayed there; add the names of other variables to also model their possible effects
# note 4: the term '1' indicates the addition of an intercept

# to see the full model summary: fixed-effect estimates, their standard errors,
# and (via lmerTest) Satterthwaite-based p-values
	summary(MLM_1)
# to see the random effects: the random intercept and random coefficient
# deviations estimated for each level-2 unit (each sample_name)
	ranef(MLM_1)

# to estimate R-squared effect sizes for the multilevel model (variance
# decomposition as implemented by the r2mlm package)
	r2mlm(MLM_1)

# to test whether the random effects are statistically significant via
# likelihood ratio tests (lmerTest::ranova is called through rand())
	rand(MLM_1)
# note: this test is usually performed without level-2 variables, so it would be a good idea to remove all level-2 variables from the example above, fit again the MLM, and then test via the likelihood ratio test

# to do a backwards search (backward elimination of non-significant effects);
# with lmerTest loaded, step() on a merMod fit dispatches to lmerTest::step,
# which drops random effects first and then fixed effects
	step(MLM_1)

# convergence issues: sometimes it is possible that the model does not converge. Changing the optimizer, or the maximum-likelihood method, can solve this issue
#	example:
	# Fixed: same premature parenthesis as the first example — the `)` after HCF_7
	# closed lmer() early, orphaning the random-effects term, REML, data, and
	# control arguments. Everything now lives inside one lmer() call.
	# REML = FALSE switches from restricted maximum likelihood to plain maximum
	# likelihood; the control argument swaps the optimizer.
	# NOTE(review): optimizer = 'optimx' requires library(optimx) to be loaded
	# first — add it next to the other library() calls before running this.
	MLM_1 <- lmer(Pearson_score ~ 1 + AVG_word_len + AVG_abstr_len + BIOMEDICAL_info + DAPT_1 + HCF_1 + HCF_2 + HCF_3 + HCF_4 + HCF_5 + HCF_6 + HCF_7
	              + (1 + DAPT_1 + HCF_1 + HCF_2 + HCF_3 + HCF_4 + HCF_5 + HCF_6 + HCF_7 | sample_name),
	              data = dataset, REML = FALSE,
	              control = lmerControl(optimizer = 'optimx', optCtrl = list(method = 'bobyqa')))
# note: we can use the 'optimx' package to do so

# We recommend following the official guidelines of each R package

