import os
import torch
from keras_preprocessing.sequence import pad_sequences

from BERT.BERT_wordpieces import OOVExampleHandler
from BERT.DefBERT import ParentModel, ParentExampleHandler

# Device on which every tensor returned by the classes below is allocated:
# the CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import pandas as pd
from utility.words_in_synset import SynsetCouple
from nltk.corpus import wordnet as wn


class PedersenDefinitionSisterCouple_AsBertInput:
    """Base class and factory for objects that turn synset couples into BERT inputs.

    Concrete subclasses implement :meth:`get_inputs_from`; :meth:`instantiate`
    selects the subclass that corresponds to a mode name.
    """

    @staticmethod
    def instantiate(mode, tokenizer):
        """Return the input builder associated with ``mode``.

        Supported modes:

        * ``'def_bert_cls'``      -> PedersenDefinitionSisterCouple_ExampleEmbedding
        * ``'bert_wordpieces'``   -> PedersenOOVSisterCouple_ExampleEmbedding
        * ``'def_bert_head'``     -> PedersenParentSisterCouple_ExampleEmbedding
        * ``'bert_head_example'`` -> PedersenParentFromExampleSisterCouple_ExampleEmbedding

        :param mode: one of the mode names listed above.
        :param tokenizer: tokenizer handed to the created builder.
        :raises NotImplementedError: if ``mode`` is not one of the supported names.
        """
        if mode == 'def_bert_cls':
            return PedersenDefinitionSisterCouple_ExampleEmbedding(tokenizer)

        if mode == 'bert_wordpieces':
            return PedersenOOVSisterCouple_ExampleEmbedding(tokenizer)

        if mode == 'def_bert_head':
            return PedersenParentSisterCouple_ExampleEmbedding(tokenizer)

        if mode == 'bert_head_example':
            return PedersenParentFromExampleSisterCouple_ExampleEmbedding(tokenizer)

        # `NotImplemented` is a constant, not an exception class: calling it raised an
        # unrelated TypeError. NotImplementedError is the exception that was intended.
        raise NotImplementedError(
            "Unknown mode {!r}: available modes are 'def_bert_cls', 'bert_wordpieces', "
            "'def_bert_head' and 'bert_head_example'".format(mode))

    def __init__(self, tokenizer):
        """Store the tokenizer used by subclasses to tokenize and convert sentences."""
        self.tokenizer = tokenizer

    def get_inputs_from(self, pedersen_synset_couples, output_path=None):
        """Build the model inputs for the given couples; implemented by subclasses only.

        :raises NotImplementedError: always, on the base class.
        """
        raise NotImplementedError('get_inputs_from is abstract: use one of the subclasses '
                                  'returned by instantiate()')


class PedersenDefinitionSisterCouple_ExampleEmbedding(PedersenDefinitionSisterCouple_AsBertInput):
    """Builder pairing the definition of the first synset with an example of the sister word."""

    def __init__(self, tokenizer):
        super().__init__(tokenizer)

    def get_inputs_from(self, pedersen_synset_couples, output_path=None):
        """Turn ``(value, couple)`` pairs into padded id tensors.

        For every couple the definition of ``s1`` is paired with the first example of
        ``s2`` that contains ``w2`` as a space-delimited word.

        :param pedersen_synset_couples: iterable of ``(value, couple)`` pairs; each couple
            exposes ``s1``, ``s2`` and ``w2``.
        :param output_path: when not None, the collected rows are written there as CSV.
        :return: ``(definition_ids, sister_example_ids, sister_indexes, values)`` where the
            id tensors are right-padded to their longest sequence, ``sister_indexes`` holds
            the position of the sister word's first wordpiece in each example, and
            ``values`` lists the values of the couples in input order.
        :raises KeyError: if no example of ``s2`` contains ``w2``.
        :raises ValueError: if the sister wordpiece is absent from the tokenized example,
            or if no couple was supplied.
        """
        column_names = ('syn', 'definition', 'sister_syn', 'sister_word', 'sister_example')
        collected = {name: [] for name in column_names}
        value_list = []

        for value, couple in pedersen_synset_couples:
            # First example of the sister synset mentioning the sister word verbatim.
            chosen_example = None
            for candidate in couple.s2.examples():
                if couple.w2 in str(candidate).split(' '):
                    chosen_example = candidate
                    break

            if chosen_example is None:
                raise (KeyError('Examples not found'))

            collected['syn'].append(couple.s1.name())
            collected['definition'].append(couple.s1.definition())
            collected['sister_syn'].append(couple.s2.name())
            collected['sister_word'].append(couple.w2)
            collected['sister_example'].append(chosen_example)
            value_list.append(value)

        frame = pd.DataFrame([])
        for name in column_names:
            frame[name] = collected[name]
        if output_path is not None:
            frame.to_csv(output_path)

        def as_sentence(text):
            # Surround the raw text with the BERT sentence markers.
            return "[CLS] " + text + " [SEP]"

        definition_sentences = [as_sentence(text) for text in frame['definition'].values]
        sister_word_sentences = [as_sentence(text) for text in frame['sister_word'].values]
        sister_example_sentences = [as_sentence(text) for text in frame['sister_example'].values]

        tokenized_sister = [self.tokenizer.tokenize(sentence) for sentence in sister_word_sentences]
        tokenized_definitions = []
        tokenized_examples_sister = []
        indexes_sister = []

        rows = zip(tokenized_sister, definition_sentences, sister_example_sentences)
        for sister_tokens, definition_sentence, example_sentence in rows:
            tokenized_definitions.append(self.tokenizer.tokenize(definition_sentence))

            example_tokens = self.tokenizer.tokenize(example_sentence)
            # sister_tokens[0] is the [CLS] marker, so [1] is the word's first wordpiece.
            position = example_tokens.index(sister_tokens[1])
            tokenized_examples_sister.append(example_tokens)
            indexes_sister.append(position)

        definition_ids = [self.tokenizer.convert_tokens_to_ids(tokens) for tokens in tokenized_definitions]
        input_ids_definitions = pad_sequences(definition_ids,
                                              maxlen=max(map(len, tokenized_definitions)),
                                              dtype="long", truncating="post", padding="post")

        example_ids = [self.tokenizer.convert_tokens_to_ids(tokens) for tokens in tokenized_examples_sister]
        input_ids_example_sister = pad_sequences(example_ids,
                                                 maxlen=max(map(len, tokenized_examples_sister)),
                                                 dtype="long", truncating="post", padding="post")

        definitions_tensor = torch.tensor(input_ids_definitions, device=device)
        examples_tensor = torch.tensor(input_ids_example_sister, device=device)
        indexes_tensor = torch.tensor(indexes_sister, device=device)
        return definitions_tensor, examples_tensor, indexes_tensor, value_list


class PedersenParentSisterCouple_ExampleEmbedding(PedersenDefinitionSisterCouple_AsBertInput):
    """Builder pairing an in-vocabulary parent of the first word with an example of the sister word.

    The parent is looked up through ``ParentModel.in_voc_parent``; its context sentence is
    the definition of ``s1`` when that definition mentions the parent, otherwise the bare
    parent word.
    """

    def __init__(self, tokenizer):
        super().__init__(tokenizer)
        self.parent_model = ParentModel('bert-base-uncased')

    @staticmethod
    def _matches(candidate, target):
        """Return True if ``candidate`` starts with ``target`` and is at most 2 characters longer."""
        # Same test as the former `(len(candidate) - len(target)) in range(-3, 3)`:
        # startswith() already guarantees that the difference is not negative.
        return candidate.startswith(target) and len(candidate) - len(target) < 3

    @staticmethod
    def _first_example_containing(word, examples):
        """Return the first example holding ``word`` as a space-delimited token, or None."""
        for example in examples:
            if word in str(example).split(' '):
                return example
        return None

    def _locate(self, tokens, piece, role):
        """Return the index of the first token matching ``piece``; raise KeyError when absent."""
        for k, token in enumerate(tokens):
            if self._matches(token, piece):
                return k
        # Previously a None index was stored and only failed later, inside torch.tensor().
        raise KeyError(role + ' wordpiece ' + repr(piece) + ' not found in tokenized example '
                       + str(tokens))

    def _pad_to_tensor(self, token_lists):
        """Convert token lists to ids, right-pad them to the longest list and build a tensor."""
        ids = [self.tokenizer.convert_tokens_to_ids(t) for t in token_lists]
        padded = pad_sequences(ids, maxlen=max(len(t) for t in token_lists),
                               dtype="long", truncating="post", padding="post")
        return torch.tensor(padded, device=device)

    def get_inputs_from(self, pedersen_synset_couples, output_path=None):
        """Turn ``(value, couple)`` pairs into padded id tensors and word positions.

        :param pedersen_synset_couples: iterable of ``(value, couple)`` pairs; each couple
            exposes ``w1``, ``w2``, ``s1``, ``s2`` and ``s_pos``.
        :param output_path: when not None, the collected rows are written there as CSV.
        :return: ``(parent_example_ids, parent_indexes, sister_example_ids, sister_indexes,
            values)``; the id tensors are right-padded to their longest sequence and the
            index tensors hold the position of the parent / sister word in each example.
        :raises KeyError: if no example of ``s2`` contains ``w2``, if the parent or the
            sister word is not tokenized as a single wordpiece, or if either word cannot
            be located in its tokenized example.
        :raises ValueError: if no couple was supplied (nothing to pad).
        """
        syn_list, parent_list, parent_example_list = [], [], []
        sister_syn_list, sister_word_list, sister_example_list = [], [], []
        value_list = []

        for value, el in pedersen_synset_couples:
            try:
                parent, _ = self.parent_model.in_voc_parent(el.w1, el.s_pos[0].lower(), el.s1)
            except KeyError:
                # No in-vocabulary parent available: fall back to the generic 'entity'.
                parent = 'entity'

            # Use the definition as context only when it actually mentions the parent.
            definition = el.s1.definition()
            if any(self._matches(w, parent) for w in definition.split(' ')):
                parent_example = definition
            else:
                parent_example = parent

            sister_example = self._first_example_containing(el.w2, el.s2.examples())
            if sister_example is None:
                raise KeyError('Examples not found')

            syn_list.append(el.s1.name())
            parent_list.append(parent)
            parent_example_list.append(parent_example)

            sister_syn_list.append(el.s2.name())
            sister_word_list.append(el.w2)
            sister_example_list.append(sister_example)

            value_list.append(value)

        df = pd.DataFrame([])
        df['syn'] = syn_list
        df['parent'] = parent_list
        df['parent_example'] = parent_example_list

        df['sister_syn'] = sister_syn_list
        df['sister_word'] = sister_word_list
        df['sister_example'] = sister_example_list
        if output_path is not None:
            df.to_csv(output_path)

        tokenized_parents = [self.tokenizer.tokenize("[CLS] " + p + " [SEP]") for p in parent_list]
        tokenized_sister = [self.tokenizer.tokenize("[CLS] " + w + " [SEP]") for w in sister_word_list]

        tokenized_parent_examples = []
        indexes_parent = []
        tokenized_examples_sister = []
        indexes_sister = []

        for i, (parent_tokens, sister_tokens) in enumerate(zip(tokenized_parents, tokenized_sister)):
            # [CLS] word [SEP] has exactly 3 tokens only when the word is one wordpiece.
            if len(sister_tokens) != 3 or len(parent_tokens) != 3:
                # Report both words: either of them can be the one that was split.
                raise KeyError('words tokenized as:' + str(sister_tokens) + ' (sister) and '
                               + str(parent_tokens) + ' (parent): each must be a single '
                               'in-vocabulary wordpiece')

            tokenized_parent_example = self.tokenizer.tokenize(
                "[CLS] " + parent_example_list[i] + " [SEP]")
            indexes_parent.append(self._locate(tokenized_parent_example, parent_tokens[1], 'parent'))
            tokenized_parent_examples.append(tokenized_parent_example)

            tokenized_example_sister = self.tokenizer.tokenize(
                "[CLS] " + sister_example_list[i] + " [SEP]")
            indexes_sister.append(self._locate(tokenized_example_sister, sister_tokens[1], 'sister'))
            tokenized_examples_sister.append(tokenized_example_sister)

        input_ids_example_parent = self._pad_to_tensor(tokenized_parent_examples)
        input_ids_example_sister = self._pad_to_tensor(tokenized_examples_sister)
        return input_ids_example_parent, torch.tensor(indexes_parent, device=device), \
               input_ids_example_sister, torch.tensor(indexes_sister, device=device), \
               value_list


class PedersenOOVSisterCouple_ExampleEmbedding(PedersenDefinitionSisterCouple_AsBertInput):
    """Builder pairing an example of the first (possibly multi-wordpiece) word with a sister example.

    The first word is located in its example as a span of wordpieces, so it does not have
    to be a single vocabulary entry; the sister word must be a single wordpiece.
    """

    def __init__(self, tokenizer):
        super().__init__(tokenizer)
        path = 'data_BERT/sentences/oov_in_example/oov_in_sentence.csv'
        self.oov_example_handler = OOVExampleHandler(path_pos=[('_', path)])

    @staticmethod
    def find_sub_list(sublist, l):
        """Return ``(start, end)`` (both inclusive) of the first occurrence of ``sublist`` in ``l``.

        Returns None when ``sublist`` is empty or does not occur in ``l``.
        """
        span = len(sublist)
        if span == 0:
            # An empty needle used to raise IndexError on sublist[0].
            return None
        for start in range(len(l) - span + 1):
            if l[start:start + span] == sublist:
                return start, start + span - 1
        return None

    def get_inputs_from(self, pedersen_synset_couples, output_path=None):
        """Turn ``(value, couple)`` pairs into padded id tensors and word positions.

        The example for ``w1`` is the first example of ``s1`` containing it, else the one
        returned by the OOV example handler, else the bare word. Words that ended up with
        no example at all are listed in ``<output_path without extension>_no_examples_after.txt``.

        :param pedersen_synset_couples: iterable of ``(value, couple)`` pairs; each couple
            exposes ``w1``, ``w2``, ``s1`` and ``s2``.
        :param output_path: when not None, the collected rows are written there as CSV and
            the list of words without example is written next to it; when None nothing is
            written to disk.
        :return: ``(oov_example_ids, oov_spans, sister_example_ids, sister_indexes, values)``;
            ``oov_spans`` holds the inclusive ``[first, last]`` wordpiece positions of ``w1``
            in each example, ``sister_indexes`` the position of the sister word.
        :raises KeyError: if no example of ``s2`` contains ``w2``, if the sister word is not
            a single wordpiece, or if the wordpieces of ``w1`` are not found in its example.
        :raises ValueError: if the sister wordpiece is absent from the tokenized example,
            or if no couple was supplied (nothing to pad).
        """
        missings_examples = []

        syn_list, oov_list, oov_examples_list = [], [], []
        sister_syn_list, sister_word_list, sister_example_list = [], [], []
        value_list = []

        for value, el in pedersen_synset_couples:
            oov_examples = el.s1.examples()
            sister_examples = el.s2.examples()

            oov_example = next((ex for ex in oov_examples if el.w1 in str(ex).split(' ')), None)
            if oov_example is None:
                oov_example = self.oov_example_handler.get_example(pos='_', oov=el.w1)

            sister_example = next((ex for ex in sister_examples if el.w2 in str(ex).split(' ')), None)
            if sister_example is None:
                raise KeyError('Examples not found')

            if oov_example is None:
                # No sentence available anywhere: use the word itself as its own context.
                missings_examples.append(el.w1)
                oov_example = el.w1

            syn_list.append(el.s1.name())
            oov_list.append(el.w1)
            oov_examples_list.append(oov_example)

            sister_syn_list.append(el.s2.name())
            sister_word_list.append(el.w2)
            sister_example_list.append(sister_example)

            value_list.append(value)

        if output_path is not None:
            # The report is only written when a path is given: the default output_path=None
            # used to crash on `.split`. splitext() strips just the extension, whereas
            # split('.')[0] cut the path at its first dot (e.g. './out/x.csv' -> '').
            missing_path = os.path.splitext(output_path)[0] + '_no_examples_after.txt'
            with open(missing_path, 'w') as f:
                f.writelines(x + "\t#\n" for x in missings_examples)

        df = pd.DataFrame([])
        df['syn'] = syn_list
        df['oov'] = oov_list
        df['oov_example'] = oov_examples_list

        df['sister_syn'] = sister_syn_list
        df['sister_word'] = sister_word_list
        df['sister_example'] = sister_example_list

        if output_path is not None:
            df.to_csv(output_path)

        tokenized_oovs = [self.tokenizer.tokenize("[CLS] " + o + " [SEP]") for o in oov_list]
        tokenized_sister = [self.tokenizer.tokenize("[CLS] " + w + " [SEP]") for w in sister_word_list]

        tokenized_oov_examples = []
        indexes_oovs = []
        tokenized_examples_sister = []
        indexes_sister = []

        for i, sister_tokens in enumerate(tokenized_sister):
            # [CLS] word [SEP] has exactly 3 tokens only when the word is one wordpiece.
            if len(sister_tokens) != 3:
                raise KeyError('words tokenized as:' + str(sister_tokens) + ' cause it is not in vocabulary')

            # Wordpieces of the first word, without the [CLS] / [SEP] markers.
            oov_pieces = tokenized_oovs[i][1:-1]
            tokenized_oov_example = self.tokenizer.tokenize("[CLS] " + oov_examples_list[i] + " [SEP]")
            span = self.find_sub_list(oov_pieces, tokenized_oov_example)
            if span is None:
                # Unpacking None used to fail with an unrelated TypeError.
                raise KeyError('wordpieces ' + str(oov_pieces) + ' not found in tokenized example '
                               + str(tokenized_oov_example))
            tokenized_oov_examples.append(tokenized_oov_example)
            indexes_oovs.append(list(span))

            tokenized_example_sister = self.tokenizer.tokenize("[CLS] " + sister_example_list[i] + " [SEP]")
            j_sister = tokenized_example_sister.index(sister_tokens[1])
            tokenized_examples_sister.append(tokenized_example_sister)
            indexes_sister.append(j_sister)

        input_ids_oov_examples = pad_sequences(
            [self.tokenizer.convert_tokens_to_ids(t) for t in tokenized_oov_examples],
            maxlen=max(len(t) for t in tokenized_oov_examples),
            dtype="long", truncating="post", padding="post")

        input_ids_example_sister = pad_sequences(
            [self.tokenizer.convert_tokens_to_ids(t) for t in tokenized_examples_sister],
            maxlen=max(len(t) for t in tokenized_examples_sister),
            dtype="long", truncating="post", padding="post")

        return torch.tensor(input_ids_oov_examples, device=device), torch.tensor(indexes_oovs, device=device), \
               torch.tensor(input_ids_example_sister, device=device), torch.tensor(indexes_sister, device=device), \
               value_list


class PedersenRandomSisterCouple_ExampleEmbedding(PedersenDefinitionSisterCouple_AsBertInput):
    """Builder pairing an in-vocabulary parent of the first word with an example of the sister word.

    The parent is looked up through ``ParentModel.in_voc_parent``; its context sentence is
    the definition of ``s1`` when that definition mentions the parent, otherwise the bare
    parent word. This class is not among those returned by ``instantiate`` in this file.
    """

    def __init__(self, tokenizer):
        super().__init__(tokenizer)
        self.parent_model = ParentModel('bert-base-uncased')

    @staticmethod
    def _matches(candidate, target):
        """Return True if ``candidate`` starts with ``target`` and is at most 2 characters longer."""
        # Same test as the former `(len(candidate) - len(target)) in range(-3, 3)`:
        # startswith() already guarantees that the difference is not negative.
        return candidate.startswith(target) and len(candidate) - len(target) < 3

    @staticmethod
    def _first_example_containing(word, examples):
        """Return the first example holding ``word`` as a space-delimited token, or None."""
        for example in examples:
            if word in str(example).split(' '):
                return example
        return None

    def _locate(self, tokens, piece, role):
        """Return the index of the first token matching ``piece``; raise KeyError when absent."""
        for k, token in enumerate(tokens):
            if self._matches(token, piece):
                return k
        # Previously a None index was stored and only failed later, inside torch.tensor().
        raise KeyError(role + ' wordpiece ' + repr(piece) + ' not found in tokenized example '
                       + str(tokens))

    def _pad_to_tensor(self, token_lists):
        """Convert token lists to ids, right-pad them to the longest list and build a tensor."""
        ids = [self.tokenizer.convert_tokens_to_ids(t) for t in token_lists]
        padded = pad_sequences(ids, maxlen=max(len(t) for t in token_lists),
                               dtype="long", truncating="post", padding="post")
        return torch.tensor(padded, device=device)

    def get_inputs_from(self, pedersen_synset_couples, output_path=None):
        """Turn ``(value, couple)`` pairs into padded id tensors and word positions.

        :param pedersen_synset_couples: iterable of ``(value, couple)`` pairs; each couple
            exposes ``w1``, ``w2``, ``s1``, ``s2`` and ``s_pos``.
        :param output_path: when not None, the collected rows are written there as CSV.
        :return: ``(parent_example_ids, parent_indexes, sister_example_ids, sister_indexes,
            values)``; the id tensors are right-padded to their longest sequence and the
            index tensors hold the position of the parent / sister word in each example.
        :raises KeyError: if no example of ``s2`` contains ``w2``, if the parent or the
            sister word is not tokenized as a single wordpiece, or if either word cannot
            be located in its tokenized example.
        :raises ValueError: if no couple was supplied (nothing to pad).
        """
        syn_list, parent_list, parent_example_list = [], [], []
        sister_syn_list, sister_word_list, sister_example_list = [], [], []
        value_list = []

        for value, el in pedersen_synset_couples:
            try:
                parent, _ = self.parent_model.in_voc_parent(el.w1, el.s_pos[0].lower(), el.s1)
            except KeyError:
                # No in-vocabulary parent available: fall back to the generic 'entity'.
                parent = 'entity'

            # Use the definition as context only when it actually mentions the parent.
            definition = el.s1.definition()
            if any(self._matches(w, parent) for w in definition.split(' ')):
                parent_example = definition
            else:
                parent_example = parent

            sister_example = self._first_example_containing(el.w2, el.s2.examples())
            if sister_example is None:
                raise KeyError('Examples not found')

            syn_list.append(el.s1.name())
            parent_list.append(parent)
            parent_example_list.append(parent_example)

            sister_syn_list.append(el.s2.name())
            sister_word_list.append(el.w2)
            sister_example_list.append(sister_example)

            value_list.append(value)

        df = pd.DataFrame([])
        df['syn'] = syn_list
        df['parent'] = parent_list
        df['parent_example'] = parent_example_list

        df['sister_syn'] = sister_syn_list
        df['sister_word'] = sister_word_list
        df['sister_example'] = sister_example_list
        if output_path is not None:
            df.to_csv(output_path)

        tokenized_parents = [self.tokenizer.tokenize("[CLS] " + p + " [SEP]") for p in parent_list]
        tokenized_sister = [self.tokenizer.tokenize("[CLS] " + w + " [SEP]") for w in sister_word_list]

        tokenized_parent_examples = []
        indexes_parent = []
        tokenized_examples_sister = []
        indexes_sister = []

        for i, (parent_tokens, sister_tokens) in enumerate(zip(tokenized_parents, tokenized_sister)):
            # [CLS] word [SEP] has exactly 3 tokens only when the word is one wordpiece.
            if len(sister_tokens) != 3 or len(parent_tokens) != 3:
                # Report both words: either of them can be the one that was split.
                raise KeyError('words tokenized as:' + str(sister_tokens) + ' (sister) and '
                               + str(parent_tokens) + ' (parent): each must be a single '
                               'in-vocabulary wordpiece')

            tokenized_parent_example = self.tokenizer.tokenize(
                "[CLS] " + parent_example_list[i] + " [SEP]")
            indexes_parent.append(self._locate(tokenized_parent_example, parent_tokens[1], 'parent'))
            tokenized_parent_examples.append(tokenized_parent_example)

            tokenized_example_sister = self.tokenizer.tokenize(
                "[CLS] " + sister_example_list[i] + " [SEP]")
            indexes_sister.append(self._locate(tokenized_example_sister, sister_tokens[1], 'sister'))
            tokenized_examples_sister.append(tokenized_example_sister)

        input_ids_example_parent = self._pad_to_tensor(tokenized_parent_examples)
        input_ids_example_sister = self._pad_to_tensor(tokenized_examples_sister)
        return input_ids_example_parent, torch.tensor(indexes_parent, device=device), \
               input_ids_example_sister, torch.tensor(indexes_sister, device=device), \
               value_list


class PedersenParentFromExampleSisterCouple_ExampleEmbedding(PedersenDefinitionSisterCouple_AsBertInput):
    """Builder pairing an example sentence of the parent word with an example of the sister word.

    The parent is looked up through ``ParentModel.in_voc_parent`` and its sentence through
    a ``ParentExampleHandler`` fed with the CSV files listed in ``__init__``; when the
    sentence does not mention the parent, the bare parent word is used instead.
    """

    def __init__(self, tokenizer):
        super().__init__(tokenizer)
        self.parent_model = ParentModel('bert-base-uncased')
        path_1 = 'data_BERT/sentences/parent_from_example/parent_sentence_2.csv'
        path_2 = 'data_BERT/sentences/comparable_parent_from_example/parent_sentence.csv'
        path_3 = 'data_BERT/sentences/comparable_parent_from_example/parent_sentence_2.csv'
        base_path = 'data_BERT/sentences/task1/parent_from_example'

        self.parent_example_handler = ParentExampleHandler(path_pos=[
            ('_', path_1), ('_', path_2), ('_', path_3),
            ('_', os.path.join(base_path, 'n_output_parent_non_pulito.csv')),
            ('_', os.path.join(base_path, 'v_output_parent_non_pulito.csv'))])

    @staticmethod
    def _matches(candidate, target):
        """Return True if ``candidate`` is ``target`` plus at most 2 characters, or ``target + 'ing'``."""
        # The first test equals the former `(len(candidate) - len(target)) in range(-3, 3)`:
        # startswith() already guarantees that the difference is not negative.
        if candidate.startswith(target) and len(candidate) - len(target) < 3:
            return True
        return candidate == target + 'ing'

    @staticmethod
    def _first_example_containing(word, examples):
        """Return the first example holding ``word`` as a space-delimited token, or None."""
        for example in examples:
            if word in str(example).split(' '):
                return example
        return None

    def _locate(self, tokens, piece, role):
        """Return the index of the first token matching ``piece``; raise KeyError when absent."""
        for k, token in enumerate(tokens):
            if self._matches(token, piece):
                return k
        # Previously a None index was stored and only failed later, inside torch.tensor().
        raise KeyError(role + ' wordpiece ' + repr(piece) + ' not found in tokenized example '
                       + str(tokens))

    def _pad_to_tensor(self, token_lists):
        """Convert token lists to ids, right-pad them to the longest list and build a tensor."""
        ids = [self.tokenizer.convert_tokens_to_ids(t) for t in token_lists]
        padded = pad_sequences(ids, maxlen=max(len(t) for t in token_lists),
                               dtype="long", truncating="post", padding="post")
        return torch.tensor(padded, device=device)

    def get_inputs_from(self, pedersen_synset_couples, output_path=None):
        """Turn ``(value, couple)`` pairs into padded id tensors and word positions.

        :param pedersen_synset_couples: iterable of ``(value, couple)`` pairs; each couple
            exposes ``w1``, ``w2``, ``s1``, ``s2`` and ``s_pos``.
        :param output_path: when not None, the collected rows are written there as CSV.
        :return: ``(parent_example_ids, parent_indexes, sister_example_ids, sister_indexes,
            values)``; the id tensors are right-padded to their longest sequence and the
            index tensors hold the position of the parent / sister word in each example.
        :raises KeyError: if no example of ``s2`` contains ``w2``, if the parent or the
            sister word is not tokenized as a single wordpiece, or if either word cannot
            be located in its tokenized example.
        :raises ValueError: if no couple was supplied (nothing to pad).
        """
        syn_list, parent_list, parent_example_list = [], [], []
        sister_syn_list, sister_word_list, sister_example_list = [], [], []
        value_list = []

        for value, el in pedersen_synset_couples:
            try:
                parent, _ = self.parent_model.in_voc_parent(el.w1, el.s_pos[0].lower(), el.s1)
            except KeyError:
                # No in-vocabulary parent available: fall back to the generic 'entity'.
                parent = 'entity'

            sister_example = self._first_example_containing(el.w2, el.s2.examples())
            if sister_example is None:
                raise KeyError('Examples not found')

            parent_example = self.parent_example_handler.get_example(pos='_', parent=parent)
            # NOTE(review): the handler presumably returns None when it has no sentence
            # (the OOV handler's result is checked for None in this file) - confirm.
            # A missing sentence, like one that never mentions the parent, is replaced
            # by the bare parent word instead of crashing on `.split()`.
            if parent_example is None or \
                    not any(self._matches(w, parent) for w in parent_example.split()):
                parent_example = parent

            syn_list.append(el.s1.name())
            parent_list.append(parent)
            parent_example_list.append(parent_example)

            sister_syn_list.append(el.s2.name())
            sister_word_list.append(el.w2)
            sister_example_list.append(sister_example)

            value_list.append(value)

        df = pd.DataFrame([])
        df['syn'] = syn_list
        df['parent'] = parent_list
        df['parent_example'] = parent_example_list

        df['sister_syn'] = sister_syn_list
        df['sister_word'] = sister_word_list
        df['sister_example'] = sister_example_list
        if output_path is not None:
            df.to_csv(output_path)

        tokenized_parents = [self.tokenizer.tokenize("[CLS] " + p + " [SEP]") for p in parent_list]
        tokenized_sister = [self.tokenizer.tokenize("[CLS] " + w + " [SEP]") for w in sister_word_list]

        tokenized_parent_examples = []
        indexes_parent = []
        tokenized_examples_sister = []
        indexes_sister = []

        for i, (parent_tokens, sister_tokens) in enumerate(zip(tokenized_parents, tokenized_sister)):
            # [CLS] word [SEP] has exactly 3 tokens only when the word is one wordpiece.
            if len(sister_tokens) != 3 or len(parent_tokens) != 3:
                # Report both words: either of them can be the one that was split.
                raise KeyError('words tokenized as:' + str(sister_tokens) + ' (sister) and '
                               + str(parent_tokens) + ' (parent): each must be a single '
                               'in-vocabulary wordpiece')

            tokenized_parent_example = self.tokenizer.tokenize(
                "[CLS] " + parent_example_list[i] + " [SEP]")
            indexes_parent.append(self._locate(tokenized_parent_example, parent_tokens[1], 'parent'))
            tokenized_parent_examples.append(tokenized_parent_example)

            tokenized_example_sister = self.tokenizer.tokenize(
                "[CLS] " + sister_example_list[i] + " [SEP]")
            # The sister word is matched by prefix only (no 'ing' form), as before.
            j_sister = None
            for k, token in enumerate(tokenized_example_sister):
                if token.startswith(sister_tokens[1]) and len(token) - len(sister_tokens[1]) < 3:
                    j_sister = k
                    break
            if j_sister is None:
                raise KeyError('sister wordpiece ' + repr(sister_tokens[1])
                               + ' not found in tokenized example ' + str(tokenized_example_sister))
            indexes_sister.append(j_sister)
            tokenized_examples_sister.append(tokenized_example_sister)

        input_ids_example_parent = self._pad_to_tensor(tokenized_parent_examples)
        input_ids_example_sister = self._pad_to_tensor(tokenized_examples_sister)
        return input_ids_example_parent, torch.tensor(indexes_parent, device=device), \
               input_ids_example_sister, torch.tensor(indexes_sister, device=device), \
               value_list
