class Converter(object):
    """Static helpers that turn tokenized examples into fixed-length id sequences."""

    @staticmethod
    def convert_examples_to_features(
            examples,
            intent_list,
            label_list,
            max_seq_length,
            tokenizer
    ):
        """Convert examples into a list of padded ``InputFeatures``.

        Each kept example becomes ``[CLS] + word-piece tokens``, truncated and
        padded to exactly ``max_seq_length`` positions. Only the CLS token is
        added; no separator token is appended.

        Args:
            examples: Iterable of objects exposing ``words``, ``labels`` and
                ``intent``. ``words`` and ``labels`` are walked in lockstep.
            intent_list: Sequence of known intents; an example's intent id is
                its position in this sequence.
            label_list: Mapping from label to label id. Despite the name it is
                looked up by label (``label_list[label]``), not by position.
            max_seq_length: Length of every returned sequence, CLS included.
            tokenizer: Object providing ``cls_token``, ``pad_token_id``,
                ``pad_token_type_id``, ``tokenize(word)`` and
                ``convert_tokens_to_ids(tokens)``.

        Returns:
            A list of ``InputFeatures``, one per kept example. Examples whose
            intent is not in ``intent_list``, or that contain a label missing
            from ``label_list``, are dropped (the count is logged at DEBUG).
        """
        import logging  # local import: only the skip report below needs it

        label_map = label_list
        cls_token = tokenizer.cls_token
        # Label id for positions that carry no word label (CLS, trailing
        # word pieces, padding). Presumably matches the loss function's
        # ignore index -- confirm against the training code.
        pad_label_id = -100
        # Only CLS is added to the sequence, so one slot is reserved.
        special_tokens_count = 1
        max_word_tokens = max_seq_length - special_tokens_count

        skipped = 0
        features = []
        for example in examples:
            if example.intent not in intent_list:
                skipped += 1
                continue
            intent_id = intent_list.index(example.intent)

            tokens = []
            label_ids = []
            has_unknown_label = False
            for word, label in zip(example.words, example.labels):
                if label not in label_map:
                    has_unknown_label = True
                    break
                word_tokens = tokenizer.tokenize(word)
                if not word_tokens:
                    # A word can tokenize to nothing (e.g. a bare space).
                    # Emitting a label for it would shift every later label
                    # off its token, so the word is dropped entirely.
                    continue
                tokens.extend(word_tokens)
                # The first word piece carries the label; the rest are ignored.
                label_ids.append(label_map[label])
                label_ids.extend([pad_label_id] * (len(word_tokens) - 1))
            if has_unknown_label:
                skipped += 1
                continue

            if len(tokens) > max_word_tokens:
                tokens = tokens[:max_word_tokens]
                label_ids = label_ids[:max_word_tokens]

            tokens = [cls_token] + tokens
            label_ids = [pad_label_id] + label_ids

            input_ids, input_mask, segment_ids = Converter.convert_tokens_to_ids_with_padding(
                tokens, max_seq_length, tokenizer
            )
            label_ids += [pad_label_id] * (max_seq_length - len(label_ids))

            assert len(input_ids) == max_seq_length and len(input_mask) == max_seq_length and len(
                segment_ids) == max_seq_length and len(label_ids) == max_seq_length

            features.append(
                InputFeatures(input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, label_ids=label_ids,
                              intent_id=intent_id)
            )

        if skipped:
            logging.getLogger(__name__).debug(
                "Skipped %d example(s) with an unknown intent or label", skipped
            )
        return features

    @staticmethod
    def convert_tokens_to_ids_with_padding(
            tokens,
            max_seq_length,
            tokenizer
    ):
        """Map tokens to ids and right-pad ids, mask and segment ids.

        No truncation is performed: when ``tokens`` is longer than
        ``max_seq_length`` the padding length is negative, nothing is
        appended, and the sequences keep their original length.

        Args:
            tokens: List of token strings.
            max_seq_length: Target length to pad up to.
            tokenizer: Object providing ``pad_token_id``,
                ``pad_token_type_id`` and ``convert_tokens_to_ids(tokens)``.

        Returns:
            A tuple ``(input_ids, input_mask, segment_ids)``. The mask is 1
            for real tokens and 0 for padding; segment ids are 0 for real
            tokens and the tokenizer's pad segment id for padding.
        """
        pad_token_id = tokenizer.pad_token_id
        # `pad_token_type_id` is the attribute the sibling method relies on.
        # The older `pad_token_segment_id` name is kept as a fallback so a
        # custom tokenizer exposing only that name continues to work.
        try:
            pad_token_segment_id = tokenizer.pad_token_type_id
        except AttributeError:
            pad_token_segment_id = tokenizer.pad_token_segment_id

        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        input_mask = [1] * len(input_ids)
        segment_ids = [0] * len(tokens)

        # Pad up to the sequence length.
        padding_length = max_seq_length - len(input_ids)
        input_ids += [pad_token_id] * padding_length
        input_mask += [0] * padding_length
        segment_ids += [pad_token_segment_id] * padding_length
        return input_ids, input_mask, segment_ids



class InputFeatures:
    """Container for the model inputs built from one example.

    The constructor stores its arguments unchanged on the instance:
    ``input_ids``, ``input_mask``, ``segment_ids``, ``label_ids`` and
    ``intent_id``.
    """

    def __init__(self, input_ids, input_mask, segment_ids, label_ids, intent_id):
        # Sequence-valued fields.
        self.input_ids, self.input_mask = input_ids, input_mask
        self.segment_ids, self.label_ids = segment_ids, label_ids
        # Single id for the whole example.
        self.intent_id = intent_id