from fieldembed.keyedvectors import KeyedVectors
from pprint import pprint
import os
# this should be removed inside fieldembed
########################################################################################
# Channel settings for the Chinese character-level corpus; FldEmbed selects
# this table when the model path contains 'WikiChinese/char'.
# Each key is a field (channel) name, each value the option dict for it.
CN_CHAR = {
    # CTX_IND
    "token": {
        "Max_Ngram": 1,
    },
    "subcomp": {
        "Min_Ngram": 1,
        "Max_Ngram": 4,
        "end_grain": False,
        "min_grain_freq": 1152,
    },
    "pinyin": {
        "Min_Ngram": 1,
        "Max_Ngram": 4,
        "end_grain": False,
        "min_grain_freq": 10,
    },
    "pos": {
        "tagScheme": "BIOES",
    },
    "medpos": {
        "tagScheme": "BIOES",
    },
}

# Channel settings for the English corpus; FldEmbed selects this table when
# the model path contains 'WikiEnglish'.
# Each key is a field (channel) name, each value the option dict for it.
EN = {
    # CTX_IND
    "token": {
        "Max_Ngram": 1,
    },
    "char": {
        "Min_Ngram": 1,
        "Max_Ngram": 3,
        "end_grain": False,
        "min_grain_freq": 2514,
    },
    "phoneme": {
        "Min_Ngram": 1,
        "Max_Ngram": 3,
        "end_grain": False,
        "min_grain_freq": 1979,
    },
    "pos_en": {
        "tagScheme": "BIOES",
    },
}



# Location of the Luohu character-level corpus and its minimum token frequency.
Data_Dir = "data/LuohuCorpus/char/"
min_token_freq = 1

# NOTE(review): the original author marked this as "not correct" without
# saying what is wrong -- confirm before relying on these values.
# Channel settings for the Luohu corpus; FldEmbed selects this table when the
# model path contains 'LuohuCorpus/char'.
LH_CN_CHAR = {
    # CTX_IND
    "token": {
        "Max_Ngram": 1,
    },
    "pinyin": {
        "Min_Ngram": 1,
        "Max_Ngram": 1,
        "end_grain": True,
        "min_grain_freq": 0,
    },
    "subcomp": {
        "Min_Ngram": 1,
        "Max_Ngram": 1,
        "end_grain": True,
        "min_grain_freq": 0,
    },
    "pos": {
        "tagScheme": "BIOES",
    },
    "medpos": {
        "tagScheme": "BIOES",
    },
}
    
    
class FldEmbed(object):
    '''
    This is a fake container for the true FieldEmbedding class.
    It is also a light-weight substitution for the aforementioned class.

    Attributes set by the constructor:
        path           -- the directory that was searched for model files.
        Left           -- list of '_left_' model file paths that were loaded.
        Right          -- path of the '_right_' model file that was loaded.
        wv_neg         -- vectors loaded from ``Right``.
        weights        -- dict mapping field name -> loaded vectors; the
                          'token' field reuses ``wv_neg`` instead of loading
                          its own file.
        Field_Settings -- dict mapping field name -> channel settings taken
                          from the table selected for the corpus.
    '''
    def __init__(self, path, size=200):
        '''
        Locate and load the saved vectors found under ``path``.

        Args:
            path: root directory of the trained model files. It must contain
                'WikiEnglish', 'WikiChinese/char' or 'LuohuCorpus/char' so
                that a channel-settings table can be chosen.
            size: only files whose name contains ``str(size)`` are considered.

        Raises:
            ValueError: if ``path`` matches none of the known corpora.
            IndexError: if no directory with files, or no '_right_' model
                file, is found under ``path``.
            KeyError: if a field name parsed from a '_left_' file name is not
                present in the selected channel settings.
        '''
        # (+) get CS: choose the channel settings from the corpus named in path
        if 'WikiEnglish' in path:
            CS = EN
        elif 'WikiChinese/char' in path:
            CS = CN_CHAR
        elif 'LuohuCorpus/char' in path:
            CS = LH_CN_CHAR
        else:
            raise ValueError('No available Channel Settings')

        self.Field_Settings = {}
        self.path = path

        # (+) get left and right
        # ``Left`` holds one list per directory that contains any file (even
        # when none of them matches), while ``Right`` is a flat list over the
        # whole tree.
        # NOTE(review): ``size_tag in name`` is a substring test, so size=200
        # would also accept a name containing '2000' -- confirm the naming
        # scheme before tightening this.
        size_tag = str(size)
        Left, Right = [], []
        for dirpath, _dirnames, filenames in os.walk(path):
            if not filenames:
                continue
            left = []
            for name in filenames:
                # '.npy' files are never treated as model entry points
                # (presumably side-car arrays written next to the model file).
                if size_tag not in name or '.npy' in name:
                    continue
                if '_right_' in name:
                    Right.append(os.path.join(dirpath, name))
                elif '_left_' in name:
                    left.append(os.path.join(dirpath, name))
            Left.append(left)

        # Fail with a readable message instead of a bare "list index out of
        # range"; the exception type is kept for backward compatibility.
        if not Left:
            raise IndexError(
                'No model files found under {!r}'.format(path))
        if not Right:
            raise IndexError(
                'No "_right_" model file matching size {} found under {!r}'
                .format(size, path))

        # NOTE(review): the left models come from the first directory visited,
        # the right model is the first one found anywhere in the tree; they
        # are only guaranteed to belong together when all model files live in
        # a single directory.
        self.Left = Left[0]
        self.Right = Right[0]

        # (+) Weights
        self.wv_neg = KeyedVectors.load(self.Right)
        self.weights = {}
        for pth in self.Left:
            # basename instead of split('/') so that paths built by
            # os.path.join are handled on every platform.
            end = os.path.basename(pth)
            # The field name is the last '_'-separated piece of the file
            # name; 'pos_en' contains an underscore itself, hence the special
            # case.
            fld = 'pos_en' if 'pos_en' in end else end.split('_')[-1]
            if fld == 'token':
                # the token field shares the vectors loaded from ``Right``
                self.weights[fld] = self.wv_neg
            else:
                self.weights[fld] = KeyedVectors.load(pth)
            self.Field_Settings[fld] = CS[fld]

        print('[fieldlm.script_train.embed_config.FldEmbed.__init__]//Field_Settings:', self.Field_Settings)
########################################################################################     


