import logging
import numpy as np
from sklearn.decomposition import PCA
from numpy import dtype, fromstring, float32 as REAL

def _to_native_str(raw):
    """Return ``raw`` as the interpreter's native ``str`` type.

    On Python 3 the file is read as ``bytes`` and decoded as UTF-8; on
    Python 2 ``bytes`` already *is* ``str`` and is returned unchanged.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode('utf-8')


def _read_binary_vectors(fin, vocab_size, vec_size, fname):
    """Read ``vocab_size`` ``<word> <float32 * vec_size>`` records from ``fin``."""
    binary_len = dtype(REAL).itemsize * vec_size
    vocab = {}
    for _ in range(vocab_size):
        # The word is terminated by a single space; a newline left over from
        # the previous record may precede it and is skipped.
        chars = []
        while True:
            ch = fin.read(1)
            if not ch:
                # Without this check a truncated file would loop forever.
                raise EOFError(
                    'Unexpected end of file while reading a word from %s'
                    % (fname,))
            if ch == b' ':
                break
            if ch != b'\n':
                chars.append(ch)
        word = _to_native_str(b''.join(chars))

        payload = fin.read(binary_len)
        if len(payload) != binary_len:
            raise EOFError(
                'Unexpected end of file while reading the vector of %r from %s'
                % (word, fname))
        # frombuffer returns a read-only view on the bytes object; copy so
        # callers get a writable array.
        vocab[word] = np.frombuffer(payload, dtype=REAL).copy()
    return vocab


def _read_text_vectors(fin, vec_size):
    """Read one ``<word> <w1> ... <w_vec_size>`` record per line from ``fin``."""
    vocab = {}
    for line_no, raw_line in enumerate(fin):
        line = _to_native_str(raw_line)
        parts = line.strip().split(' ')
        if len(parts) != vec_size + 1:
            # Skip malformed lines instead of storing a wrong-sized vector.
            logging.warning("Wrong line: %s %s\n", line_no, line)
            continue
        vocab[parts[0]] = np.array([float(w) for w in parts[1:]], dtype=REAL)
    return vocab


def load_word2vec_format(fname, wordlist, dim, binary=None):
    """Load word vectors stored in the word2vec text or binary format.

    The file starts with a header line ``"<vocab_size> <vec_size>"``. In the
    text format every following line is a word followed by ``vec_size``
    space-separated weights; in the binary format every record is a word, a
    single space, and ``vec_size`` raw float32 values.

    Args:
        fname: Path of the file to read.
        wordlist: Currently unused; kept for interface compatibility.
        dim: Currently unused; kept for interface compatibility.
        binary: ``True`` for the binary format, ``False`` for the text
            format. When ``None`` the format is inferred from the file
            extension (``.bin`` -> binary, ``.txt`` -> text).

    Returns:
        A dict mapping each word to a 1-D float32 ``numpy.ndarray``.

    Raises:
        NotImplementedError: If ``binary`` is ``None`` and the extension is
            neither ``.txt`` nor ``.bin``.
        ValueError: If the header line is malformed or a weight in a text
            file is not a number.
        EOFError: If a binary file ends before ``vocab_size`` records were
            read.
    """
    logging.info("loading projection weights from %s", fname)
    if binary is None:
        if fname.endswith('.txt'):
            binary = False
        elif fname.endswith('.bin'):
            binary = True
        else:
            raise NotImplementedError('Cannot infer binary from %s' % (fname,))

    logging.info("loading matrix from %s", fname)
    # Always read bytes: the binary payload must not go through text decoding
    # or newline translation, and text lines are decoded explicitly.
    with open(fname, 'rb') as fin:
        header_fields = fin.readline().split()
        if len(header_fields) != 2:
            raise ValueError(
                'Malformed header in %s: expected "<vocab_size> <vec_size>"'
                % (fname,))
        vocab_size, vec_size = int(header_fields[0]), int(header_fields[1])
        if binary:
            vocab = _read_binary_vectors(fin, vocab_size, vec_size, fname)
        else:
            vocab = _read_text_vectors(fin, vec_size)
        logging.info("loaded matrix from %s", fname)

    logging.info('getting useful word vectors')
    # NOTE: restricting the vocabulary to `wordlist` and reducing the vectors
    # to `dim` components was disabled in this function, so every vector read
    # from the file is returned at its original size.

    return vocab
