#include "ngram.ih"

void NGram::load(std::string path)
{
    cerr << "Loading: " << path << ".1\n";
    ifstream in(path + ".1");
    if (!in.good())
    {
        cerr << "Could not read unigrams: " << path << "\n";
        exit(1);
    }
    string word1, word2;
    d_unigramCounts.push_back(0);
    d_bigramCounts.push_back(0);
    d_bigrams.push_back(0);
    uint64_t count;
    while(in >> word1 >> count)
    {
        d_unigrams.addOrdered(word1);
        d_unigramCounts.push_back(count);
    }
    in.close();

    cerr << "Loading: " << path << ".2\n";
    ifstream in2(path + ".2");
    if (!in2.good())
    {
        cerr << "Could not read bigrams: " << path << "\n";
        exit(1);
    }

    uint64_t curId = 0;
    while (in2 >> word1 >> word2 >> count)
    {
        //cout << word1 << '\t' << word2 << '\n';
        uint32_t word1Id = d_unigrams.getId(word1);
        uint32_t word2Id = d_unigrams.getId(word2);

        uint64_t bigramId = word1Id;
        bigramId <<=32;
        bigramId += word2Id;

        if (bigramId >= curId)
        {
            d_bigrams.push_back(bigramId);
            d_bigramCounts.push_back(count);
            curId = bigramId;
        }
        else
            cerr << "error: Bigrams are not ordered the same as Unigrams, "
                 << "results might not be correct. See: \n" << word1 << ' ' 
                 << word2 << '\n';
    }
    in2.close();
}

