#-*-coding:utf8-*-
"""
Read a tab-separated data file and bulk-index its rows into Elasticsearch.

"""
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from elasticsearch import exceptions
import traceback
import datetime
import sys

#reload(sys) #sys.setdefaultencoding('utf-8')
def _create_index(es, index_name="qdoc", doc_type_name="qdoc"):
    """Create the index that holds the loaded documents.

    Args:
        es: client object exposing ``indices.create(index=..., body=...)``.
        index_name: name of the index to create.
        doc_type_name: mapping type under which the fields are declared.

    Any exception raised by ``es.indices.create`` propagates to the caller.
    """
    # NOTE(review): 'string' looks like the legacy Elasticsearch field type
    # while 'content' uses the newer 'text' type -- confirm both are accepted
    # by the targeted server version.
    properties = {
        'name': {'type': 'string'},
        'content': {'type': 'text'},
        'subredditid': {'type': 'string'},
    }
    my_mappings = {
        doc_type_name: {
            '_all': {'enabled': 'false'},
            'properties': properties,
        },
    }
    settings = {'mappings': my_mappings}

    es.indices.create(index=index_name, body=settings)

def _save_data(es, input_file, index_name='qdoc', doc_type_name='qdoc',
               batch_size=100):
    """Bulk-index the rows of a tab-separated file.

    Each line is stripped and split on tabs: the first field becomes
    ``name``, the second ``content`` and the last ``subredditid``.
    Documents are sent with ``bulk`` every ``batch_size`` rows, plus one
    final call for any remainder.  Blank lines are ignored and lines with
    fewer than two fields are reported and skipped instead of aborting a
    partially completed load with an IndexError.

    Args:
        es: Elasticsearch client handed to ``bulk``.
        input_file: path of the UTF-8 encoded input file.
        index_name: target index of every document.
        doc_type_name: value stored in each action's ``_type``.
        batch_size: number of documents per ``bulk`` call.

    Errors raised by ``bulk`` (called with ``raise_on_error=True``) and by
    opening or decoding the file propagate to the caller.
    """
    # Local import keeps this block self-contained; io.open accepts an
    # explicit encoding on both Python 2 and Python 3.
    import io

    def _flush(actions, lines_read):
        # Send one batch and report how many input lines were consumed.
        bulk(es, actions, index=index_name, raise_on_error=True)
        print('{1}: finish {0}'.format(lines_read, input_file))

    pending = []
    count = 0
    # Decode explicitly so the result does not depend on the platform locale.
    with io.open(input_file, encoding='utf-8') as f_r:
        for count, line in enumerate(f_r, 1):
            stripped = line.strip()
            ts = stripped.split('\t')
            if len(ts) < 2:
                if stripped:
                    print('{0}: skip malformed line {1}'.format(
                        input_file, count))
                continue
            pending.append({
                '_index': index_name,
                '_type': doc_type_name,
                '_source': {
                    'name': ts[0],
                    'content': ts[1],
                    'subredditid': ts[-1],
                },
            })
            if len(pending) >= batch_size:
                _flush(pending, count)
                pending = []
    if pending:
        _flush(pending, count)
    print('{0}: finish all'.format(input_file))

def _insert_data(es, file_name):
    """Load ``file_name`` through ``_save_data`` and print the elapsed time.

    Args:
        es: client object forwarded unchanged to ``_save_data``.
        file_name: path of the input file to load.
    """
    now = datetime.datetime.now
    began = now()
    _save_data(es, file_name)
    elapsed = now() - began
    print('all cost time{0}'.format(elapsed))

def _main():
    """Script entry point: create the index, then load the given file.

    Expects exactly one command-line argument, the input file path;
    otherwise prints a short message and returns without contacting the
    server.
    """
    argv = sys.argv
    if len(argv) == 2:
        client = Elasticsearch(hosts=["127.0.0.1:9200"], timeout=500)
        try:
            _create_index(client)
        except exceptions.RequestError:
            # Best effort: report the failed index creation and still
            # attempt the load.
            print(traceback.format_exc())
        _insert_data(client, argv[1])
    else:
        print('need file argument')

if __name__ == '__main__':
    _main()