import csv
import json
import os

from scipy import sparse
import numpy as np
import dill
import h5py
import joblib
import tables


def load_structured_resource(filename, filetype='', **kwargs):
	"""Load an object from `filename`, picking the reader from the file suffix or `filetype`.

	Supported formats (checked in this order): 'dill', 'joblib', 'json', 'jsonl' and 'csvl'
	(a single line of comma-separated values).

	:param filename: path of the file to read
	:param filetype: explicit format name, used when the suffix of `filename` does not identify the format
	:param kwargs: optional settings:
		`encoding` - text encoding for json/jsonl/csvl files (default 'utf-8', the default used when saving);
		`as_set` - for 'csvl', return a `set` (default) instead of a `list`
	:return: the deserialised object; a list of parsed lines for 'jsonl'; a set or list of strings for 'csvl'
	:raises NotImplementedError: if neither the suffix nor `filetype` names a supported format
	"""
	encoding = kwargs.get('encoding', 'utf-8')

	if (filename.endswith('.dill') or filetype == 'dill'):
		# NOTE: like pickle, dill can run arbitrary code while loading - only load trusted files.
		with open(filename, 'rb') as data_file:
			return dill.load(data_file)
	elif (filename.endswith('.joblib') or filetype == 'joblib'):
		return joblib.load(filename)
	elif (filename.endswith('.json') or filetype == 'json'):
		with open(filename, 'r', encoding=encoding) as data_file:
			return json.load(data_file)
	elif (filename.endswith('.jsonl') or filetype == 'jsonl'):
		with open(filename, 'r', encoding=encoding) as data_file:
			# Blank lines (e.g. a trailing newline at the end of the file) carry no record.
			return [json.loads(line) for line in data_file if line.strip()]
	elif (filename.endswith('.csvl') or filetype == 'csvl'):
		as_set = kwargs.pop('as_set', True)
		with open(filename, 'r', encoding=encoding) as in_file:
			content = in_file.read().strip()
		# ''.split(',') yields [''], so an empty file must be special-cased to give an empty collection.
		data = content.split(',') if content else []
		return set(data) if as_set else data
	else:
		raise NotImplementedError(
			'Cannot determine a supported format for filename={!r} and filetype={!r}'.format(filename, filetype)
		)


def save_structured_resource(obj, out_file, filetype='', create_intermediary_dirs=True, **kwargs):
	"""Serialise `obj` to `out_file`, picking the writer from the file suffix or `filetype`.

	Supported formats (checked in this order): 'dill', 'joblib', 'json', 'jsonl' and 'csvl'
	(a single line of comma-separated values).

	:param obj: object to store; must be a `list` for 'jsonl' and an iterable of strings for 'csvl'
	:param out_file: path of the file to write
	:param filetype: explicit format name, used when the suffix of `out_file` does not identify the format
	:param create_intermediary_dirs: create the missing parent directories of `out_file` first
	:param kwargs: optional settings:
		`dill_protocol` (default 3), `joblib_compression` (default 3), `joblib_protocol` (default 3),
		`encoding` (default 'utf-8'), `json_indent` (default 4, 'json' only),
		`ensure_ascii` (default True, 'json' and 'jsonl')
	:raises ValueError: if the format is 'jsonl' and `obj` is not a list
	:raises NotImplementedError: if neither the suffix nor `filetype` names a supported format
	"""
	path, _ = os.path.split(out_file)

	# exist_ok avoids the check-then-create race when several writers target the same directory.
	if (path != '' and create_intermediary_dirs):
		os.makedirs(path, exist_ok=True)

	encoding = kwargs.get('encoding', 'utf-8')
	ensure_ascii = kwargs.get('ensure_ascii', True)

	if (out_file.endswith('.dill') or filetype == 'dill'):
		with open(out_file, 'wb') as data_file:
			dill.dump(obj, data_file, protocol=kwargs.get('dill_protocol', 3))
	elif (out_file.endswith('.joblib') or filetype == 'joblib'):
		joblib.dump(obj, out_file, compress=kwargs.get('joblib_compression', 3),
					protocol=kwargs.get('joblib_protocol', 3))
	elif (out_file.endswith('.json') or filetype == 'json'):
		with open(out_file, 'w', encoding=encoding) as data_file:
			json.dump(obj, data_file, indent=kwargs.get('json_indent', 4), ensure_ascii=ensure_ascii)
	elif (out_file.endswith('.jsonl') or filetype == 'jsonl'):
		if (not isinstance(obj, list)):
			raise ValueError(f'obj must be of type `list` but is of type={type(obj)}!')
		with open(out_file, 'w', encoding=encoding) as data_file:
			for line in obj:
				# Honour `ensure_ascii` here as well, so 'json' and 'jsonl' output agree.
				data_file.write(f'{json.dumps(line, ensure_ascii=ensure_ascii)}\n')
	elif (out_file.endswith('.csvl') or filetype == 'csvl'):
		with open(out_file, 'w', encoding=encoding) as data_file:
			data_file.write(','.join(obj))
	else:
		raise NotImplementedError(
			'Cannot determine a supported format for out_file={!r} and filetype={!r}'.format(out_file, filetype)
		)


def numpy_to_hdf(obj, path, name):
	"""Store the array `obj` in a PyTables file inside the directory `path`.

	`name` gets an '.hdf' suffix when it lacks one. The resulting name is used both as the
	file name and as the name of the array node created under the file root. The file is
	opened in 'w' mode.
	"""
	if not name.endswith('.hdf'):
		name = '{}.hdf'.format(name)

	target = os.path.join(path, name)
	with tables.open_file(target, 'w') as hdf_file:
		node = hdf_file.create_carray(hdf_file.root, name, tables.Atom.from_dtype(obj.dtype), obj.shape)
		node[:] = obj


def hdf_to_numpy(path, name, compression_level=0, compression_lib='zlib'):
	"""Read an array from a PyTables file in the directory `path` and return it as a numpy array.

	`name` gets an '.hdf' suffix when it lacks one. The node is looked up under the file root
	by that full name first; if no such node exists, the part of the name before the first '.'
	is tried instead.

	:param compression_level: passed to `tables.Filters` as `complevel`
	:param compression_lib: passed to `tables.Filters` as `complib`
	"""
	file_name = name if name.endswith('.hdf') else '{}.hdf'.format(name)
	filters = tables.Filters(complevel=compression_level, complib=compression_lib)

	with tables.open_file(os.path.join(path, file_name), 'r', filters=filters) as hdf_file:
		root = hdf_file.root
		try: # TODO: quick hack, remove later - falls back to the node name without the suffix
			arr = np.array(getattr(root, file_name).read())
		except tables.exceptions.NoSuchNodeError:
			fallback_name = file_name.split('.')[0]
			arr = np.array(getattr(root, fallback_name).read())
	return arr


def numpy_to_hdf5(obj, path, name):
	"""Write `obj` as the dataset 'hdf5_data' of an HDF5 file in the directory `path`.

	`name` gets an '.hdf5' suffix when it lacks one; the file is opened in 'w' mode.
	"""
	if not name.endswith('.hdf5'):
		name = '{}.hdf5'.format(name)

	target = os.path.join(path, name)
	with h5py.File(target, 'w') as h5_file:
		h5_file.create_dataset('hdf5_data', data=obj)


def hdf5_to_numpy(path, name, **kwargs):
	"""Read every top-level entry of an HDF5 file and stack the values into one numpy array.

	`name` gets an '.hdf5' suffix when it lacks one.

	:param kwargs: `return_np_array_only` (default True) - when false, the list of keys is returned as well
	:return: the array, or the tuple `(array, keys)` if `return_np_array_only` is false
	"""
	if not name.endswith('.hdf5'):
		name = '{}.hdf5'.format(name)

	with h5py.File(os.path.join(path, name), 'r') as h5_file:
		keys = list(h5_file.keys())
		# `[()]` reads the complete content of each entry.
		X = np.array([h5_file[key][()] for key in keys])

	if kwargs.get('return_np_array_only', True):
		return X
	return X, keys


def hdf5_to_dict_of_arrays(path, name, **kwargs):
	"""Read the top-level entries of an HDF5 file into a dict mapping key -> array.

	`name` gets an '.hdf5' suffix when it lacks one.

	:param kwargs: `expect_int_keys` (default True) - convert keys to `int` and skip every
		entry whose key is not an integer literal; when false, the keys are kept as strings
	:return: dict of the loaded arrays
	"""
	name = name if name.endswith('.hdf5') else '{}.hdf5'.format(name)
	expect_int_keys = kwargs.pop('expect_int_keys', True)

	data = {}
	with h5py.File(os.path.join(path, name), 'r') as f:
		for key in f.keys():
			dict_key = key
			if (expect_int_keys):
				# Only the key conversion is guarded: a ValueError raised while reading the
				# dataset itself must surface instead of silently dropping the entry.
				try:
					dict_key = int(key)
				except ValueError:
					continue
			data[dict_key] = f[key][:]

	return data


def sparse_matrix_to_hdf(obj, path, name):
	"""Serialise a scipy sparse matrix by dispatching on its storage format.

	CSR and CSC matrices go to `sparse_csx_matrix_to_hdf`, COO matrices to
	`sparse_coo_matrix_to_hdf`.

	:raises ValueError: for any other type of `obj`
	"""
	is_csx = sparse.isspmatrix_csr(obj) or sparse.isspmatrix_csc(obj)
	if is_csx:
		sparse_csx_matrix_to_hdf(obj, path, name)
		return

	if sparse.isspmatrix_coo(obj):
		sparse_coo_matrix_to_hdf(obj, path, name)
		return

	raise ValueError('Type {} not yet supported for serialisation!'.format(type(obj)))


def hdf_to_sparse_matrix(path, name, sparse_format):
	"""Load a sparse matrix from an HDF file, dispatching on `sparse_format`.

	:param sparse_format: 'csr', 'csc' or 'coo'
	:raises ValueError: for any other value of `sparse_format`
	"""
	if sparse_format in ('csr', 'csc'):
		return hdf_to_sparse_csx_matrix(path, name, sparse_format)

	if sparse_format == 'coo':
		return hdf_to_sparse_coo_matrix(path, name)

	raise ValueError('Sparse format "{}" not yet supported for de-serialisation!'.format(sparse_format))


def hdf_to_sparse_csx_matrix(path, name, sparse_format):
	"""Rebuild a CSR or CSC matrix from the 'csx_*' nodes of an HDF file.

	:param sparse_format: prefix of the scipy constructor to use, i.e. `sparse.<sparse_format>_matrix`
	"""
	data, indices, indptr, shape = _get_attrs_from_hdf_file(path, name, 'csx', ['data', 'indices', 'indptr', 'shape'])
	matrix_cls = getattr(sparse, '{}_matrix'.format(sparse_format))

	return matrix_cls((data, indices, indptr), shape=tuple(shape))


def hdf_to_sparse_coo_matrix(path, name):
	"""Rebuild a COO matrix from the 'coo_*' nodes (data, rows, cols, shape) of an HDF file."""
	data, rows, cols, shape = _get_attrs_from_hdf_file(path, name, 'coo', ['data', 'rows', 'cols', 'shape'])

	return sparse.coo_matrix((data, (rows, cols)), shape=shape)


def _get_attrs_from_hdf_file(path, name, sparse_format, attributes):
	"""Read the nodes '<sparse_format>_<attribute>' from the root of an HDF file.

	:return: list with the content of each node, in the order given by `attributes`
	"""
	with tables.open_file(os.path.join(path, name), 'r') as hdf_file:
		values = [
			getattr(hdf_file.root, '{}_{}'.format(sparse_format, attribute)).read()
			for attribute in attributes
		]
	return values


def sparse_csx_matrix_to_hdf(obj, path, name):
	"""Write the components of a CSR/CSC matrix to an HDF file as the nodes 'csx_<component>'.

	The components are `data`, `indices`, `indptr` and `shape`; the file is opened in 'a' mode.
	"""
	components = ('data', 'indices', 'indptr', 'shape')

	with tables.open_file(os.path.join(path, name), 'a') as hdf_file:
		for component in components:
			# `shape` is a tuple, so everything is converted to an array first.
			values = np.asarray(getattr(obj, component))
			node = hdf_file.create_carray(
				hdf_file.root, 'csx_{}'.format(component), tables.Atom.from_dtype(values.dtype), values.shape
			)
			node[:] = values


def sparse_coo_matrix_to_hdf(obj, path, name='coo_matrix.hdf'):
	"""Write the components of a COO matrix to an HDF file as the nodes 'coo_data', 'coo_rows', 'coo_cols' and 'coo_shape'.

	:param obj: scipy COO matrix
	:param path: directory of the target file
	:param name: file name inside `path`; defaults to 'coo_matrix.hdf', the name that was
		previously hard-coded. Accepting it lets `sparse_matrix_to_hdf` pass the file name
		and lets `hdf_to_sparse_coo_matrix(path, name)` read the same file back.
	"""
	# `row`/`col` are aligned with `data` element by element. `nonzero()` would drop the
	# coordinates of explicitly stored zeros and leave rows/cols shorter than `data`.
	components = (
		('coo_data', obj.data),
		('coo_rows', obj.row),
		('coo_cols', obj.col),
		('coo_shape', obj.shape),
	)

	# A single open in 'a' mode replaces the four separate re-opens of the same file.
	with tables.open_file(os.path.join(path, name), 'a') as f:
		for node_name, values in components:
			arr = np.asarray(values)
			atom = tables.Atom.from_dtype(arr.dtype)
			d = f.create_carray(f.root, node_name, atom, arr.shape)
			d[:] = arr


def numpy_to_csv(X, filename, delimiter=',', header=None):
	"""Write the rows of `X` to a CSV file.

	:param X: iterable of rows, each row an iterable of values (e.g. a 2-d array)
	:param filename: path of the file to write
	:param delimiter: field separator
	:param header: optional row written before the data rows
	"""
	# newline='' lets the csv writer control line endings itself; without it, platforms that
	# translate '\n' on write (Windows) end up with '\r\r\n' and thus blank lines between rows.
	with open(filename, 'w', newline='') as out_file:
		csv_writer = csv.writer(out_file, delimiter=delimiter)
		if (header is not None):
			csv_writer.writerow(header)
		csv_writer.writerows(X)
