from lxml import etree
import glob
import csv


def name_to_text(name, root):
    """Join the text of every ``namePart`` child of *name* into one string.

    Each matching child's text is stripped of surrounding whitespace and the
    pieces are joined with single spaces, in document order.

    name -- element whose ``namePart`` children are read.
    root -- object whose ``nsmap`` is passed as the namespace mapping for the
            child lookup.

    Returns the joined string ('' when there are no ``namePart`` children).
    Raises AttributeError if a matching child's ``text`` is None.
    """
    pieces = []
    for part in name.findall('namePart', root.nsmap):
        # No guard for text being None: the AttributeError is left to propagate.
        pieces.append(part.text.strip())
    return ' '.join(pieces)


def get_names(entry, root, sep=';'):
    """Return all names found directly under *entry* as one delimited string.

    entry -- element whose ``name`` children are collected.
    root  -- object whose ``nsmap`` is used for the lookup; it is also handed
             on to ``name_to_text`` for each name.
    sep   -- string placed between consecutive names (default ';').

    Returns '' when *entry* has no ``name`` children. Any exception raised by
    ``name_to_text`` propagates to the caller.
    """
    name_nodes = entry.findall('name', root.nsmap)
    return sep.join(name_to_text(node, root) for node in name_nodes)


def get_tag_text(tag, entry, root):
    """Return the text of the first element under *entry* matching *tag*.

    tag   -- path expression passed to ``entry.find``.
    entry -- element to search in.
    root  -- object whose ``nsmap`` is passed as the namespace mapping.

    Returns the matched element's ``text`` (which may itself be None), or
    None whenever the lookup raises AttributeError -- for example because
    ``find`` returned None when nothing matched.
    """
    try:
        node = entry.find(tag, root.nsmap)
        text = node.text
    except AttributeError:
        # Covers a missing match (node is None) as well as any other
        # AttributeError raised while performing the lookup.
        text = None
    return text
    

def get_row(entry, root):
    """Build the output row for a single entry.

    entry -- element to extract the fields from.
    root  -- object forwarded to ``get_tag_text`` and ``get_names``.

    Returns a list ``[identifier, title, url, year, venue, authors]``. Each of
    the first five items is whatever ``get_tag_text`` returns for its path
    (None when unavailable). ``authors`` is the string from ``get_names``, or
    None when ``get_names`` raises AttributeError.
    """
    # Tag paths listed in the same order as the columns of the returned row.
    tag_paths = (
        'identifier',                   # identifier
        'titleInfo/title',              # title
        'location/url',                 # url
        'originInfo/dateIssued',        # year
        'relatedItem/titleInfo/title',  # venue
    )
    row = [get_tag_text(path, entry, root) for path in tag_paths]

    try:
        authors = get_names(entry, root)
    except AttributeError:
        # Per the original author's note, this happens for proceedings
        # entries; the authors field is then left empty.
        authors = None
    row.append(authors)
    return row


def rows_for_file(filename):
    """Parse one XML file and return a data row for each of its entries.

    filename -- source handed to ``etree.parse``.

    The ``mods`` elements found with ``root.findall('mods', root.nsmap)`` are
    each converted with ``get_row``. Returns the resulting list of rows, in
    the order the entries were found (empty when there are none).
    """
    root = etree.parse(filename).getroot()
    return [get_row(mods_entry, root)
            for mods_entry in root.findall('mods', root.nsmap)]


# Gather one row per entry from every XML file in ./xml.
# glob.glob() returns paths in arbitrary order, so sort them to make the
# order of rows in the output reproducible between runs and machines.
files = sorted(glob.glob('./xml/*.xml'))
all_rows = []
for filename in files:
    rows = rows_for_file(filename)
    all_rows.extend(rows)

# Column order matches the row layout produced by get_row().
headers = ['identifier', 'title', 'url', 'year', 'venue', 'authors']
# newline='' is what the csv module requires of files it writes to; without
# it, platforms that translate '\n' end up with '\r\r\n' row endings.
# An explicit UTF-8 encoding keeps non-ASCII names and titles from failing or
# being mis-encoded under a non-UTF-8 locale default.
with open('inlg.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    writer.writerows(all_rows)
