import spacy_udpipe


def _load_tokeniser_for_lang(language):
    """Return the spacy-udpipe pipeline for ``language``.

    The model is first loaded directly. If that attempt fails, the model is
    downloaded via ``spacy_udpipe.download`` and the load is retried once.

    Args:
        language: Language identifier passed unchanged to
            ``spacy_udpipe.load`` / ``spacy_udpipe.download``.

    Returns:
        The object returned by ``spacy_udpipe.load``.

    Raises:
        ValueError: If ``spacy_udpipe.load`` returns ``None``.
        Exception: Any error raised by the download or by the second load
            attempt propagates to the caller unchanged.
    """
    try:
        nlp = spacy_udpipe.load(language)
    except Exception:
        # Only ordinary errors trigger the download fallback; a bare
        # ``except:`` would also intercept KeyboardInterrupt/SystemExit and
        # start a download when the user is trying to abort.
        spacy_udpipe.download(language)
        nlp = spacy_udpipe.load(language)

    # Defensive guard in case the loader hands back nothing without raising.
    if nlp is None:
        raise ValueError(f'No UDPIPE tokeniser found for language={language}!')

    return nlp


def tokenise_udpipe(text, language):
    """Split ``text`` into a flat list of token strings using UDPipe.

    Args:
        text: The text to tokenise.
        language: Language identifier used to select the tokeniser.

    Returns:
        A list with the ``.text`` of every token, in sentence order and then
        token order within each sentence.
    """
    pipeline = _load_tokeniser_for_lang(language=language)
    parsed = pipeline(text)

    # Flatten sentence -> token into one list of surface strings.
    return [word.text for sentence in parsed.sents for word in sentence]