import json
import os
import urllib.error
import urllib.parse
import urllib.request
from datetime import date

from nltk import pos_tag
from nltk import tree as ntree
from nltk.stem import WordNetLemmatizer

from cltl.combot.backend.api.discrete import UtteranceType
from . import wordnet_utils as wu

# Shared WordNet lemmatizer instance used by lemmatize().
wnl = WordNetLemmatizer()

# Directory one level above the directory containing this module.
ROOT = os.path.join(os.path.dirname(__file__), '..')

# Word lexicon consulted by lexicon_lookup(); loaded once at import time.
# The context manager closes the file handle as soon as parsing is done and
# the encoding is pinned so the result does not depend on the platform default.
with open(os.path.join(ROOT, 'data', 'lexicon.json'), encoding='utf-8') as _lexicon_file:
    lexicon = json.load(_lexicon_file)


def trim_dash(triple):
    """
    Strip one leading and one trailing dash from every non-empty value.

    :param triple: mapping whose values are the triple elements
                   (subject, predicate, object) as strings; modified in place
    :return: the same mapping, with at most one dash removed from each end
             of every value
    """
    for key in triple:
        value = triple[key]
        # Empty or missing elements are left untouched.
        if not value:
            continue
        if value.startswith('-'):
            value = value[1:]
        if value.endswith('-'):
            value = value[:-1]
        triple[key] = value
    return triple


def fix_pronouns(pronoun, speaker):
    """
    Resolve a first or second person pronoun to the entity it refers to.

    :param pronoun: personal pronoun which is said in the sentence
    :param speaker: the original speaker of the utterance
    :return: `speaker` for a first person pronoun, 'leolani' for a second
             person pronoun. Anything else (third person pronouns, words that
             are not in the lexicon or have no 'person' feature) is returned
             unchanged; no disambiguation is attempted and plural forms are
             not treated specially.
    """
    # The second parameter of lexicon_lookup() is a category filter, not the
    # lexicon itself; omitting it searches every category, which is what the
    # previous call (passing the lexicon dict) ended up doing as well.
    entry = lexicon_lookup(pronoun)

    if not entry or 'person' not in entry:
        return pronoun

    person = entry['person']
    if person == 'first':
        return speaker
    if person == 'second':
        return 'leolani'
    return pronoun


def lemmatize(word, tag=''):
    """
    Lemmatize a word or a whitespace-separated phrase with the WordNet lemmatizer.

    :param word: word (or phrase) to be lemmatized
    :param tag: POS tag passed to the lemmatizer; only used for single words
    :return: the lemma; for a phrase, the lemmas of its words joined by
             single spaces (the tag is not applied in that case)
    """
    tokens = word.split()
    if len(tokens) > 1:
        # Phrases are lemmatized word by word, without the POS tag.
        return ' '.join(wnl.lemmatize(token) for token in tokens).strip()
    if tag == '':
        return wnl.lemmatize(word)
    return wnl.lemmatize(word, tag)


def lexicon_lookup(word, typ=None):
    """
    Find a word in the lexicon and return the features stored for it.

    :param word: word which we're looking up
    :param typ: optional restriction of the categories that are searched:
                'verb', 'pos', 'to_be', 'aux', 'modal', 'pronouns', 'lexical',
                'kinship' or 'det'. Any other value (including None and
                'category') searches every category.
    :return: the entry of the first category that contains the word, or None
             when the word is not found. When typ is 'category' a tuple
             (category, entry) is returned instead of the bare entry.
    """

    # Pronoun categories.
    pronoun_section = lexicon["pronouns"]
    subj_pronouns = pronoun_section["subject"]
    obj_pronouns = pronoun_section["object"]
    possessive_section = pronoun_section["possessive"]
    dependent_poss = possessive_section["dependent"]
    independent_poss = possessive_section["independent"]
    reflexives = pronoun_section["reflexive"]
    indefinite_section = pronoun_section["indefinite"]
    indef_person = indefinite_section["person"]
    indef_place = indefinite_section["place"]
    indef_thing = indefinite_section["thing"]

    # Verb categories.
    verb_section = lexicon["verbs"]
    be_forms = verb_section["to be"]
    auxiliary_section = verb_section["auxiliaries"]
    have_forms = auxiliary_section['have']
    do_forms = auxiliary_section["to do"]
    modal_verbs = auxiliary_section["modals"]
    lexical_verbs = verb_section["lexical verbs"]

    # Determiner categories.
    determiner_section = lexicon["determiners"]
    articles = determiner_section["articles"]
    demonstratives = determiner_section["demonstratives"]
    possessive_dets = determiner_section["possessives"]
    quantifiers = determiner_section["quantifiers"]
    wh_dets = determiner_section["wh-determiners"]
    numeral_section = determiner_section["numerals"]
    cardinals = numeral_section["cardinals"]
    ordinals = numeral_section["ordinals"]
    s_genitive = determiner_section["s-genitive"]

    # Conjunction categories.
    conjunction_section = lexicon["conjunctions"]
    coordinators = conjunction_section["coordinating"]
    subordinators = conjunction_section["subordinating"]

    # Question words and kinship terms.
    question_words = lexicon["question words"]
    kinship = lexicon["kinship"]

    all_pronouns = [subj_pronouns,
                    obj_pronouns,
                    dependent_poss,
                    independent_poss,
                    reflexives,
                    indef_person,
                    indef_place,
                    indef_thing]
    all_verbs = [be_forms,
                 do_forms,
                 have_forms,
                 modal_verbs,
                 lexical_verbs]

    # Filter name -> categories to search, in search order.
    # NOTE(review): 'det' searches the possessive pronoun container itself
    # (the mapping indexed above with 'dependent'/'independent'), not its
    # sub-categories - confirm this is intended.
    restricted = (
        ('verb', all_verbs),
        ('pos', [dependent_poss]),
        ('to_be', [be_forms]),
        ('aux', [do_forms, be_forms, have_forms]),
        ('modal', [modal_verbs]),
        ('pronouns', all_pronouns),
        ('lexical', [lexical_verbs]),
        ('kinship', [kinship]),
        ('det', [articles, demonstratives, possessive_dets, possessive_section, cardinals, ordinals]),
    )
    everything = all_pronouns + all_verbs + [articles,
                                             demonstratives,
                                             possessive_dets,
                                             quantifiers,
                                             wh_dets,
                                             cardinals,
                                             ordinals,
                                             s_genitive,
                                             coordinators,
                                             subordinators,
                                             question_words,
                                             kinship]

    # The filter is matched with == rather than a dict lookup so that
    # unhashable values of typ simply fall back to searching everything.
    categories = next((selected for name, selected in restricted if typ == name), everything)

    want_category = typ == 'category'
    for category in categories:
        for key in category:
            if word == key:
                entry = category[key]
                if want_category:
                    return category, entry
                return entry
    return None


def get_triple_element_type(element, forest):
    """
    Determine the semantic type(s) of one triple element.

    :param element: text of one element from the triple; its words are
                    expected to be joined by dashes
    :param forest: parsed tree
    :return: for a dash-joined element either the string 'NE-col' (DBpedia
             returned more than one URI for it) or '<lexname>-col' (WordNet
             knows it as a collocation); otherwise a dictionary mapping the
             element, or each of its dash-separated parts, to the type
             reported by get_word_type
    """
    # Single word: type it directly.
    if '-' not in element:
        return {element: get_word_type(element, forest)}

    # Multiword element.
    text = element.replace(" ", "-")

    # Entities with more than one URI in DBpedia are treated as named
    # entities / collocations.
    if len(get_uris(text.strip())) > 1:
        return 'NE-col'

    # Collocations which exist in WordNet.
    lexname = get_lexname_in_tree(text, forest)
    if lexname:
        return lexname + '-col'

    # Known to neither DBpedia nor WordNet: considered composite, so every
    # part gets its own type.
    return {part: get_word_type(part, forest) for part in element.split('-')}


def get_word_type(word, forest):
    """
    Determine the semantic type of a single word.

    The sources are tried in order: the WordNet lexname, the features of the
    word's lexicon entry, and finally a mapping from its POS tag.

    :param word: one word from triple element
    :param forest: parsed syntax tree
    :return: semantic type of word; '' for an empty word and None when no
             source yields a type
    """

    if word == '':
        return ''

    lexname = get_lexname_in_tree(word, forest)
    if lexname is not None:
        return lexname

    # words which don't have a lexname are looked up in the lexicon
    entry = lexicon_lookup(word)
    if entry is not None:
        if 'proximity' in entry:
            return 'deictic:' + entry['proximity'] + ',' + entry['number']
        if 'person' in entry:
            return 'pronoun:' + entry['person']
        if 'root' in entry:
            return 'modal:' + str(entry['root'])
        if 'definite' in entry:
            # Concatenating the entry itself raises TypeError when the entry
            # is a feature dictionary; use its 'definite' feature in that
            # case and keep plain string entries working as before.
            definiteness = entry['definite'] if isinstance(entry, dict) else entry
            return 'article:' + str(definiteness)
        if 'integer' in entry:
            # str() so that numeric values do not break the concatenation.
            return 'numeral:' + str(entry['integer'])

    # for words which are not in the lexicon nor have a lexname,
    # the sem.type is derived from the POS tag
    pos_types = {'NN': 'agent', 'V': 'verb', 'IN': 'prep', 'TO': 'prep', 'MD': 'modal'}
    return pos_types.get(get_pos_tag(forest, word))


def get_lexname_in_tree(word, forest):
    """
    Look up the WordNet lexname of a word, using its POS tag to pick synsets.

    :param word: word for which we want a WordNet lexname
    :param forest: parsed forest of the sentence; its first element is used
                   to extract the POS tag
    :return: lexname of the first synset found for the word, or None for an
             empty word or when there is no synset
    https://wordnet.princeton.edu/documentation/lexnames5wn
    """
    if word == '':
        return None

    # POS tag from the tree, falling back to tagging the word on its own.
    pos_label = get_pos_tag(forest[0], word)

    synsets = wu.get_synsets(word, pos_label)
    if not synsets:
        return None
    return wu.get_lexname(synsets[0])


def get_pos_in_tree(tree, word):
    """
    This function extracts POS tag of a word from the parsed syntax tree

    Every nltk Tree found two levels below `tree` is searched (including all
    of its subtrees) for a subtree that has the word as a direct child.

    :param tree: syntax tree gotten from initial CFG parsing
    :param word: word whose POS tag we want
    :return: label of the first subtree that directly contains the word, as a
             string; '' when the word is not found
    """
    for el in tree:
        for node in el:
            # isinstance (rather than an exact type comparison) so that
            # subclasses of nltk's Tree are searched as well.
            if not isinstance(node, ntree.Tree):
                continue
            for subtree in node.subtrees():
                if any(child == word for child in subtree):
                    return str(subtree.label())
    return ''


def get_pos_tag(forest, word):
    """
    Get the POS tag of a word from the syntax tree, or from the word alone.

    :param forest: syntax tree gotten from initial CFG parsing
    :param word: word whose POS tag we want
    :return: the label found in the tree; when the tree yields none, the tag
             nltk's pos_tag assigns to the word in isolation
    """
    tree_label = get_pos_in_tree(forest, word)
    if tree_label == '':
        # Not found in the tree: tag the single word out of context.
        return pos_tag([word])[0][1]
    return tree_label


def dbp_query(q, base_url, format="application/json"):
    """
    Send a query to a DBpedia endpoint and return the decoded JSON response.

    :param q: query for DBpedia
    :param base_url: URL to connect to DBpedia
    :param format: format for query, typically json
    :return: the response body parsed as JSON
    """
    params = {
        "default-graph": "",
        "should-sponge": "soft",
        "query": q,
        "debug": "on",
        "timeout": "",
        "format": format,
        "save": "display",
        "fname": ""
    }

    # urlopen() requires the request body as bytes; handing it the str that
    # urlencode() returns raises TypeError for HTTP URLs.
    payload = urllib.parse.urlencode(params).encode("utf-8")

    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(base_url, payload) as response:
        return json.loads(response.read())


def get_uris(string):
    """
    Find the DBpedia resources whose English rdfs:label equals the string.

    The lookup is best effort: any failure while querying or while reading
    the response yields an empty list.

    :param string: string which we are querying for
    :return: list of URIs from DBpedia for the queried string
    """
    query = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                SELECT ?pred WHERE {
                  ?pred rdfs:label """ + "'" + _escape_sparql_literal(string) + "'" + """@en .
                }
                ORDER BY ?pred"""

    try:
        results = dbp_query(query, "http://dbpedia.org/sparql")
        return [binding['pred']['value'] for binding in results['results']['bindings']]
    except Exception:
        # Deliberately broad, but no longer a bare except: KeyboardInterrupt
        # and SystemExit are allowed to propagate.
        return []


def _escape_sparql_literal(text):
    """Escape the characters that would end or break a single-quoted SPARQL string literal."""
    return (text.replace('\\', '\\\\')
                .replace("'", "\\'")
                .replace('\n', '\\n')
                .replace('\r', '\\r'))


def utterance_to_capsules(utterance):
    """
    Transform an Utterance into a list of capsules, one per triple.

    :param utterance: object providing `triples` (mappings with the keys
                      'utterance_type', 'subject', 'predicate', 'object' and
                      'perspective'), `chat.id`, `turn`, `chat_speaker`,
                      `transcript` and `datetime`
    :return: list of capsule dictionaries; the context fields (place, people,
             objects, ...) are filled with empty placeholders
    """
    return [
        {
            "chat": utterance.chat.id,
            "turn": utterance.turn,
            "author": utterance.chat_speaker,
            "utterance": utterance.transcript,
            "utterance_type": triple['utterance_type'],
            # The capsule always spans the whole transcript.
            "position": "0-" + str(len(utterance.transcript)),
            # Triple content.
            "subject": triple['subject'],
            "predicate": triple['predicate'],
            "object": triple['object'],
            "perspective": triple["perspective"],
            # Context placeholders.
            "context_id": None,
            "date": utterance.datetime.isoformat(),
            "place": "",
            "place_id": None,
            "country": "",
            "region": "",
            "city": "",
            "objects": [],
            "people": [],
        }
        for triple in utterance.triples
    ]


def element_to_json(v):
    """
    Recursively convert a value into something json.dumps can serialize.

    :param v: value to convert
    :return: None, str, int, float and bool values unchanged; dates as ISO
             format strings; UtteranceType members as their name; lists and
             tuples as lists of converted items; dictionaries with converted
             values; any other object as the converted dictionary of its
             instance attributes
    """
    # Exact type check on purpose: subclasses of the primitives (for example
    # enum members) must reach the more specific branches below. bool is
    # listed explicitly because type(True) is bool, not int.
    if v is None or type(v) in (str, int, float, bool):
        return v
    if isinstance(v, date):
        return v.isoformat()
    if isinstance(v, UtteranceType):
        return v.name
    if isinstance(v, (list, tuple)):
        return [element_to_json(el) for el in v]
    if isinstance(v, dict):
        return {inner_k: element_to_json(inner_v) for inner_k, inner_v in v.items()}
    # Arbitrary object: serialize its instance attributes.
    return {inner_k: element_to_json(inner_v) for inner_k, inner_v in v.__dict__.items()}


def triple_to_json(triple):
    """
    Convert every value of a triple with element_to_json.

    :param triple: mapping of triple fields to values
    :return: new dictionary with the same keys and converted values
    """
    converted = {}
    for key, value in triple.items():
        converted[key] = element_to_json(value)
    return converted


def deduplicate_triples(triples):
    """
    Remove triples that serialize to the same JSON.

    :param triples: iterable of triple mappings
    :return: list with one JSON-converted dictionary per distinct triple, in
             order of first occurrence
    """
    # sort_keys makes the serialization independent of key order at every
    # nesting level, so equal triples always produce the same string.
    serialized = (json.dumps(triple_to_json(triple), sort_keys=True) for triple in triples)

    # dict.fromkeys keeps only the first occurrence of each string and
    # preserves insertion order, unlike set(), whose iteration order for
    # strings changes between interpreter runs.
    return [json.loads(text) for text in dict.fromkeys(serialized)]
