"""Argument parsers."""


import argparse
import sys

from synthaser import __version__


def add_rules_group(parser):
    """Attach the "Other arguments" group to *parser*.

    The group carries a single option, ``-uf/--rule_file``, which is stored
    as a plain string and is ``None`` when not supplied.
    """
    rules = parser.add_argument_group("Other arguments")
    rule_file_help = "Custom rule file generated by the synthaser rule generator"
    rules.add_argument("-uf", "--rule_file", help=rule_file_help)


def add_cdsearch_group(parser):
    """Add the "CD-Search parameters" argument group to *parser*.

    Registers the results-file and run-ID options along with the CD-Search
    tuning options (``--smode``, ``--useid1``, ``--compbasedadj``,
    ``--filter``, ``--evalue``, ``--maxhit`` and ``--dmode``).

    Only ``--dmode`` is given an argparse default ("full"); every other option
    parses as ``None`` when omitted. The "(def. ...)" values quoted in the help
    strings are not applied by this function.

    Args:
        parser: The ``argparse`` parser (or sub-parser) to extend.
    """
    group = parser.add_argument_group("CD-Search parameters")
    group.add_argument(
        "-rf",
        "--results_file",
        help="Path to results file from a previous search, or path to save results of"
        " a new search",
    )
    group.add_argument(
        "--cdsid",
        help="CD-Search run ID, e.g. QM3-qcdsearch-B4BAD4B59BC5B80-3E7CFCD3F93E21D0."
        " If provided, will attempt to retrieve results instead of starting a new"
        " search.",
    )
    group.add_argument(
        "--smode",
        choices=["auto", "prec", "live"],
        help="CD-Search search mode (def. auto)",
    )
    # The boolean-like switches below are kept as the literal strings listed
    # in ``choices``; no conversion to bool/int happens here.
    group.add_argument(
        "--useid1",
        choices=["true", "false"],
        help="Let NCBI find protein ID's in their archival database if they are not"
        " recognized as current Protein Entrez entries (def. true)",
    )
    group.add_argument(
        "--compbasedadj",
        choices=["0", "1"],
        help="Use composition-corrected scoring (def. 1)",
    )
    group.add_argument(
        "--filter",
        choices=["true", "false"],
        help="Filter out compositionally biased regions from query sequences (def. true)",
    )
    group.add_argument(
        "--evalue",
        type=float,
        help="Maximum E-value threshold (def. 3.0). Note that by default this is very"
        " generous, to catch domains that typically have low scores (e.g. 'SAT' domains)",
    )
    # A hit count is a whole number, so parse it as int rather than float
    # (otherwise "--maxhit 500" would be stored as 500.0).
    group.add_argument(
        "--maxhit",
        type=int,
        help="Maximum number of conserved domain hits to return from search (def. 500)",
    )
    group.add_argument(
        "--dmode",
        default="full",
        choices=["full", "rep", "std"],
        help="What level of CD-Search hits to report (def. full)",
    )


def add_searchopts_group(parser):
    """Attach the "Search options" group to *parser*.

    Adds ``-m/--mode`` (either "local" or "remote", defaulting to "remote")
    and ``-db/--database`` (free-form string, ``None`` when omitted).
    """
    mode_help = (
        "Specifies synthaser search mode (def. 'remote'). If 'local', will run"
        " rpsblast against a local database."
    )
    database_help = (
        "Name of the database to search (def. cdd). If --mode is local, this should"
        " be the name of a valid rpsblast database"
    )

    search_opts = parser.add_argument_group("Search options")
    search_opts.add_argument(
        "-m", "--mode", default="remote", choices=["local", "remote"], help=mode_help
    )
    search_opts.add_argument("-db", "--database", help=database_help)


def add_output_group(parser):
    """Attach the "Output" group to *parser*.

    Options registered, in order:

    * ``-p/--plot``: ``False`` when absent, ``True`` when given bare, or the
      supplied string when given a value.
    * ``-json/--json_file``: plain string, ``None`` when absent.
    * ``-o/--output``: writable file handle; ``sys.stdout`` when absent.
    * ``-lf/--long_form``: boolean flag.
    """
    output = parser.add_argument_group("Output")

    # Optional value: a bare flag resolves to the const, absence to the default.
    plot_opts = dict(nargs="?", const=True, default=False, help="Generate a synthaser plot")
    output.add_argument("-p", "--plot", **plot_opts)

    json_help = (
        "Serialise Synthase objects to JSON. If this is specified, the synthases"
        " can be loaded from this file using the synthaser Python API."
    )
    output.add_argument("-json", "--json_file", help=json_help)

    summary_opts = dict(
        nargs="?",
        default=sys.stdout,
        type=argparse.FileType("w"),
        help="Save domain architecture summary to file",
    )
    output.add_argument("-o", "--output", **summary_opts)

    output.add_argument(
        "-lf", "--long_form", action="store_true", help="Return output in long data format"
    )


def add_input_group(parser):
    """Attach the "Input" group to *parser*.

    Adds ``-qf/--query_file`` (opened for reading by argparse) and
    ``-qi/--query_ids`` (one or more identifiers collected into a list).
    Both are ``None`` when not supplied.
    """
    inputs = parser.add_argument_group("Input")

    fasta_reader = argparse.FileType("r")
    inputs.add_argument(
        "-qf",
        "--query_file",
        help="Path to FASTA file containing query synthase sequences",
        type=fasta_reader,
    )

    ids_help = (
        "Collection of NCBI sequence identifiers corresponding to"
        " query synthases"
    )
    inputs.add_argument("-qi", "--query_ids", help=ids_help, nargs="+")


def add_search_subparser(subparsers):
    """Register the ``search`` sub-command on *subparsers*.

    The sub-parser is populated with the Input, Output, Search options,
    CD-Search parameters and rule-file argument groups, in that order.

    Args:
        subparsers: The object returned by ``ArgumentParser.add_subparsers``.
    """
    parser = subparsers.add_parser(
        "search",
        help="Run a synthaser search",
        description="Run a synthaser search.",
        # The custom-rules example uses -uf/--rule_file, which is the only
        # rule-related option the groups attached below actually define.
        epilog="Usage examples\n--------------\n"
        "Analyse sequences in a FASTA file and generate a plot:\n"
        "  $ synthaser search -qf sequences.fa -p\n\n"
        "Analyse sequences from the NCBI Protein database and save the search:\n"
        "  $ synthaser search -qi Q0CJ59.1 CAA39295.1 -json session.json\n\n"
        "Use a custom rule file:\n"
        "  $ synthaser search -qf sequences.fa -uf my_rules.json\n\n"
        "Download a CDD database and do a local search:\n"
        "  $ synthaser getdb Smart mydatabases\n"
        "  $ synthaser search -qf sequences.fa \\\n"
        "      -m local -db mydatabases/Smart_LE/Smart\n\n"
        "Cameron L.M. Gilchrist, 2020.",
        # Raw formatter keeps the hand-laid-out epilog line breaks intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add_input_group(parser)
    add_output_group(parser)
    add_searchopts_group(parser)
    add_cdsearch_group(parser)
    add_rules_group(parser)


def add_getseq_subparser(subparsers):
    """Register the ``getseq`` sub-command on *subparsers*.

    The sub-command takes one or more positional ``sequence_ids`` and an
    optional ``-o/--output`` writable file, which is ``sys.stdout`` when the
    option is absent.
    """
    summary = "Download sequences from NCBI"
    details = (
        "Download sequences from NCBI in FASTA format. This utility will"
        " accept either a file containing newline separated sequence identifiers,"
        " or directly on the command line separated by spaces."
    )
    getseq = subparsers.add_parser(
        "getseq",
        help=summary,
        description=details,
        epilog="Cameron L.M. Gilchrist 2019",
    )

    getseq.add_argument(
        "sequence_ids",
        help="Collection of NCBI sequence identifiers to retrieve",
        nargs="+",
    )
    getseq.add_argument(
        "-o",
        "--output",
        help="Where to print output (def. stdout)",
        type=argparse.FileType("w"),
        nargs="?",
        default=sys.stdout,
    )


def add_getdb_subparser(subparsers):
    """Register the ``getdb`` sub-command on *subparsers*.

    The sub-command takes two positionals: ``database`` (restricted to a
    fixed list of names) and ``folder`` (destination for the download).
    """
    available = [
        "cdd_families",
        "Cdd",
        "Cdd_NCBI",
        "Cog",
        "Kog",
        "Pfam",
        "Prk",
        "Smart",
        "Tigr",
    ]
    details = (
        "Download a pre-formatted rpsblast database.\n\n"
        "For full description of the available databases, see:\n"
        " https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#CDSource\n\n"
        "Note that 'cdd_families' will download a file containing a summary of\n"
        "all families in the CDD for rule building - not a searchable database."
    )

    getdb = subparsers.add_parser(
        "getdb",
        help="Download a CDD database for local searches",
        description=details,
        epilog="Cameron L.M. Gilchrist 2019",
        # Description contains deliberate line breaks; keep them as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    getdb.add_argument("database", choices=available, help="Database to be downloaded")
    getdb.add_argument(
        "folder",
        help="Folder where database is to be saved. Will save a .tar.gz file, and"
        " extract its contents to a folder of the same name.",
    )


def add_extract_subparser(subparsers):
    """Register the ``extract`` sub-command on *subparsers*.

    Positionals are ``session`` and ``prefix``. ``-m/--mode`` selects
    "domain" (the default) or "synthase"; ``--types``, ``--classes`` and
    ``--families`` each collect one or more values and are ``None`` when
    omitted.
    """
    # One entry per output line; blank entries produce the empty separator
    # lines between examples.
    epilog_lines = [
        "Usage examples",
        "--------------",
        "Extract KS, A and TE domain sequences:",
        "  $ synthaser extract session.json out_ --types KS A TE",
        "  Output: out_KS.faa out_A.faa out_TE.faa",
        "",
        "Extract NRPS and non-reducing PKS sequences:",
        "  $ synthaser extract session.json out_ \\",
        "      --mode synthase \\",
        "      --classes Non-reducing NRPS",
        "  Output: out_Non-reducing.faa out_NRPS.faa",
        "",
        "Extract PKS_KS domains (CDD) only from highly-reducing PKSs:",
        "  $ synthaser extract session.json out_ \\",
        "      --families PKS_KS \\",
        "      --classes Highly-reducing",
        "  Output: out_PKS_KS.faa",
        "",
        "Cameron L.M. Gilchrist, 2020.",
    ]
    extract = subparsers.add_parser(
        "extract",
        help="Extract domain/synthase sequences from search results",
        description="Extract domain/synthase sequences from search results.",
        epilog="\n".join(epilog_lines),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    for positional, summary in (
        ("session", "Synthaser session file"),
        ("prefix", "Output file prefix"),
    ):
        extract.add_argument(positional, help=summary)

    extract.add_argument(
        "-m",
        "--mode",
        choices=["domain", "synthase"],
        default="domain",
        help="Extract domain sequences or whole synthases from a session file",
    )

    # The three filters share the same shape: one or more values each.
    for flag, summary in (
        ("--types", "Domain types"),
        ("--classes", "Sequence classifications"),
        ("--families", "CDD families"),
    ):
        extract.add_argument(flag, nargs="+", help=summary)


def add_genbank_subparser(subparsers):
    """Register the ``genbank`` sub-command on *subparsers*.

    Takes a positional ``genbank`` file argument and a boolean
    ``--antismash`` flag (``False`` unless given).
    """
    details = (
        "Extract protein sequences from GenBank files."
        " To extract PKS or NRPS sequences from antiSMASH GenBank files,"
        " use the --antismash option."
    )
    genbank = subparsers.add_parser(
        "genbank",
        description=details,
        help="Extract protein sequences from GenBank files for analysis",
    )
    genbank.add_argument("genbank", help="GenBank file")
    genbank.add_argument(
        "--antismash",
        help="Extract PKS/NRPS sequences from an antiSMASH file",
        action="store_true",
    )


def add_config_subparser(subparsers):
    """Register the ``config`` sub-command on *subparsers*.

    Adds three optional settings, each ``None`` when omitted: ``--email``
    and ``--api_key`` (strings) and ``--max_tries`` (integer).
    """
    usage_example = (
        "Example usage\n-------------\n"
        "Set an e-mail address:\n"
        ' $ synthaser config --email "foo@bar.com"\n\n'
    )
    config = subparsers.add_parser(
        "config",
        help="Configure synthaser",
        description="Configure synthaser (e.g. for setting NCBI e-mail addresses or API keys)",
        epilog=usage_example,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    settings = (
        ("--email", str, "Your e-mail address, required by NCBI to prevent abuse"),
        ("--api_key", str, "NCBI API key"),
        ("--max_tries", int, "How many times failed requests are retried (def. 3)"),
    )
    for flag, converter, summary in settings:
        config.add_argument(flag, help=summary, type=converter)


def get_parser():
    """Build and return the top-level ``synthaser`` argument parser.

    The parser has a ``--version`` flag and one sub-command per registrar
    called below; the chosen sub-command name is stored as ``command``.
    """
    about = (
        "synthaser: a Python toolkit for analysing domain architecture of"
        " secondary metabolite megasynth (et) ases with NCBI CD-Search."
    )
    root = argparse.ArgumentParser(
        "synthaser",
        description=about,
        epilog="Cameron L.M. Gilchrist 2020",
    )
    root.add_argument(
        "--version",
        action="version",
        version="%(prog)s " + __version__,
    )

    commands = root.add_subparsers(dest="command")
    # Registration order determines the order sub-commands appear in --help.
    registrars = (
        add_search_subparser,
        add_getdb_subparser,
        add_getseq_subparser,
        add_extract_subparser,
        add_genbank_subparser,
        add_config_subparser,
    )
    for register in registrars:
        register(commands)
    return root


def parse_args(args):
    """Parse *args* with the synthaser parser and validate the result.

    Args:
        args: Sequence of command-line tokens handed to ``parse_args``.

    Returns:
        The populated ``argparse.Namespace``.

    Raises:
        ValueError: For ``search`` in remote mode with a database outside the
            accepted names, or for ``search`` with none of ``query_ids``,
            ``query_file`` or ``json_file`` given.

    When no sub-command is given, the help text is printed and the parser
    exits with status 1.
    """
    parser = get_parser()
    namespace = parser.parse_args(args)

    if not namespace.command:
        parser.print_help()
        parser.exit(1)

    # Every remaining check applies to the "search" sub-command only.
    if namespace.command != "search":
        return namespace

    remote_databases = (None, "cdd", "pfam", "smart", "tigrfam", "cog", "kog")
    if namespace.mode == "remote" and namespace.database not in remote_databases:
        raise ValueError("Expected 'cdd', 'pfam', 'smart', 'tigrfam', 'cog' or 'kog'")

    query_sources = (namespace.query_ids, namespace.query_file, namespace.json_file)
    if not any(query_sources):
        raise ValueError("Expected query_ids, query_file or json_file")

    return namespace
