#!/usr/bin/env python3
# Developed by Zang Lab at University of Virginia - 2018

#Author: Jin Yong Yoo

import os
import sys, errno, warnings

# Working directory captured at import time; relative input and output paths
# given on the command line are joined onto it.
curr_path = os.getcwd()

# Leave one core for I/O, but never report fewer than one usable core.
# os.cpu_count() returns None when the core count cannot be determined, and a
# single-core machine would otherwise yield 0 here.
cpu_available = max(1, (os.cpu_count() or 1) - 1)

import subprocess
import argparse

# Imports from SICER package
from sicer.main import run_RECOGNICER_df
from sicer.lib import Utility, GenomeData

def warning_on_one_line(message, category, filename, lineno, file=None, line=None):
    '''Render a warning as a single "filename:lineno: Category:message" line.

    The ``file`` and ``line`` arguments are accepted but not used.
    '''
    fields = (filename, lineno, category.__name__, message)
    return '{!s}:{!s}: {!s}:{!s}\n'.format(*fields)


# Install the compact formatter so warnings are rendered by the function above.
warnings.formatwarning = warning_on_one_line

def _build_parser():
    '''Create the argparse parser describing every RECOGNICER-differential option.'''
    parser = argparse.ArgumentParser(description='Processing arguments for RECOGNICER', usage = "Use --help or -h for more information")

    parser.add_argument(
        '--treatment_file',
        '-t',
        required=True,
        nargs='+',
        type=str,
        help='''Names of the sample file you wish to run RECOGNICER on. This can either be the relative or the absolute path of the file. Must be in BED or BAM format.'''
    )

    parser.add_argument(
        '--control_file',
        '-c',
        required=False,
        nargs='*',
        type=str,
        help='''Name of the control library in BED or BAM format. This can either be the relative or the absolute path of the file. If you wish to run RECOGNICER without a control library, simply do not enter the file. '''
    )

    parser.add_argument(
        '--species',
        '-s',
        required=True,
        type=str,
        help='The species/genome used (ex: hg38)'
    )

    parser.add_argument(
        '--redundancy_threshold',
        '-rt',
        required=False,
        type=int,
        default=1,
        help='The number of copies of indentical reads allowed in a library. Default value is 1'
    )

    parser.add_argument(
        '--window_size',
        '-w',
        required=False,
        type=int,
        default=200,
        help='Resolution of RECOGNICER. Default value is 200 (bp)'
    )
    parser.add_argument(
        '--fragment_size',
        '-f',
        required=False,
        type=int,
        default=150,
        help='The amount of shift from the beginning of a read to the center of the DNA fragment represented by the read. Default value is 150 (bp).'
    )

    parser.add_argument(
        '--effective_genome_fraction',
        '-egf',
        required=False,
        type=float,
        default=0.74,
        help='Effective genome as fraction of the genome size. Default value is 0.74'
    )

    parser.add_argument(
        '--false_discovery_rate',
        '-fdr',
        required=False,
        default=0.01,
        type=float,
        help='''Remove all islands with an false_discovery_rate below cutoff. Default value is 0.01.'''
    )

    parser.add_argument(
        '--false_discovery_rate_df',
        '-fdr_df',
        required=False,
        default=0.01,
        type=float,
        help='Cutoff for identification of significant changes been wild-type library and knockout library. Only provide a value when comparing two libraries.'
    )

    parser.add_argument(
        '--output_directory',
        '-o',
        required=False,
        default=curr_path,
        type=str,
        help='Path of the directory in which results will be stored. Default path is the current path'
    )

    parser.add_argument(
        '--step_size',
        '-s_size',
        required=False,
        type=int,
        default=3,
        help='Step Size: the number of windows in one graining unit. Used for RECOGNICER algorithm '
    )

    parser.add_argument(
        '--step_score',
        '-s_score',
        required=False,
        type=int,
        default=2,
        help='Step Score: The minimum number of positive elements in the graining unit to call the unit positive. Used for RECOGNICER algorithm'
    )

    parser.add_argument(
        '--cpu',
        '-cpu',
        required=False,
        type=int,
        default=cpu_available,
        help='CPU Core Count: The number of CPU cores RECOGNICER program will use when executing multi-processing tasks. Ideal core count is the species\' number of chromosomes. Default value is the maximum number of cores avaiable in the system.'
    )

    parser.add_argument(
        '--significant_reads',
        required=False,
        action='store_true',
        help='Output Significant Reads: Enter \"--significant_reads\" to have SICER produce a BED file of treatment reads filtered by significant islands and WIG file of filtered reads binned into windows'
    )

    parser.add_argument(
        "--verbose",
        "-v",
        required=False,
        help="increase console output verbosity",
        action="store_true")

    return parser


def _convert_bam_to_bed(bam_path, bed_path):
    '''Write the BED form of bam_path to bed_path using "bedtools bamtobed".

    Exits the program with status 1 if bedtools cannot be started, returns a
    non-zero status, or the output file cannot be opened.
    '''
    try:
        with open(bed_path, 'w') as bed_out:
            # Argument list + explicit stdout redirect instead of a shell
            # string: paths containing spaces or shell metacharacters are
            # passed to bedtools verbatim and never interpreted by a shell.
            returncode = subprocess.call(
                ['bedtools', 'bamtobed', '-i', bam_path],
                stdin=subprocess.DEVNULL,
                stdout=bed_out,
            )
    except OSError:
        # Raised when the bedtools executable is missing or bed_path is not
        # writable; report it the same way as a failed conversion.
        returncode = -1

    if returncode != 0:
        # Drop the empty/partial output so a later run does not find it and
        # skip the conversion. Best effort: the file may never have been created.
        try:
            os.remove(bed_path)
        except OSError:
            pass
        sys.stderr.write("Error: Cannot convert BAM file to BED file.\nCheck if bedtools2 (https://github.com/arq5x/bedtools2) has been installed correctly.\n")
        sys.exit(1)


def _prepare_input_files(paths, label):
    '''Validate the input files in ``paths`` and replace each entry in place.

    Every entry becomes an absolute path (relative ones are joined onto
    ``curr_path``). A BAM entry is replaced by the path of its BED
    counterpart, which is generated with bedtools when it does not exist yet.

    paths -- list of file paths as given on the command line (mutated).
    label -- "Treatment" or "Control"; only used in the format warning.

    Raises FileNotFoundError if an entry does not exist.
    '''
    for index, given in enumerate(paths):
        full_path = given if os.path.isabs(given) else os.path.join(curr_path, given)
        paths[index] = full_path

        if not Utility.fileExists(full_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), given)

        lowered = given.lower()
        if not lowered.endswith(('.bed', '.bam')):
            warnings.warn("%s file must be in BED or BAM format." % label)

        if lowered.endswith('.bam'):
            # Swap only the final extension, whatever its letter case. A
            # case-sensitive str.replace would leave "x.BAM" untouched (the
            # BAM would then be used as if it were BED) and would also rewrite
            # ".bam" occurring earlier in the path.
            bed_path = os.path.splitext(full_path)[0] + '.bed'
            if not Utility.fileExists(bed_path):
                _convert_bam_to_bed(full_path, bed_path)
            paths[index] = bed_path


def main():
    '''The main function/pipeline for RECOGNICER-differential enrichment.

    Parses the command line, validates and normalizes the arguments
    (absolute paths, BAM-to-BED conversion, CPU count, output directory) and
    passes the resulting namespace to run_RECOGNICER_df.main. Invalid input
    terminates the program with exit status 1; a missing input file raises
    FileNotFoundError.
    '''
    args = _build_parser().parse_args()
    args.subcommand = 'RECOGNICER'
    args.df = True

    # Check if argument inputs are valid
    if len(args.treatment_file) != 2:
        sys.stderr.write("Error: RECOGNICER needs two treatment files as input\n")
        sys.exit(1)

    _prepare_input_files(args.treatment_file, "Treatment")

    if args.species not in GenomeData.species_chroms.keys():
        sys.stderr.write("Error: Species " + args.species + " not recognized.\n")
        sys.exit(1)

    if not (0 <= args.effective_genome_fraction <= 1):
        sys.stderr.write("Error: Effective genome fraction must be a value between 0 and 1.\n")
        sys.exit(1)

    if args.step_score > args.step_size:
        sys.stderr.write("Error: Cannot have the step score be larger than step size.\n")
        sys.exit(1)

    # Keep the worker count within [1, usable cores]; a count below one
    # cannot describe a runnable configuration.
    max_cpu = max(1, cpu_available)
    if args.cpu > max_cpu:
        args.cpu = max_cpu
        warnings.warn("The number of CPU cores entered is greater than the number of cores available for this process. Executing SICER with the maximum number of cores available.\n")
    elif args.cpu < 1:
        args.cpu = 1
        warnings.warn("The number of CPU cores must be at least 1. Executing RECOGNICER with a single core.\n")

    if not os.path.isabs(args.output_directory):
        args.output_directory = os.path.join(curr_path, args.output_directory)

    if not os.path.exists(args.output_directory):
        try:
            os.makedirs(args.output_directory)
        except OSError:
            sys.exit("Output directory (%s) could not be created. Terminating program.\n" % args.output_directory)

    # "-c" given without any file yields an empty list; treat it exactly like
    # an omitted "-c" so later code sees one representation of "no control".
    if not args.control_file:
        args.control_file = None

    if args.control_file is not None:
        if len(args.control_file) > 2:
            sys.stderr.write("Error: Too many control file inputs. RECOGNICER accepts at max two files.\n")
            sys.exit(1)

        # A single control library is used for both treatment libraries.
        if len(args.control_file) == 1:
            args.control_file.append(args.control_file[0])

        _prepare_input_files(args.control_file, "Control")

    print("Running RECOGNICER with given arguments \n")
    run_RECOGNICER_df.main(args)
    print("\nProgram Finished Running")


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
