#!/usr/bin/env python

# Copyright (C) 2014 Andrew Lundgren
import argparse
import os
from itertools import cycle

from ligo.lw import utils as ligolw_utils
from ligo.lw import lsctables
import pycbc.version
from pycbc.io.ligolw import LIGOLWContentHandler


# Build the command-line interface
parser = argparse.ArgumentParser()
parser.add_argument(
    "--version",
    action="version",
    version=pycbc.version.git_verbose_msg,
)

# The number of outputs comes either from an explicit count or from the
# number of output file names supplied; exactly one of the two must be given.
split_spec = parser.add_mutually_exclusive_group(required=True)
split_spec.add_argument(
    "-n", "--num-splits",
    type=int,
    help="Number of files to be generated",
)
split_spec.add_argument(
    "-f", "--output-files",
    nargs='*',
    default=None,
    help="Names of output files",
)

parser.add_argument(
    "-i", "--input-file",
    help="Injection file to be split",
)
parser.add_argument(
    "-o", "--output-dir",
    default=None,
    help="Location of output files",
)

args = parser.parse_args()

# Explicit output file names and an output directory cannot be combined
if args.output_dir and args.output_files:
    parser.error("Provide only one of --output-dir or --output-files")

# Read in input file and locate its sim_inspiral table
xmldoc = ligolw_utils.load_filename(
        args.input_file, verbose=True, contenthandler=LIGOLWContentHandler)
tabletype = lsctables.SimInspiralTable
allinjs = tabletype.get_table(xmldoc)

# Detach the full table from the document so that each split table can be
# attached in its place when the outputs are written.  Ask the table for its
# actual parent element instead of assuming it hangs directly off the first
# child of the document: removeChild() fails if the guess is wrong.
# NOTE(review): relies on ligo.lw elements tracking `parentNode`, and on
# removeChild() resetting it (hence the lookup comes first) -- confirm against
# the installed ligo.lw version.
xmlroot = allinjs.parentNode
xmlroot.removeChild(allinjs)

# Work out how many output tables to make.  Compare against None instead of
# relying on truthiness so that "--num-splits 0" is not mistaken for "option
# not given" (which would then try to take the length of a missing file list).
if args.num_splits is not None:
    num_splits = args.num_splits
else:
    num_splits = len(args.output_files)

# With fewer than one output there is nowhere to put the injections; stop with
# a usage error here rather than failing obscurely further down.
if num_splits < 1:
    parser.error("At least one output file is needed, got %d" % num_splits)

# Collect the valid columns that the input table actually contains.
# Some columns (e.g. `process:process_id`) must be passed to
# `lsctables.New()` under their full, colon-qualified name, whereas the
# table's `columnnames` attribute lists only the part after the colon
# (`process_id`), so the comparison is done on that trailing part.
used_columns = [
    full_name
    for full_name in allinjs.validcolumns
    if full_name.rsplit(':', 1)[-1] in allinjs.columnnames
]

# One empty table, with the same columns as the input, per requested split
new_inj_tables = []
for _ in range(num_splits):
    new_inj_tables.append(lsctables.New(tabletype, columns=used_columns))

# Deal the injections out one at a time, in increasing `time_geocent` order,
# to each output table in turn, wrapping back to the first after the last.
round_robin = cycle(new_inj_tables)
time_ordered = sorted(allinjs, key=lambda sim: sim.time_geocent)
for sim in time_ordered:
    next(round_robin).append(sim)

# Work out the destination of every split before anything is written.
if args.output_files:
    out_paths = list(args.output_files)
else:
    if args.input_file is None:
        parser.error("--input-file is needed to name the outputs when "
                     "--output-files is not given")

    # Output names are derived from the input file's own name only: any
    # directory part of --input-file must not leak into the output path, and
    # dashes in directory names must not shift which field gets the index.
    name_fields = os.path.basename(args.input_file).split('-')
    if len(name_fields) < 2:
        parser.error("Cannot derive output names from '%s': expected at "
                     "least two '-'-separated fields; use --output-files "
                     "instead" % args.input_file)

    # --output-dir is optional; write to the current directory without it.
    out_dir = args.output_dir if args.output_dir is not None else os.curdir

    out_paths = []
    for idx in range(len(new_inj_tables)):
        # Tag the second field with the zero-padded split index.  The index
        # is formatted on its own so that a literal '%' elsewhere in the file
        # name cannot be misread as a format specifier.
        fields = list(name_fields)
        fields[1] += '_%.4u' % idx
        out_paths.append(os.path.join(out_dir, '-'.join(fields)))

for simtable, out_path in zip(new_inj_tables, out_paths):
    # Reuse the one document for every output: attach this split's table,
    # write the file (gzip-compressed when the name ends in 'gz'), then
    # detach the table again ready for the next split.
    xmlroot.appendChild(simtable)
    ligolw_utils.write_filename(xmldoc, out_path,
                                gz=out_path.endswith('gz'))
    xmlroot.removeChild(simtable)
