#!/bin/env python
""" Apply a naive mass binning, assuming that each bin is fully independent, which 
is a conservative estimate. This clusters to find the most significant foreground, but
leaves the background triggers alone. """

import h5py, numpy, argparse, logging, pycbc, pycbc.events, pycbc.io, lal
import pycbc.version

parser = argparse.ArgumentParser()
parser.add_argument("--version", action="version", version=pycbc.version.git_verbose_msg)
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--statmap-files', nargs='+',
                    help="List of coinc files to be redistributed")
parser.add_argument('--cluster-window', type=float)
parser.add_argument('--output-file', help="name of output file")
args = parser.parse_args()

pycbc.init_logging(args.verbose)

# We apply a dumb factor of the number of bins only, nothing smart for now, 
# no clustering of the background
files = [h5py.File(n) for n in args.statmap_files]

fac = len(args.statmap_files)

# copy over the standard data that is constant
f = h5py.File(args.output_file, "w")
f.attrs['detector_1'] = files[0].attrs['detector_1']
f.attrs['detector_2'] = files[0].attrs['detector_2']
f.attrs['timeslide_interval'] = files[0].attrs['timeslide_interval']

f.attrs['background_time'] = files[0].attrs['background_time']
f.attrs['foreground_time'] = files[0].attrs['foreground_time']
f.attrs['background_time_exc'] = files[0].attrs['background_time_exc']
f.attrs['foreground_time_exc'] = files[0].attrs['foreground_time_exc']

for attr in  ['detector_1', 'detector_2', 'timeslide_interval']:
    for i in range(len(files)-1):
        assert files[i].attrs[attr] == files[i+1].attrs[attr]

# combine times that are foreground vetoed as they may differ between bins
for key in files[0]['segments'].keys():
    if key == 'foreground_veto':
        continue
    f['segments/%s/start' % key] = files[0]['segments/%s/start' % key][:]
    f['segments/%s/end' % key] = files[0]['segments/%s/end' % key][:]

# copy over all the columns in the foreground group, we recalculate ifar/fap later
for key in files[0]['foreground'].keys():
    pycbc.io.combine_and_copy(f, files, 'foreground/%s' % key)
    
# recalculate ifar/fap for the foreground triggers by 
#  applying a trials factor = num_files(num_bins)
ifar = f['foreground/ifar'][:] / fac
coinc_time = f.attrs['foreground_time'] / lal.YRJUL_SI
f['foreground/ifar'][:] = ifar
f['foreground/fap'][:] = 1 - numpy.exp( - coinc_time / ifar)

ifar = f['foreground/ifar_exc'][:] / fac
coinc_time = f.attrs['foreground_time_exc'] / lal.YRJUL_SI
f['foreground/ifar_exc'][:] = ifar
f['foreground/fap_exc'][:] = 1 - numpy.exp( - coinc_time / ifar)

# cluster for the loudest ifar value 

def argmax(v):
    return numpy.argsort(v)[-1]

stat = numpy.core.records.fromarrays([f['foreground/ifar'][:], 
                                      f['foreground/stat'][:]],
                                      names='ifar,stat')
cidx = pycbc.events.cluster_coincs(stat,
                                   f['foreground/time1'][:], 
                                   f['foreground/time2'][:], 
                                   numpy.zeros(len(ifar)),
                                   0, args.cluster_window, argmax=argmax)

# downsample the foreground columns to only the loudest ifar between the multiple files
for key in f['foreground'].keys():
    dset = f['foreground/%s' % key][:][cidx]
    del f['foreground/%s' % key]
    f['foreground/%s' % key] = dset

# If there is a background set (full_data as opposed to injection run), then recalculate 
# the values for its triggers as well
if 'background' in files[0]:  
    for key in files[0]['background'].keys():
        pycbc.io.combine_and_copy(f, files, 'background/%s' % key)
     
    f['background/ifar'][:] =  f['background/ifar'][:] / fac
    
    for key in files[0]['background_exc'].keys():
        pycbc.io.combine_and_copy(f, files, 'background_exc/%s' % key)
     
    f['background_exc/ifar'][:] = f['background_exc/ifar'][:] / fac

    pycbc.io.combine_and_copy(f, files, 'segments/foreground_veto/start')
    pycbc.io.combine_and_copy(f, files, 'segments/foreground_veto/end')


f.close()
