#!/usr/bin/env python

'''
Usage:
    ngst --config <configfile> [-p] --target <ingest_target> [--datafile <datafile>] [--limit=<max_records>]           
    ngst --config <configfile> --list (targets | datastores | globals)

Options:            
    -i --interactive   Start up in interactive mode
    -p --preview       Display records to be ingested, but do not ingest
'''

#
# ngst: command line utility for pushing extracted records into a Mercury data pipeline
#


import os, sys
from contextlib import ContextDecorator
import csv
import json
import logging
from collections import namedtuple

import docopt
from docopt import docopt as docopt_func
from docopt import DocoptExit
from snap import snap, common
from mercury import datamap as dmap
import yaml




class DataStore(object):
    '''Base class for the destinations that ingested records are written to.

    An instance keeps a reference to the service object registry it was
    constructed with. Storage logic is supplied by overriding write().
    '''

    def __init__(self, service_object_registry, **kwargs):
        # Additional keyword arguments are accepted but ignored by the base class.
        self.service_object_registry = service_object_registry

    def write(self, recordset, **kwargs):
        '''Write each record in <recordset> to the underlying storage medium.

        The base implementation is a no-op that returns None; implement in subclass.
        '''
        return None


class RecordBuffer(object):
    '''In-memory staging area for records headed to a datastore.

    Records accumulate in self.data until they are pushed to the datastore,
    either explicitly (flush / writethrough) or by a registered checkpoint
    manager that is notified on every write().
    '''

    def __init__(self, datastore, **kwargs):
        '''Create an empty buffer in front of <datastore>.

        <datastore> must expose write(recordset, **kwargs). Extra keyword
        arguments are accepted but not used.
        '''
        self.data = []
        self.checkpoint_mgr = None
        self.datastore = datastore

    def writethrough(self, **kwargs):
        '''Write the contents of the record buffer out to the underlying datastore.

        The buffered records are NOT cleared; use flush() to write and clear.
        '''
        self.datastore.write(self.data, **kwargs)

    def register_checkpoint(self, checkpoint_instance):
        '''Attach the object whose register_write() is called after each write().'''
        self.checkpoint_mgr = checkpoint_instance

    def flush(self, **kwargs):
        '''Write the buffered records to the datastore, then start a fresh buffer.'''
        self.writethrough(**kwargs)
        self.data = []

    def write(self, record, **kwargs):
        '''Append <record> to the buffer and notify the checkpoint manager, if any.

        Any exception raised by the checkpoint manager (for example from a
        flush it triggers) propagates to the caller unchanged.
        '''
        self.data.append(record)
        if self.checkpoint_mgr is not None:
            self.checkpoint_mgr.register_write(**kwargs)


class checkpoint(ContextDecorator):
    '''Context manager that flushes a record buffer every <interval> writes.

    On construction the instance registers itself with the record buffer, so
    each buffer.write() call reaches register_write(). When the number of
    writes since the last flush equals the interval, the buffer is flushed
    and the counter is reset. On leaving the context, whatever is still in
    the buffer is written through to the datastore.

    Keyword arguments:
        interval -- number of writes between flushes; a missing, None or 0
                    value falls back to 1.
    '''

    def __init__(self, record_buffer, **kwargs):
        self.interval = int(kwargs.get('interval') or 1)
        self._outstanding_writes = 0
        self._total_writes = 0
        self.record_buffer = record_buffer
        self.record_buffer.register_checkpoint(self)

    @property
    def total_writes(self):
        '''Number of writes registered over the lifetime of this checkpoint.'''
        return self._total_writes

    @property
    def writes_since_last_reset(self):
        '''Number of writes registered since the counter was last reset.'''
        return self._outstanding_writes

    def increment_write_count(self):
        '''Count one more write in both the running and the lifetime totals.'''
        self._outstanding_writes += 1
        self._total_writes += 1

    def reset(self):
        '''Zero the since-last-flush counter; the lifetime total is untouched.'''
        # This must target the private counter that writes_since_last_reset reads.
        # Assigning to a differently named attribute would leave the counter
        # growing forever, so the interval test could only ever match once.
        self._outstanding_writes = 0

    def register_write(self, **kwargs):
        '''Record one write and flush the buffer when the interval is reached.

        Keyword arguments are forwarded to the buffer's flush().
        '''
        self.increment_write_count()
        if self.writes_since_last_reset == self.interval:
            self.record_buffer.flush(**kwargs)
            self.reset()

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        # Push out the records accumulated since the last interval flush.
        # This runs whether or not the body raised; returning False lets any
        # exception from the body propagate.
        self.record_buffer.writethrough()
        return False


def initialize_datastores(transform_config, service_object_registry):
    '''Instantiate every datastore declared in the 'datastores' config section.

    Each entry names a class (key 'class') that is loaded from the module given
    by globals.datastore_module, and optionally a list of init_params entries
    ({'name': ..., 'value': ...}) that are passed to the class constructor as
    keyword arguments after the service object registry.

    Returns a dictionary mapping datastore name -> datastore instance. An empty
    or null 'datastores' section yields an empty dictionary.
    '''
    datastores = {}

    # An empty YAML section loads as None rather than as an empty mapping.
    datastore_configs = transform_config['datastores'] or {}
    if not datastore_configs:
        return datastores

    # The module name is only needed (and only required) when there is something to load.
    ds_module_name = transform_config['globals']['datastore_module']

    for datastore_name, datastore_config in datastore_configs.items():
        klass = common.load_class(datastore_config['class'], ds_module_name)

        # init_params is optional; a missing or empty section means "no extra arguments".
        init_params = {
            param['name']: param['value']
            for param in (datastore_config.get('init_params') or [])
        }
        datastores[datastore_name] = klass(service_object_registry, **init_params)
    return datastores


class NoSuchDatastore(Exception):
    '''Raised when a datastore name is not present in the registry.'''

    def __init__(self, datastore_name):
        Exception.__init__(self, 'No datastore registered under the name "%s".' % datastore_name)
        self.datastore_name = datastore_name


class DatastoreRegistry(object):
    '''Name -> datastore instance lookup table.'''

    def __init__(self, datastore_dictionary):
        self.data = datastore_dictionary

    def lookup(self, datastore_name):
        '''Return the datastore registered as <datastore_name>.

        Raises NoSuchDatastore when the name is unknown (or maps to a falsy value).
        '''
        datastore = self.data.get(datastore_name)
        if not datastore:
            raise NoSuchDatastore(datastore_name)
        return datastore

    def has_datastore(self, datastore_name):
        '''Return True when <datastore_name> maps to a (truthy) datastore, else False.'''
        return bool(self.data.get(datastore_name))


# Immutable description of one ingest target: the name of the datastore it
# writes to and how many writes to allow between checkpoints.
IngestTarget = namedtuple(
    'IngestTarget',
    ['datastore_name', 'checkpoint_interval'],
)


def load_ingest_targets(yaml_config, datastore_registry):
    '''Build the table of ingest targets declared in the 'ingest_targets' config section.

    Each target must name a datastore (key 'datastore') known to
    <datastore_registry>. The 'checkpoint_interval' key is optional; when it is
    absent the target carries None for its interval.

    Returns a dictionary mapping target name -> IngestTarget. An empty or null
    section yields an empty dictionary.

    Raises Exception when a target refers to a datastore the registry does not have.
    '''
    targets = {}

    # An empty YAML section loads as None rather than as an empty mapping.
    target_configs = yaml_config['ingest_targets'] or {}

    for target_name, target_config in target_configs.items():
        datastore = target_config['datastore']
        interval = target_config.get('checkpoint_interval')

        # Fail early, with the offending names, on a target that points at an unknown datastore.
        if not datastore_registry.has_datastore(datastore):
            raise Exception('The ingest target "%s" specifies a nonexistent datastore: "%s". Please check your config file.'
                            % (target_name, datastore))
        targets[target_name] = IngestTarget(datastore_name=datastore, checkpoint_interval=interval)
    return targets


def lookup_ingest_target_by_name(ingest_target_name, available_ingest_targets):
    '''Return the entry stored under <ingest_target_name> in <available_ingest_targets>.

    Raises Exception when the name is absent or maps to a falsy value.
    '''
    target = available_ingest_targets.get(ingest_target_name)
    if target:
        return target

    # Two-line message; the second line keeps its leading indentation.
    message = ('The ingest target "%s" specified on the command line does not refer to a valid target. \n'
               '                Please check your command syntax or your config file.')
    raise Exception(message % ingest_target_name)


def initialize_record_buffer(ingest_target, datastore_registry):
    '''Return a new RecordBuffer in front of the datastore named by <ingest_target>.

    The datastore is resolved through datastore_registry.lookup(), so any
    exception that lookup raises propagates to the caller.
    '''
    return RecordBuffer(datastore_registry.lookup(ingest_target.datastore_name))


def _limit_records(records, limit):
    '''Yield at most <limit> items from <records>; a negative limit means no cap.

    The cap is tested right after an item is handed out, so the source is never
    asked for an item beyond the limit (no extra blocking read on stdin).
    '''
    if limit == 0:
        return
    for count, record in enumerate(records, start=1):
        yield record
        if count == limit:
            return


def _read_stream_records(stream):
    '''Yield whitespace-stripped lines from <stream> until EOF or the first blank line.'''
    for raw_line in iter(stream.readline, ''):
        line = raw_line.strip()
        if not line:
            # NOTE: a blank (whitespace-only) line ends the stream, same as EOF.
            return
        yield line


def _ingest_records(records, ingest_target, datastore_registry, limit, preview_mode):
    '''Send up to <limit> records to the target's datastore, or print them in preview mode.'''
    # Resolving the buffer first validates the target's datastore even when previewing.
    buffer = initialize_record_buffer(ingest_target, datastore_registry)
    selected_records = _limit_records(records, limit)

    if preview_mode:
        # Preview must not touch the datastore, so no checkpoint is opened:
        # its exit handler would write the (empty) buffer through.
        for record in selected_records:
            print(record)
        return

    with checkpoint(buffer, interval=ingest_target.checkpoint_interval):
        for record in selected_records:
            buffer.write(record)


def main(args):
    '''Run the ngst command described by <args> (the docopt argument dictionary).

    Loads the YAML config, registers the project home on sys.path, initializes
    services and datastores, then does one of:
      * ingest records from standard input (--target without a datafile),
      * ingest records from a datafile (lines are passed on as read), or
      * list the configured targets, datastores or globals (--list).

    --preview prints the records instead of writing them; --limit caps the
    number of records processed.
    '''
    config_filename = args['<configfile>']
    yaml_config = common.read_config_file(config_filename)
    project_dir = common.load_config_var(yaml_config['globals']['project_home'])
    # Make project-local modules importable.
    sys.path.append(project_dir)
    service_object_registry = common.ServiceObjectRegistry(snap.initialize_services(yaml_config))
    datastore_registry = DatastoreRegistry(initialize_datastores(yaml_config, service_object_registry))

    preview_mode = bool(args['--preview'])

    # A limit of -1 never matches a record count, i.e. "no limit".
    limit = -1
    if args.get('--limit') is not None:
        limit = int(args['--limit'])

    available_ingest_targets = load_ingest_targets(yaml_config, datastore_registry)

    if args['--target'] and args['<datafile>'] is None:
        # Stream mode: records arrive on standard input.
        ingest_target = lookup_ingest_target_by_name(args['<ingest_target>'], available_ingest_targets)
        _ingest_records(_read_stream_records(sys.stdin),
                        ingest_target,
                        datastore_registry,
                        limit,
                        preview_mode)

    elif args['<datafile>']:
        # File mode: every line of the datafile is one record.
        ingest_target = lookup_ingest_target_by_name(args['<ingest_target>'], available_ingest_targets)
        with open(args['<datafile>']) as f:
            _ingest_records(f, ingest_target, datastore_registry, limit, preview_mode)

    elif args['--list']:
        if args['targets']:
            for target in yaml_config['ingest_targets']:
                print('::: Ingest target "%s": ' % target)
                print(common.jsonpretty(yaml_config['ingest_targets'][target]))

        if args['datastores']:
            for dstore in yaml_config['datastores']:
                print('::: Datastore alias "%s": ' % dstore)
                print(common.jsonpretty(yaml_config['datastores'][dstore]))

        if args['globals']:
            print('::: Global settings:')
            print(common.jsonpretty(yaml_config['globals']))


if __name__ == '__main__':
    # Parse the command line against the usage text in the module docstring,
    # then hand the resulting argument dictionary to main().
    cli_args = docopt_func(__doc__)
    main(cli_args)



