from typing import List, Union
import os
import tempfile
import subprocess
import numpy as np
import pandas as pd


# Alias of ``str`` for annotating the individual text lines of a report file.
Lines = str


class CytosimReader:
    """
    Read data generated with cytosim. Initialize with the path
    to the folder containing the output data. Reports can then
    be generated and read with the method ``read_report``.
    """

    def __init__(self, folder: str, report_executable: str='report'):
        """
        Create CytosimReader for an output folder containing data generated
        by cytosim simulations. A subfolder ``reports`` is created inside
        ``folder`` if it does not exist yet.

        :param folder: Path to folder containing cytosim output (cmo) files.
        :param report_executable: If you renamed your cytosim ``report`` executable,
                                  pass the name with this parameter.

        :raises RuntimeError: If ``<folder>/reports`` exists but is not a directory.
        """
        self.folder = os.path.abspath(folder)
        # Build the reports path from the absolute folder so it stays valid even
        # if the working directory changes after construction.
        self._folder_reports = os.path.join(self.folder, 'reports')
        if not os.path.exists(self._folder_reports):
            os.mkdir(self._folder_reports)
        if not os.path.isdir(self._folder_reports):
            msg = 'CytosimReader wants to put reports into folder "{}".'.format(self._folder_reports)
            msg += ' But there is a file with that name! Please move that file if you want to use CytosimReader.'
            raise RuntimeError(msg)
        self.report_exe = report_executable

    def read_report(self, report_identifier: str,
                    aggregate=False, clear=False) -> Union[List['ReportBlock'], pd.DataFrame]:
        """
        Read data from one report. If the report does not exist yet, it will be generated
        in the subfolder ``reports``. The filename will be ``<report_identifier>.txt``,
        where ``:`` delimiter used by cytosim will be replaced by underscores ``_``.
        Data will be either a list of ReportBlock objects (one per recorded frame), or,
        if aggregate=True, one pandas.DataFrame containing data of all frames.
        To access the actual data of a ReportBlock, use the member ReportBlock.data .

        :param report_identifier: One of the valid identifiers that can be passed as an argument
                                  to cytosim's ``report`` executable; for example ``fiber:points``,
                                  which will create a report file ``reports/fiber_points.txt``
                                  and read its content into a pandas.DataFrame .
        :param aggregate: If True, will return data from the report as one big aggregated
                          DataFrame. Otherwise will return a list of ReportBlock objects, one
                          for each recorded frame.
        :param clear: Remove an existing report and re-generate it.

        :return: Either a list of ReportBlock objects (one per recorded frame), or, if aggregate=True,
                 one pandas.DataFrame containing data of all frames.
        """
        name_parts = [part.strip() for part in report_identifier.split(':')]
        fname_report = os.path.join(self._folder_reports, '_'.join(name_parts) + '.txt')

        if clear and os.path.exists(fname_report):
            os.remove(fname_report)
        if not os.path.exists(fname_report):
            self._generate_report(report_identifier, fname_report)

        report_blocks = CytosimReader._parse_report_file(fname_report)

        if aggregate:
            return CytosimReader.aggregate(report_blocks)

        return report_blocks

    def read_end_to_end_distances(self) -> np.ndarray:
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError

    @staticmethod
    def _parse_report_file(fname) -> List['ReportBlock']:
        """
        Split a report file into per-frame blocks and parse each of them.

        Blank lines are skipped. Every line starting with ``% end`` closes the
        current block, which is then handed to ``ReportBlock.parse``. Lines after
        the last ``% end`` marker are not parsed.

        :param fname: Path of the report file.
        :return: One ReportBlock per ``% end``-terminated block, in file order.
        """
        blocks = []
        current_block = []
        with open(fname, 'rt') as fh:
            for line in fh:
                if not line or line.isspace():
                    continue
                if line.startswith('% end'):
                    blocks.append(ReportBlock.parse(current_block))
                    current_block = []
                    continue
                current_block.append(line)
        return blocks

    def _generate_report(self, report_identifier, fname_report):
        """
        Run the report executable inside the data folder and store its
        standard output in ``fname_report``.

        :param report_identifier: Argument passed to the report executable.
        :param fname_report: Path of the file receiving the report.

        :raises OSError: If the executable cannot be started or the file cannot be written.
        :raises RuntimeError: If the executable exits with a non-zero status.
        """
        command_args = [self.report_exe, report_identifier]
        try:
            with open(fname_report, 'wt') as fh:
                return_code = subprocess.call(command_args, cwd=self.folder, stdout=fh)
        except OSError:
            # Do not leave an empty file behind: read_report would treat it as a
            # valid cached report on the next call.
            if os.path.exists(fname_report):
                os.remove(fname_report)
            raise
        if return_code != 0:
            # Same reasoning: a partial report must not be cached.
            if os.path.exists(fname_report):
                os.remove(fname_report)
            msg = 'Report executable "{}" failed with exit code {} for report "{}".'.format(
                self.report_exe, return_code, report_identifier)
            raise RuntimeError(msg)

    @staticmethod
    def aggregate(report_blocks: List['ReportBlock']) -> pd.DataFrame:
        """
        Combine the data of several frames into one DataFrame.

        The result has the columns ``frame`` and ``time`` followed by the data
        columns of the blocks, and a fresh consecutive index starting at 0.
        The passed blocks are left unmodified.

        :param report_blocks: Blocks to combine; each needs the members
                              ``frame``, ``time`` and ``data``.
        :return: Aggregated DataFrame. For an empty list, an empty DataFrame
                 with only the columns ``frame`` and ``time``.
        """
        if not report_blocks:
            return pd.DataFrame(columns=['frame', 'time'])
        frames = []
        for block_i in report_blocks:
            # Per-frame metadata, repeated for every row of this block.
            meta_i = pd.DataFrame(
                {'frame': block_i.frame, 'time': block_i.time},
                index=block_i.data.index,
            )
            frames.append(pd.concat([meta_i, block_i.data], axis=1))
        return pd.concat(frames, ignore_index=True)


# Report labels that ReportBlock.parse routes through parse_fiber_blocks
# instead of reading the data section as one single table.
_reports_with_fiber_blocks = ['fiber:points']


class ReportBlock:
    """
    Stores data of a single frame of data generated with cytosim's ``report`` executable.
    Access meta data with members ``frame``, ``time``, ``label``, ``info``, or
    access the actual data (as a pandas.DataFrame) with member ``data``. E.g., if your
    ReportBlock object is ``block``, access data as ``block.data``.
    """

    def __init__(self, frame: int, time: float, label: str,
                 info: List[str], data: pd.DataFrame):
        """
        :param frame: Frame number of this block.
        :param time: Time of this frame.
        :param label: Report label, i.e. the text after ``% report``.
        :param info: Comment lines between the label line and the column names.
        :param data: The actual data of this frame.
        """
        self.frame = frame
        self.time = time
        self.label = label
        self.info = info
        self.data = data

    @staticmethod
    def read_data(column_names: str, data_block: str) -> pd.DataFrame:
        """
        Parse whitespace-separated text into a DataFrame.

        :param column_names: Header line with the column names, including its
                             trailing newline.
        :param data_block: The data rows, one per line.
        :return: DataFrame as parsed by ``pandas.read_csv``.
        """
        # The context manager makes sure the temporary file is closed again.
        with tempfile.TemporaryFile('w+t') as tf:
            tf.write(column_names)
            tf.write(data_block)
            tf.seek(0)
            # sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True.
            return pd.read_csv(tf, sep=r'\s+')

    @staticmethod
    def parse(block: List['Lines']) -> 'ReportBlock':
        """
        Parse the lines of one frame into a ReportBlock.

        The first three lines must be the ``frame``, ``time`` and ``report``
        comment lines. The comment line directly above the data holds the column
        names; for reports listed in ``_reports_with_fiber_blocks`` the data is
        instead a sequence of ``% fiber ...`` sub-blocks, with the column names
        one line above the first of them.

        :param block: Lines of one frame, without the closing ``% end`` line.
        :return: The parsed ReportBlock.

        :raises RuntimeError: If the block contains no data line.
        """
        frame = ReportBlock._parse_frame(block[0])
        time = ReportBlock._parse_time(block[1])
        label = ReportBlock._parse_label(block[2])
        # Index of the first line after the three header lines that is not a comment.
        first_data_line = next(
            (i for i in range(3, len(block)) if not block[i].startswith('%')),
            None,
        )
        if first_data_line is None:
            msg = "No data found in this block. CytosimReader can't handle your file. "
            msg += "You can open a new issue on the project page gitlab.gwdg.de/ikuhlem/cytosim-reader "
            raise RuntimeError(msg)
        if label not in _reports_with_fiber_blocks:
            info = block[3: first_data_line-1]
            # [2:] strips the leading "% " of the comment line.
            column_names = block[first_data_line-1][2:]
            data_block = ''.join(block[first_data_line:])
            df = ReportBlock.read_data(column_names, data_block)
            return ReportBlock(frame, time, label, info, df)
        info = block[3: first_data_line-2]
        column_names = block[first_data_line-2][2:]
        df_fibers = ReportBlock.parse_fiber_blocks(column_names, block[first_data_line-1:])
        # The per-fiber frames carry consecutive, non-overlapping indices, so
        # stacking them yields one table with a running row index.
        aggregated_df = pd.concat(df_fibers)
        return ReportBlock(frame, time, label, info, aggregated_df)

    @staticmethod
    def parse_fiber_blocks(column_names: str, block: List[str]) -> List[pd.DataFrame]:
        """
        Parse a sequence of per-fiber sub-blocks.

        Each sub-block starts with a line ``% fiber <type>:<id> <segmentation>``
        followed by its data rows.

        :param column_names: Header line with the names of the data columns.
        :param block: Lines of all fiber sub-blocks of one frame.
        :return: One DataFrame per fiber with the columns ``fiber_type``,
                 ``fiber_id``, ``segmentation`` followed by the data columns.
                 Row indices continue across the returned frames.
        """
        data_frames = []
        fiber_lines = None
        fiber_type = None
        fiber_id = None
        segmentation = None
        n_lines = 0
        for line in block:
            if line.startswith('%'):
                if fiber_lines is not None:
                    df = ReportBlock._fiber_frame(
                        column_names, fiber_lines, fiber_type, fiber_id, segmentation, n_lines)
                    data_frames.append(df)
                    n_lines += len(df)
                s = line[1:].split()
                msg = "Expected block for one fiber, but this block does not seem to start with fiber information!"
                assert s[0] == 'fiber', msg
                fiber_lines = []
                fiber_type, fiber_id = s[1].split(':')
                segmentation = s[2]
                continue
            fiber_lines.append(line)
        # The last fiber is not followed by another header line, so it has to be
        # emitted here; otherwise its data would be lost.
        if fiber_lines is not None:
            data_frames.append(ReportBlock._fiber_frame(
                column_names, fiber_lines, fiber_type, fiber_id, segmentation, n_lines))
        return data_frames

    @staticmethod
    def _fiber_frame(column_names, fiber_lines, fiber_type, fiber_id,
                     segmentation, offset) -> pd.DataFrame:
        """Build the DataFrame of one fiber; its row index is shifted by ``offset``."""
        df = ReportBlock.read_data(column_names, ''.join(fiber_lines))
        df.index = df.index + offset
        # Fiber metadata, repeated for every row of this fiber.
        meta = pd.DataFrame(
            {
                'fiber_type': fiber_type,
                'fiber_id': fiber_id,
                'segmentation': segmentation,
            },
            index=df.index,
        )
        return pd.concat([meta, df], axis=1)

    @staticmethod
    def _parse_frame(line) -> int:
        """Return the frame number from a line of the form ``% frame <n>``."""
        s = line.split()
        assert s[1] == 'frame', "This line does not contain the current frame number."
        return int(s[2])

    @staticmethod
    def _parse_time(line) -> float:
        """Return the time from a line of the form ``% time <t>``."""
        s = line.split()
        assert s[1] == 'time', "This line does not contain the time of the current frame."
        return float(s[2])

    @staticmethod
    def _parse_label(line):
        """Return the text after ``report`` from a line of the form ``% report <label>``."""
        s = line.split()
        assert s[1] == 'report', "This line does not contain the report label."
        return ' '.join(s[2:])

    def __str__(self):
        return "ReportBlock \"{}\", frame {}".format(self.label, self.frame)

    def __repr__(self):
        return self.__str__()
