import pickle
import re
import shutil
import warnings
from itertools import repeat
from pathlib import Path

import pandas as pd
from scipy.io import arff

from .structures import Configurations, from_dict, from_old_format
from .utils import load_yaml_file


class Configuration:
    _CONFIG_PATH = Path('conf/config.yaml')

    def __init__(self, other=None):
        if other is None:
            _my_path = Path(__file__).parent
            self._config_file = _my_path / Configuration._CONFIG_PATH
        else:
            other_file = Path(other)
            if other_file.suffix != '.yaml':
                raise IOError(f"Incorrect file extension {repr(other_file.suffix)}. It must be a 'yaml' file.")
            self._config_file = other_file

        if not self._config_file.is_file():
            raise FileNotFoundError(f"Configuration file {repr(self._config_file.name)} "
                                    f"not found in {repr(str(self._config_file.parent))}.")

        self.content = None

    def __enter__(self):
        self.load()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.content.clear()

    def load(self) -> Configurations:
        content = load_yaml_file(self._config_file)
        try:
            config = from_dict(content)
        except KeyError:
            config = from_old_format(content)
            warnings.warn("The format of this configuration file (config.yaml) is deprecated and will raise error "
                          "in future versions. Generate another config file through 'pyhard init' command to get the "
                          "most up-to-date one.")
        self.content = config
        return config

    @DeprecationWarning
    def get(self, key):
        if isinstance(key, str):
            return self.content.get(key)
        elif isinstance(key, list):
            return {k: self.content.get(k) for k in key}

    @DeprecationWarning
    def get_full(self):
        return self.content

    @DeprecationWarning
    def set(self, key, value):
        self.content[key] = value


class DotDict(dict):
    """
    dot.notation access to dictionary attributes

    Reading a missing key through attribute access returns ``None`` (same as ``dict.get``), except for missing
    double-underscore names, which raise ``AttributeError``. Setting and deleting attributes map to item assignment
    and item deletion.
    """

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails.
        try:
            return self[name]
        except KeyError:
            # Protocol probes such as `__getstate__` or `__deepcopy__` must see a missing attribute rather than
            # `None`; otherwise the caller tries to call `None` (e.g. when pickling on Python < 3.11).
            if name.startswith('__') and name.endswith('__'):
                raise AttributeError(name) from None
            return None

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


class Workspace:
    """
    Manages the output files generated by the IH/IS analysis, loading them properly and decoupling this task from the
    other modules.

    The tables exposed as properties (`data`, `metadata`, `extended_metadata`, `ih_values`, `is_coordinates`,
    `footprints`, `footprint_performance`, `header`) raise ``RuntimeError`` while they are unset. The class can be
    used as a context manager: files are loaded on entry and the main tables are released on exit.
    """

    # Logical name -> file name expected inside the workspace directory.
    FILES = {
        'data': 'data.csv',
        'metadata': 'metadata.csv',
        'ih_values': 'ih.csv',
        'is_coordinates': 'coordinates.csv',
        'footprint_performance': 'footprint_performance.csv',
        'algo_bin': 'algorithm_bin.csv',
        'beta_easy': 'beta_easy.csv',
        'good_algos': 'good_algos.csv'
    }
    # Column names used by the instance space analysis output files.
    ISA_COLUMNS = {
        'index': 'Row',
        'dim1': 'z_1',
        'dim2': 'z_2'
    }
    # Index name shared by the instance-level tables.
    INDEX = 'instances'

    def __init__(self, dir_path, data_path=None):
        """
        :param dir_path: existing directory holding the result files; relative paths are anchored at the current
            working directory.
        :param data_path: optional path to the dataset file; defaults to ``data.csv`` inside `dir_path`.
        :raises NotADirectoryError: if `dir_path` is not a directory.
        :raises FileNotFoundError: if `data_path` is given but is not a file.
        """
        cwd = Path.cwd()

        path = Path(dir_path)
        if not path.is_dir():
            raise NotADirectoryError(f"Invalid path '{str(path)}', it is not a directory.")
        self.path = path if path.is_absolute() else cwd / path

        if data_path is None:
            self.data_path = self.path / self.FILES['data']
        else:
            path = Path(data_path)
            if not path.is_file():
                raise FileNotFoundError(f"Invalid file path '{str(path)}'.")
            self.data_path = path if path.is_absolute() else cwd / path

        self.files = DotDict(self.FILES)
        self.isa_cols = DotDict(self.ISA_COLUMNS)

        self.data = self.metadata = self.extended_metadata = self.ih_values = None
        self.is_coordinates = self.footprints = self.footprint_performance = None
        self.algo_bin = self.beta_easy = self.good_algos = None
        self.header = None
        self.loaded = False

    def __enter__(self):
        self.load()
        # Return the instance so that `with Workspace(...) as ws:` binds the workspace.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Reset to None instead of deleting the backing attributes, so a later access raises the documented
        # RuntimeError and `load` can be called again.
        self.data = self.metadata = self.extended_metadata = self.ih_values = None
        self.is_coordinates = self.footprints = self.footprint_performance = None
        self.loaded = False

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, d):
        self.__dict__.update(d)

    def _call_method(self, name, **kwargs):
        """Calls the method called `name` on this instance, forwarding `kwargs`."""
        return getattr(self, name)(**kwargs)

    @staticmethod
    def _require_loaded(value):
        """Returns `value`, or raises ``RuntimeError`` when it is still ``None``."""
        if value is None:
            raise RuntimeError("Workspace not yet loaded.")
        return value

    def _read_isa_table(self, file_name):
        """Reads a CSV file from the workspace directory, indexed by the ISA index column."""
        return pd.read_csv(self.path / file_name, index_col=self.isa_cols.index)

    def _load_footprints(self):
        """Reads every footprint CSV file in the workspace into one frame indexed by (algo, type)."""
        pattern = re.compile('(^footprint)_(.+)_(good|bad|best)', re.IGNORECASE)
        columns = [self.isa_cols.index, self.isa_cols.dim1, self.isa_cols.dim2]

        frames = dict()
        for entry in self.path.glob('*.csv'):
            found = pattern.match(entry.name) if entry.is_file() else None
            if found is None:
                continue
            _, algo, kind = found.groups()
            try:
                frames[(algo, kind)] = pd.read_csv(entry, usecols=columns, index_col=self.isa_cols.index)
            except ValueError:
                # Best effort: files that cannot be read with the expected columns are skipped
                # (presumably empty footprints -- TODO confirm).
                continue

        footprints = pd.concat(frames)
        footprints.reset_index(level=self.isa_cols.index, drop=True, inplace=True)
        footprints.index.names = ['algo', 'type']
        footprints.sort_index(inplace=True)
        return footprints

    def load(self):
        """
        Loads result files into memory. The following files are expected to be located into `dir_path`:

        - `data.csv`: original dataset
        - `metadata.csv`: generated metadata
        - `ih.csv`: instance hardness values
        - `coordinates.csv`: instance space coordinates
        - `footprint_algo_(good | bad)` like files
        - `footprint_performance.csv`: footprint performance
        - `algorithm_bin.csv`: binarized algorithm performance
        - `beta_easy.csv`: easy/hard instances
        - `good_algos.csv`: number of good algorithms

        The dataset itself is read from `data_path`, which may be a `.csv` or an `.arff` file.
        """
        suffix = self.data_path.suffix
        if suffix == '.csv':
            data = pd.read_csv(self.data_path)
            self.data = data
            # Every column of a CSV dataset is recorded as 'numeric' in the header.
            self.header = dict.fromkeys(data.columns, 'numeric')
        elif suffix == '.arff':
            records, meta = arff.loadarff(self.data_path)
            self.data = pd.DataFrame(records)
            self.header = dict(zip(meta.names(), meta.types()))
        # Any other suffix leaves `data` untouched; if it is still unset, the `self.data` access further down
        # raises RuntimeError.

        # Metadata
        self.metadata = pd.read_csv(self.path / self.files.metadata, index_col=self.INDEX)
        # Instance hardness
        self.ih_values = pd.read_csv(self.path / self.files.ih_values, index_col=self.INDEX)
        # Algorithm bin
        algo_bin = self._read_isa_table(self.files.algo_bin)
        algo_bin.index.name = self.INDEX
        self.algo_bin = algo_bin.add_prefix('algo_').add_suffix('_bin')
        # Beta easy
        self.beta_easy = self._read_isa_table(self.files.beta_easy)
        self.beta_easy.index.name = self.INDEX
        # Good algos
        self.good_algos = self._read_isa_table(self.files.good_algos)
        self.good_algos.index.name = self.INDEX

        # Extended metadata: instance hardness + metadata + per-instance ISA tables, keeping the metadata rows.
        extended = self.ih_values.join(self.metadata, how='right')
        for table in (self.algo_bin, self.beta_easy, self.good_algos):
            extended = extended.join(table)

        coordinates = self._read_isa_table(self.files.is_coordinates)
        performance = self._read_isa_table(self.files.footprint_performance)
        performance.index.name = 'Algorithm'

        footprints = self._load_footprints()

        # The dataset rows are re-labelled with the metadata index.
        self.data.index = self.metadata.index
        coordinates.index.name = extended.index.name

        self.extended_metadata = extended
        self.is_coordinates = coordinates
        self.footprints = footprints
        self.footprint_performance = performance
        self.loaded = True

    @property
    def data(self) -> pd.DataFrame:
        return self._require_loaded(self._data)

    @data.setter
    def data(self, value):
        self._data = value

    @data.deleter
    def data(self):
        del self._data

    @property
    def metadata(self) -> pd.DataFrame:
        return self._require_loaded(self._metadata)

    @metadata.setter
    def metadata(self, value):
        self._metadata = value

    @metadata.deleter
    def metadata(self):
        del self._metadata

    @property
    def ih_values(self) -> pd.DataFrame:
        return self._require_loaded(self._ih_values)

    @ih_values.setter
    def ih_values(self, value):
        self._ih_values = value

    @ih_values.deleter
    def ih_values(self):
        del self._ih_values

    @property
    def is_coordinates(self) -> pd.DataFrame:
        return self._require_loaded(self._is_coordinates)

    @is_coordinates.setter
    def is_coordinates(self, value):
        self._is_coordinates = value

    @is_coordinates.deleter
    def is_coordinates(self):
        del self._is_coordinates

    @property
    def footprints(self) -> pd.DataFrame:
        return self._require_loaded(self._footprints)

    @footprints.setter
    def footprints(self, value):
        self._footprints = value

    @footprints.deleter
    def footprints(self):
        del self._footprints

    @property
    def footprint_performance(self) -> pd.DataFrame:
        return self._require_loaded(self._footprint_performance)

    @footprint_performance.setter
    def footprint_performance(self, value):
        self._footprint_performance = value

    @footprint_performance.deleter
    def footprint_performance(self):
        del self._footprint_performance

    @property
    def header(self) -> dict:
        # Mapping of dataset column name -> type label, built by `load`.
        return self._require_loaded(self._header)

    @header.setter
    def header(self, value):
        self._header = value

    @header.deleter
    def header(self):
        del self._header

    @property
    def extended_metadata(self) -> pd.DataFrame:
        return self._require_loaded(self._metadata_ext)

    @extended_metadata.setter
    def extended_metadata(self, value):
        self._metadata_ext = value

    @extended_metadata.deleter
    def extended_metadata(self):
        del self._metadata_ext

    def _get_file(self, key):
        """Returns the table registered under `key` in `FILES`."""
        getter = f"get_{key}"
        # Honour a dedicated `get_<key>` method when one exists (e.g. defined by a subclass).
        if callable(getattr(self, getter, None)):
            return self._call_method(getter)
        # Copy, as done for the extended metadata, so callers cannot alter the workspace tables.
        return self._require_loaded(getattr(self, key)).copy()

    def get_all_files(self, extended_metadata=True):
        """
        Collects the loaded tables into a dict keyed like `FILES`.

        :param extended_metadata: when true, the ``'metadata'`` entry is replaced by ``'metadata_ext'``, holding a
            copy of the extended metadata.
        :raises RuntimeError: if a required table is not loaded.
        """
        output = {key: self._get_file(key) for key in self.FILES}

        if extended_metadata:
            output.pop('metadata')
            output['metadata_ext'] = self.extended_metadata.copy()

        return output

    def compress(self, file_name, dest):
        """
        Archives the workspace directory as ``<dest>/<file_name>.zip``.

        :raises RuntimeError: if `dest` is the workspace directory itself.
        :raises NotADirectoryError: if `dest` is not a directory.
        """
        path_dest = Path(dest)
        # Compare resolved paths so that a relative `dest` pointing at the workspace is detected as well.
        if self.path.resolve() == path_dest.resolve():
            raise RuntimeError("Workspace dir and dest dir must be different.")
        if not path_dest.is_dir():
            raise NotADirectoryError(f"Destination folder {repr(str(path_dest))} is not a directory.")

        shutil.make_archive(str(path_dest / file_name), 'zip', str(self.path))

    def serialize(self, file_name=None):
        """
        Pickles the output of `get_all_files` into ``<file_name>.pickle`` inside the workspace directory.

        :param file_name: file name without extension; defaults to ``'result'``.
        """
        if file_name is None:
            file_name = 'result'
        # Gather the content first, so a failure does not leave an empty file behind.
        content = self.get_all_files()
        with open(self.path / f"{file_name}.pickle", 'wb') as file:
            pickle.dump(content, file)
