#!/usr/bin/python
#%%
""" This is main module for making predictions. After setup config['py'] (data source), run predict function.
There is also predict_multiple_columns function for predicting more columns. It's not necessary to run all models everytime,
so there is also function compare models, that return results of models on the test data. Then you can config again only good models.

Do not edit this file if you are user, it's not necassary! Only call function from here. Only file to edit is config.py. If you are developer, edit as you need.

There are working examples in main readme and also in test_it module. Particular modules functionality is vissible in visual.py in tests.
"""
import sys
from pathlib import Path
import numpy as np
from prettytable import PrettyTable
import time
import pandas as pd
import argparse
import inspect
import warnings

this_path = Path(__file__).resolve().parents[1]
this_path_string = str(this_path)

# try:
#     import predictit
# except Exception:
#     If used not as a library but as standalone framework, add path to be able to import predictit if not opened in folder
sys.path.insert(0, this_path_string)
import predictit

from predictit.config import config, presets
from predictit.misc import traceback_warning, user_warning

try:
    import gui_start  # Not included in predictit if used as python library
except Exception:
    pass


def update_gui(content, id):
    try:
        gui_start.edit_gui_py(content, id)
    except Exception:
        pass


if config['debug'] == 1:
    warnings.filterwarnings('once')
elif config['debug'] == 2:
    warnings.filterwarnings('error')
else:
    warnings.filterwarnings('ignore')

for i in config['ignored_warnings']:
    warnings.filterwarnings('ignore', message=fr"[\s\S]*{i}*")

if __name__ == "__main__":

    # All the config is in config.py" - rest only for people that know what are they doing
    # Add settings from command line if used
    parser = argparse.ArgumentParser(description='Prediction framework setting via command line parser!')
    parser.add_argument("--use_config_preset", type=str, choices=[None, 'fast'], help="Edit some selected config, other remains the same, check config_presets.py file.")
    parser.add_argument("--used_function", type=str, choices=['predict', 'predict_multiple_columns', 'compare_models', 'validate_predictions'],
                        help="""Which function in main.py use. Predict predict just one defined column, predict_multiple_columns predict more columns at once,
                        compare_models compare models on defined test data, validate_predictions test models on data not used for training""")
    parser.add_argument("--data_source", type=str, choices=['csv', 'test'], help="What source of data to use")
    parser.add_argument("--csv_full_path", type=str, help="Full CSV path with suffix")
    parser.add_argument("--predicts", type=int, help="Number of predicted values - 7 by default")
    parser.add_argument("--predicted_column", type=int, help="Name of predicted column or it's index - string or int")
    parser.add_argument("--predicted_columns", type=list, help="For predict_multiple_columns function only! List of names of predicted column or it's indexes")
    parser.add_argument("--freq", type=str, help="Interval for predictions 'M' - months, 'D' - Days, 'H' - Hours")
    parser.add_argument("--freqs", type=list, help="For predict_multiple_columns function only! List of intervals of predictions 'M' - months, 'D' - Days, 'H' - Hours")
    parser.add_argument("--plot", type=bool, help="If 1, plot interactive graph")
    parser.add_argument("--datetime_index", type=int, help="Index of dataframe or it's name. Can be empty, then index 0 or new if no date column.")
    parser.add_argument("--return_type", type=str, choices=['best', 'all', 'dict', 'models_error_criterion'], help="""What will be returned by function. Best result, all results (array),
                        dict with more results (best, all, string of plot in HTML...) or models_error_criterion with MAPE or RMSE (based on config) of all models""")
    parser.add_argument("--datalength", type=int, help="The length of the data used for prediction")
    parser.add_argument("--debug", type=bool, help="Debug - print all results and all the errors on the way")
    parser.add_argument("--analyzeit", type=bool, help="Analyze input data - Statistical distribution, autocorrelation, seasonal decomposition etc.")
    parser.add_argument("--optimizeit", type=bool, help="Find optimal parameters of models")
    parser.add_argument("--repeatit", type=int, help="How many times is computation repeated")
    parser.add_argument("--other_columns", type=bool, help="If 0, only predicted column will be used for making predictions.")
    parser.add_argument("--default_other_columns_length", type=bool, help="Length of other columns in input vectors")
    parser.add_argument("--lengths", type=bool, help="Compute on various length of data (1, 1/2, 1/4...). Automatically choose the best length. If 0, use only full length.")
    parser.add_argument("--remove_outliers", type=bool, help="Remove extraordinary values. Value is threshold for ignored values. Value means how many times standard deviation from the average threshold is far")
    parser.add_argument("--standardize", type=str, choices=[None, 'standardize', '-11', '01', 'robust'], help="Data standardization, so all columns have similiar scopes")
    parser.add_argument("--error_criterion", type=str, choices=['mape', 'rmse'], help="Error criterion used for model")
    parser.add_argument("--compareit", type=int, help="How many models will be displayed in final plot. 0 if only the best one.")

    # Non empty command line args
    parser_args_dict = {k: v for k, v in parser.parse_known_args()[0].__dict__.items() if v is not None}

    # Edit config.py default values with command line arguments values if exist
    for i, j in parser_args_dict.items():
        if i in config:
            config[i] = j
        else:
            user_warning(f"Inserted option with command line --{i} not found in config.py use --help for more information.\n")


def predict(data=None, predicts=None, predicted_column=None, freq=None, models_parameters=None, data_source=None,
            csv_full_path=None, plot=None, used_models=None, datetime_index=None, return_type=None, datalength=None, data_transform=None, debug=None, analyzeit=None,
            optimizeit=None, repeatit=None, other_columns=None, default_other_columns_length=None, lengths=None, remove_outliers=None, standardize=None, error_criterion=None, compareit=None):

    """Make predictions mostly on time-series data. Data input can be set up in config.py (E.G. csv type, path and name),
    data can be used as input arguments to function (this has higher priority) or it can be setup as command line arguments (highest priority).
    Values defaults are None, because are in config.py

    Returned ranked results are evaluate only on data that was in training set. If you want to have true error criterion, run function validate_predictions.

    Args:
        data (np.ndarray, pd.DataFrame): Time series. Can be 2-D - more columns.
            !!! In Numpy array use data series as rows, but in dataframe use cols !!!. If you use CSV, leave it empty. Defaults to [].
        predicts (int, optional): Number of predicted values. Defaults to None.
        predicted_column (int, str, optional): Index of predicted column or it's name (dataframe).
            If list with more values only the first one will be evaluated (use predict_multiple_columns function if you need that. Defaults to None.
        freq (str. 'H' or 'D' or 'M', optional): If date index available, resample data and predict in defined time frequency. Defaults to None.

        **kwargs (dict): There is much more parameters of predict function. Check config.py for parameters details.

    Returns:
        Depend on 'return_type' config value - return best prediction {np.ndarray}, all models results {np.ndarray}, detailed results{dict} or interactive plot or print tables of results

    """

    _GUI = predictit.misc._GUI
    # Add everything printed + warnings to variable to be able to print in GUI
    if _GUI:
        import io

        stdout = sys.stdout
        sys.stdout = io.StringIO()

    if config["use_config_preset"] and config["use_config_preset"] != 'none':
        config.update(presets[config["use_config_preset"]])
        predictit.config.update_references_1()
        predictit.config.update_references_2()

    # Parse all functions parameters and it's values
    args, _, _, values = inspect.getargvalues(inspect.currentframe())

    # Edit config.py default values with arguments values if exist
    config.update({key: value for key, value in values.items() if key in args & values.keys() and value is not None})

    # Do not repeat actually mean evaluate once
    if not config['repeatit']:
        config['repeatit'] = 1

    predictit.config.update_references_1()
    predictit.config.update_references_2()

    # Definition of the table for spent time on code parts
    time_parts_table = PrettyTable()
    time_parts_table.field_names = ["Part", "Time"]

    def update_time_table(time_last):
        time_parts_table.add_row([progress_phase, time.time() - time_last])
        return time.time()
    time_point = time_begin = time.time()

    #######################################
    ############## LOAD DATA ####### ANCHOR Data
    #######################################

    progress_phase = "Data loading and preprocessing"
    update_gui(progress_phase, 'progress_phase')

    if config['data'] is None:

        ############# Load CSV data #############
        if config['data_source'] == 'csv':
            if config['csv_test_data_relative_path']:
                try:
                    data_location = this_path / 'predictit' / 'test_data'
                    csv_path = data_location / config['csv_test_data_relative_path']
                    config['csv_full_path'] = Path(csv_path).as_posix()
                except Exception:
                    print(f"\n ERROR - Test data load failed - Setup CSV adress and column name in config \n\n")
                    raise
            try:
                config['data'] = pd.read_csv(config['csv_full_path'], header=0).iloc[-config['datalength']:, :]
            except Exception:
                print("\n ERROR - Data load failed - Setup CSV adress and column name in config \n\n")
                raise

        ############# Load SQL data #############
        elif config['data_source'] == 'sql':
            try:
                config['data'] = predictit.database.database_load(server=config['server'], database=config['database'], freq=config['freq'],
                                                                  data_limit=config['datalength'], last=config['last_row'])
            except Exception:
                print("\n ERROR - Data load from SQL server failed - Setup server, database and predicted column name in config \n\n")
                raise

        elif config['data_source'] == 'test':
            config['data'] = predictit.test_data.generate_test_data.gen_random(config['datalength'])

    ##############################################
    ############ DATA PREPROCESSING ###### ANCHOR Preprocessing
    #############################################

    if not config['predicted_column']:
        config['predicted_column'] = 0

    ### Data are used in shape (n_features, n_samples)!!! - other way that usual convention... if iterate data_for_predictions[i] - you have columns

    data_for_predictions, data_for_predictions_df, predicted_column_name = predictit.data_preprocessing.data_consolidation(config['data'])

    predicted_column_index = 0

    multicolumn = 0 if data_for_predictions.shape[0] == 1 else 1

    if config['analyzeit'] == 1 or config['analyzeit'] == 3:
        predictit.analyze.analyze_data(data_for_predictions.T, window=30)

    ########################################
    ############# Data analyze ###### ANCHOR Analyze
    ########################################

    if config['remove_outliers']:
        data_for_predictions = predictit.data_preprocessing.remove_outliers(data_for_predictions, predicted_column_index=predicted_column_index, threshold=config['remove_outliers'])

    if config['correlation_threshold'] and multicolumn:
        data_for_predictions = predictit.data_preprocessing.keep_corelated_data(data_for_predictions, threshold=config['correlation_threshold'])

    ### Data preprocessing, common for all datatypes ###

    column_for_predictions_dataframe = data_for_predictions_df[data_for_predictions_df.columns[0]].to_frame()
    column_for_plot = column_for_predictions_dataframe.iloc[-7 * config['predicts']:]

    last_value = column_for_plot.iloc[-1]

    try:
        number_check = int(last_value)
    except Exception:
        raise ValueError(f"\n ERROR - Predicting not a number datatype. Maybe bad config['predicted_columns'] setup.\n Predicted datatype is {type(last_value[1])} \n\n")

    if config['data_transform'] == 'difference':

        for i in range(len(data_for_predictions)):
            data_for_predictions[i, 1:] = predictit.data_preprocessing.do_difference(data_for_predictions[i])

    if config['standardize']:
        data_for_predictions, final_scaler = predictit.data_preprocessing.standardize(data_for_predictions, used_scaler=config['standardize'])

    column_for_predictions = data_for_predictions[predicted_column_index]

    ############# Processed data analyze ############

    data_shape = np.shape(data_for_predictions)
    data_length = len(column_for_predictions)

    data_std = np.std(data_for_predictions[0, -30:])
    data_mean = np.mean(data_for_predictions[0, -30:])
    data_abs_max = max(abs(column_for_predictions.min()), abs(column_for_predictions.max()))

    multicolumn = 0 if data_shape[0] == 1 else 1

    if config['analyzeit'] == 2 or config['analyzeit'] == 3:
        predictit.analyze.analyze_data(data_for_predictions.T, window=30)

        # TODO repair decompose
        #predictit.analyze.decompose(column_for_predictions_dataframe, freq=36, model='multiplicative')

    min_data_length = 3 * config['predicts'] + config['default_n_steps_in']

    if data_length < min_data_length:
        config['repeatit'] = 1
        min_data_length = 3 * config['predicts'] + config['repeatit'] * config['predicts'] + config['default_n_steps_in']

    assert (min_data_length < data_length), 'To few data - set up less repeat value in settings or add more data'

    if config['lengths']:
        data_lengths = [data_length, int(data_length / 2), int(data_length / 4), min_data_length + 50, min_data_length]
        data_lengths = [k for k in data_lengths if k >= min_data_length]
    else:
        data_lengths = [data_length]

    data_number = len(data_lengths)

    models_names = list(config['used_models'].keys())
    models_number = len(models_names)

    for i in models_names:

        # If no parameters or parameters details, add it so no index errors later
        if i not in config['models_parameters']:
            config['models_parameters'][i] = {}

    # Empty boxes for results definition
    # The final result is - [repeated, model, data, results]
    test_results_matrix = np.zeros((config['repeatit'], models_number, data_number, config['predicts']))
    evaluated_matrix = np.zeros((config['repeatit'], models_number, data_number))
    reality_results_matrix = np.zeros((models_number, data_number, config['predicts']))
    test_results_matrix.fill(np.nan)
    evaluated_matrix.fill(np.nan)
    reality_results_matrix.fill(np.nan)

    models_time = {}
    models_optimizations_time = {}

    time_point = update_time_table(time_point)
    progress_phase = "Predict"
    update_gui(progress_phase, 'progress_phase')

    trained_models = {}

    models_test_outputs = np.zeros((config['repeatit'], config['predicts']))

    for i in range(config['repeatit']):

        models_test_outputs[i] = column_for_predictions[-config['predicts'] - i: - i] if i > 0 else column_for_predictions[-config['predicts'] - i:]

    models_test_outputs = models_test_outputs[::-1]

    used_input_types = []
    for i in models_names:
        used_input_types.append(config['models_input'][i])
    used_input_types = set(used_input_types)

    #######################################
    ############# Main loop ######## ANCHOR Main loop
    #######################################

    # Repeat evaluation on shifted data to eliminate randomness
    for data_length_index, data_length_iteration in enumerate(data_lengths):

        for input_type in used_input_types:
            try:
                used_sequention = predictit.define_inputs.create_inputs(input_type, data_for_predictions, predicted_column_index=predicted_column_index)
            except Exception:
                traceback_warning(f"Error in creating sequentions on input type: {input_type} model on data length: {data_length_iteration}")
                continue

            for iterated_model_index, (iterated_model_name, iterated_model) in enumerate(config['used_models'].items()):
                if config['models_input'][iterated_model_name] == input_type:

                    if config['models_input'][iterated_model_name] in ['one_step', 'one_step_constant']:
                        if multicolumn and config['predicts'] > 1:

                            user_warning(f"""Warning in model {iterated_model_name} \n\nOne-step prediction on multivariate data (more columns).
                                             Use batch (y lengt equals to predict) or do use some one column data input in config models_input or predict just one value.""")
                            continue

                    if isinstance(used_sequention, tuple):
                        model_train_input = (used_sequention[0][data_length - data_length_iteration:, :], used_sequention[1][data_length - data_length_iteration:, :])
                        model_predict_input = used_sequention[2]
                        model_test_inputs = used_sequention[3]

                    elif used_sequention.ndim == 1:
                        model_train_input = model_predict_input = used_sequention[data_length - data_length_iteration:]
                        model_test_inputs = []
                        for i in range(config['repeatit']):
                            model_test_inputs.append(used_sequention[data_length - data_length_iteration: - config['predicts'] - config['repeatit'] + i + 1])

                    else:
                        model_train_input = model_predict_input = used_sequention[:, data_length - data_length_iteration:]
                        model_test_inputs = []
                        for i in range(config['repeatit']):
                            model_test_inputs.append(used_sequention[:, data_length - data_length_iteration: - config['predicts'] - config['repeatit'] + i + 1])

                    if config['optimizeit'] and data_length_index == 0:
                        if iterated_model_name in config['models_parameters_limits']:

                            try:
                                start_optimization = time.time()

                                best_kwargs = predictit.best_params.optimize(iterated_model, config['models_parameters'][iterated_model_name], config['models_parameters_limits'][iterated_model_name],
                                                                             model_train_input=model_train_input, model_test_inputs=model_test_inputs, models_test_outputs=models_test_outputs,
                                                                             fragments=config['fragments'], iterations=config['iterations'], time_limit=config['optimizeit_limit'],
                                                                             error_criterion=config['error_criterion'], name=iterated_model_name, details=config['optimizeit_details'])

                                for k, l in best_kwargs.items():

                                    config['models_parameters'][iterated_model_name][k] = l

                            except Exception:
                                traceback_warning("Optimization didn't finished")

                            finally:
                                stop_optimization = time.time()
                                models_optimizations_time[iterated_model_name] = (stop_optimization - start_optimization)

                    try:
                        start = time.time()

                        # Train all models
                        trained_models[iterated_model_name] = iterated_model.train(model_train_input, **config['models_parameters'][iterated_model_name])

                        # Create predictions - out of sample
                        one_reality_result = iterated_model.predict(model_predict_input, trained_models[iterated_model_name], predicts=config['predicts'])

                        if np.isnan(np.sum(one_reality_result)) or one_reality_result is None:
                            continue

                        # Remove wrong values out of scope to not be plotted

                        one_reality_result[abs(one_reality_result) > 3 * data_abs_max] = np.nan

                        # Do inverse data preprocessing
                        if config['power_transformed'] == 1:
                            one_reality_result = predictit.data_preprocessing.fitted_power_transform(one_reality_result, data_std, data_mean)

                        if config['standardize']:
                            one_reality_result = final_scaler.inverse_transform(one_reality_result.reshape(-1, 1)).ravel()

                        if config['data_transform'] == 'difference':
                            one_reality_result = predictit.data_preprocessing.inverse_difference(one_reality_result, last_value)

                        reality_results_matrix[iterated_model_index, data_length_index] = one_reality_result

                        # Predict many values in test inputs to evaluate which models are best - do not inverse data preprocessing, because test data are processed
                        for repeat_iteration in range(config['repeatit']):

                            # Create in-sample predictions to evaluate if model is good or not

                            test_results_matrix[repeat_iteration, iterated_model_index, data_length_index] = iterated_model.predict(model_test_inputs[repeat_iteration], trained_models[iterated_model_name], predicts=config['predicts'])

                            if config['power_transformed'] == 2:
                                test_results_matrix[repeat_iteration, iterated_model_index, data_length_index] = predictit.data_preprocessing.fitted_power_transform(test_results_matrix[repeat_iteration, iterated_model_index, data_length_index], data_std, data_mean)

                            evaluated_matrix[repeat_iteration, iterated_model_index, data_length_index] = predictit.evaluate_predictions.compare_predicted_to_test(test_results_matrix[repeat_iteration, iterated_model_index, data_length_index],
                                                                                                                                                                   models_test_outputs[repeat_iteration], error_criterion=config['error_criterion'])

                    except Exception:
                        traceback_warning(f"Error in {iterated_model_name} model on data length {data_length_iteration}")

                    finally:
                        models_time[iterated_model_name] = (time.time() - start)

    #############################################
    ############# Evaluate models ######## ANCHOR Evaluate
    #############################################

    # Criterion is the best of average from repetitions
    time_point = update_time_table(time_point)
    progress_phase = "Evaluation"
    update_gui(progress_phase, 'progress_phase')

    repeated_average = np.mean(evaluated_matrix, axis=0)

    model_results = []

    for i in repeated_average:
        model_results.append(np.nan if np.isnan(i).all() else np.nanmin(i))

    sorted_results = np.argsort(model_results)

    if config['compareit']:
        sorted_results = sorted_results[:config['compareit']]
    else:
        sorted_results = sorted_results[0]

    predicted_models = {}

    for i, j in enumerate(sorted_results):
        this_model = list(config['used_models'].keys())[j]

        if i == 0:
            best_model_name = this_model

        predicted_models[this_model] = {'order': i + 1, 'error_criterion': model_results[j], 'predictions': reality_results_matrix[j, np.argmin(repeated_average[j])], 'data_length': np.argmin(repeated_average[j])}

    best_model_predicts = predicted_models[best_model_name]['predictions']
    complete_dataframe = column_for_plot.copy()

    if config['confidence_interval'] == 'default':
        try:
            lower_bound, upper_bound = predictit.misc.confidence_interval(column_for_plot, predicts=config['predicts'], confidence=config['confidence_interval'])
            complete_dataframe['Lower bound'] = complete_dataframe['Upper bound'] = None
            bounds = True
        except Exception:
            bounds = False
            traceback_warning("Error in compute confidence interval")

    else:
        bounds = False

    last_date = column_for_plot.index[-1]

    if isinstance(last_date, (pd.core.indexes.datetimes.DatetimeIndex, pd._libs.tslibs.timestamps.Timestamp)):
        date_index = pd.date_range(start=last_date, periods=config['predicts'] + 1, freq=column_for_plot.index.freq)[1:]
        date_index = pd.to_datetime(date_index)

    else:
        date_index = list(range(last_date + 1, last_date + config['predicts'] + 1))

    results_dataframe = pd.DataFrame(data={'Lower bound': lower_bound, 'Upper bound': upper_bound}, index=date_index) if bounds else pd.DataFrame(index=date_index)

    for i, j in predicted_models.items():
        if 'predictions' in j:
            results_dataframe[f"{j['order']} - {i}"] = j['predictions']
            complete_dataframe[f"{j['order']} - {i}"] = None
            if j['order'] == 1:
                best_model_name_plot = f"{j['order']} - {i}"

    last_value = float(column_for_plot.iloc[-1])

    complete_dataframe = pd.concat([complete_dataframe, results_dataframe], sort=False)
    complete_dataframe.iloc[-config['predicts'] - 1] = last_value

    #######################################
    ############# Plot ############# ANCHOR Plot
    #######################################

    time_point = update_time_table(time_point)
    progress_phase = "plot"
    update_gui(progress_phase, 'progress_phase')

    if config['plot']:

        plot_return = 'div' if _GUI else ''
        div = predictit.plot.plotit(complete_dataframe, plot_type=config['plot_type'], show=config['show_plot'], save=config['save_plot'], save_path=config['save_plot_path'],
                                    plot_return=plot_return, best_model_name=best_model_name_plot, predicted_column_name=predicted_column_name)


    ####################################
    ############# Results ####### ANCHOR Results
    ####################################

    if config['print']:
        if config['print_result']:
            print(f"\n Best model is {best_model_name} \n\t with results {best_model_predicts} \n\t with model error {config['error_criterion']} = {predicted_models[best_model_name]['error_criterion']}")
            print(f"\n\t with data length {data_lengths[predicted_models[best_model_name]['data_length']]} \n\t with paramters {config['models_parameters'][best_model_name]} \n")


        # Table of results
        models_table = PrettyTable()
        models_table.field_names = ['Model', f"Average {config['error_criterion']} error", "Time"]
        # Fill the table
        for i, j in predicted_models.items():
            if i in models_time:
                models_table.add_row([i, j['error_criterion'], models_time[i]])

        if config['print_table']:
            print(f'\n {models_table} \n')

        time_parts_table.add_row(['Complete time', time.time() - time_begin])

        if config['print_time_table']:
            print(f'\n {time_parts_table} \n')

        ### Print detailed resuts ###
        if config['print_detailed_result']:

            for i, j in enumerate(models_names):
                print(models_names[i])

                for k in range(data_number):
                    print(f"\t With data length: {data_lengths[k]}  {config['error_criterion']} = {repeated_average[i, k]}")

                if config['optimizeit']:
                    if j in models_optimizations_time:
                        print(f"\t Time to optimize {models_optimizations_time[j]} \n")
                    print("Best models parameters", config['models_parameters'][j])

    time_point = update_time_table(time_point)
    progress_phase = 'finished'
    update_gui(progress_phase, 'progress_phase')

    # Return stdout and stop collect warnings and printed output
    if _GUI:
        output = sys.stdout.getvalue()
        sys.stdout = stdout

        print(output)

    ################################
    ########### Return ###### ANCHOR Return
    ################################

    if config['return_type'] == 'models_error_criterion':
        return repeated_average

    elif config['return_type'] == 'all':
        return reality_results_matrix

    elif not config['return_type'] or config['return_type'] == 'best':
        return best_model_predicts

    elif config['return_type'] == 'results dataframe':
        return results_dataframe

    elif config['return_type'] == 'detailed results dictionary':
        detailed_results_dictionary = {
            'predicted_models': predicted_models,
            'best': best_model_predicts,
            'all': reality_results_matrix,
            'time_table': time_parts_table.get_html_string(),
            'models_table': models_table.get_html_string(),
        }

        if _GUI:
            detailed_results_dictionary['plot'] = div
            detailed_results_dictionary['output'] = output

        return detailed_results_dictionary


def predict_multiple_columns(data=None, predicted_columns=None, freqs=None, database_deploy=None, predicts=None, models_parameters=None, data_source=None, error_criterion=None, compareit=None,
                             csv_full_path=None, plot=None, used_models=None, datetime_index=None, return_all=None, datalength=None, data_transform=None, debug=None, analyzeit=None,
                             optimizeit=None, repeatit=None, other_columns=None, default_other_columns_length=None, lengths=None, remove_outliers=None, standardize=None):
    """Predict multiple colums and multiple frequencions at once. Use predict function.

    Args:
        data (np.ndarray, pd.DataFrame): Time series. Can be 2-D - more columns.
            !!! In Numpy array use data series as rows, but in dataframe use cols !!!. Defaults to [].
        predicted_columns (list, optional): List of indexes of predicted columns or it's names (dataframe). Defaults to None.
        freqs (str. 'H' or 'D' or 'M', optional): If date index available, resample data and predict in defined time frequency. Defaults to [].
        database_deploy (bool, optional): Whether deploy results to database !!!
            For every database it's necessary to adjust the database function. Defaults to 0.

    Returns:
        np.ndarray: All the predicted results.
    """

    # Parse all functions parameters and it's values
    args, _, _, values = inspect.getargvalues(inspect.currentframe())

    # Edit config.py default values with arguments values if exist
    config.update({key: value for key, value in values.items() if key in args & values.keys() and value is not None})

    freqs_length = len(config['freqs']) if config['freqs'] else 1
    columns_length = len(config['predicted_columns']) if config['predicted_columns'] else 1

    predictions_full = [np.nan] * freqs_length

    for j in range(freqs_length):

        predictions = [np.nan] * columns_length

        for i in range(columns_length):

            try:
                predictions[i] = predict(predicted_column=config['predicted_columns'][i], freq=config['freqs'][j])

            except Exception:
                traceback_warning(f"Error in making predictions on column {config['predicted_columns'][i]} and freq {config['freqs'][j]}")

        predictions_full[j] = predictions

        # if config['database_deploy']:
        #     try:
        #         predictit.database.database_deploy(config["server"], config["database"], last_date, predictions[0], predictions[1], freq=config['freqs'][j])
        #     except Exception:
        #         traceback_warning(f"Error in database deploying on freq {j}")

    return predictions_full


def compare_models(data_all=None, predicts=None, predicted_column=None, freq=None, models_parameters=None, data_source=None, compareit=None,
                   csv_full_path=None, plot=None, used_models=None, datetime_index=None, return_all=None, datalength=None, data_transform=None, debug=None, analyzeit=None,
                   optimizeit=None, repeatit=None, other_columns=None, default_other_columns_length=None, lengths=None, remove_outliers=None, standardize=None, error_criterion=None,):
    """Function that helps to choose apropriate models. It evaluate it on test data and then return results.
    After you know what models are the best, you can use only them in functions predict() or predict_multiple_columns.
    You can define your own test data and find best modules for your process. You can pickle data if you are use it
    more often to faster loading.

    Args:
        data_all (dict): Dictionary of data names and data values (np.array). You can use data from test_data module, generate_test_data script (e.g. gen_sin()).
        **kwargs (dict): All parameters of predict function. Check config.py for parameters details.

    """

    # Parse all functions parameters and it's values
    args, _, _, values = inspect.getargvalues(inspect.currentframe())

    # Edit config.py default values with arguments values if exist
    config.update({key: value for key, value in values.items() if key in args & values.keys() and value is not None})

    # If no data_all inserted, default will be used
    if config['data_all'] is None:
        config['data_all'] = {'sin': predictit.test_data.generate_test_data.gen_sin(config['datalength']), 'Sign': predictit.test_data.generate_test_data.gen_sign(config['datalength']),
                              'Random data': predictit.test_data.generate_test_data.gen_random(config['datalength'])}

    ### Pickle option was removed, add if you need it...
    # if config['pickleit']:
    #     from predictit.test_data.pickle_test_data import pickle_data_all
    #     import pickle
    #     pickle_data_all(config['data_all'], datalength=config['datalength'])

    # if config['from_pickled']:

    #     script_dir = Path(__file__).resolve().parent
    #     data_folder = script_dir / "test_data" / "pickled"

    #     for i, j in config['data_all'].items():
    #         file_name = i + '.pickle'
    #         file_path = data_folder / file_name
    #         try:
    #             with open(file_path, "rb") as input_file:
    #                 config['data_all'][i] = pickle.load(input_file)
    #         except Exception:
    #             traceback_warning(f"Test data not loaded - First in config['py'] pickleit = 1, that save the data on disk, then load from pickled.")

    results = {}

    for i, j in config['data_all'].items():
        config['plot_name'] = i
        try:
            result = predict(data=j, return_type="models_error_criterion")
            results[i] = (result - np.nanmin(result)) / (np.nanmax(result) - np.nanmin(result))

        except Exception:
            traceback_warning(f"Comparison for data {i} didn't finished.")
            results[i] = np.nan

    print(list(results.values()))


    results_array = np.stack(list(results.values()), axis=0)

    # results_array[np.isnan(results_array)] = np.inf

    all_data_average = np.nanmean(results_array, axis=0)

    models_best_results = []


    for i in all_data_average:
        models_best_results.append(np.nan if np.isnan(i).all() else np.nanmin(i))
    models_best_results = np.array(models_best_results)

    best_compared_model = int(np.nanargmin(models_best_results))
    best_compared_model_name = list(config['used_models'].keys())[best_compared_model]

    all_lengths_average = np.nanmean(all_data_average, axis=0)
    best_all_lengths_index = np.nanargmin(all_lengths_average)

    print("\n\nTable of complete results. Percentual standardized error is between 0 and 1. If 0, model was the best on all defined data, 1 means it was the worst.")
    models_table = PrettyTable()
    models_table.field_names = ['Model', 'Percentual standardized error']

    # Fill the table
    for i, j in enumerate(config['used_models']):
        models_table.add_row([j, models_best_results[i]])

    print(f'\n {models_table} \n')

    print(f"\n\nBest model is {best_compared_model_name}")
    print(f"\n\nBest data length index is {best_all_lengths_index}")


def validate_predictions(data_list=None, data=None, predicts=None, predicted_column=None, freq=None, models_parameters=None, data_source=None, compareit=None,
                         csv_full_path=None, plot=None, used_models=None, datetime_index=None, return_type=None, datalength=None, data_transform=None, debug=None, analyzeit=None,
                         optimizeit=None, repeatit=None, other_columns=None, default_other_columns_length=None, lengths=None, remove_outliers=None, standardize=None, error_criterion=None):
    """Function that evaluate models on test data that was not in train data. It create interactive plots of models results compared with test data.
    It is recommended to use this function in jupyter notebook because of big number of plots that jupyter do not wait to quit. Typical use case is that we have just one data
    and we separate it on chunks. We can see if model is able of generalization. Result error criterions are printed. It has number - order - as it is good
    on data in train set, it has alse error on test set. Error criterion can be 'rmse', 'mape', or similiarity based 'dtw' (dynamic time warping).

    Args:
        data_list (dict): Dictionary of data names and data values (np.array). You can use data from test_data module, generate_test_data script (e.g. gen_sin()).
        **kwargs (dict): All parameters of predict function. Check config.py for parameters details.

    """
    import json

    config.update({'plot': 0, 'return_type': 'results dataframe', 'confidence_interval': 0, 'print': 0})

    for i, data_part in enumerate(config['data_list']):

        results_error_criterion = {}

        data_for_predictions, data_for_predictions_df, predicted_column_name = predictit.data_preprocessing.data_consolidation(
            data_part, predicted_column=config['predicted_column'], datalength=config['datalength'], other_columns=config['other_columns'],
            do_remove_outliers=config['remove_outliers'], datetime_index=config['datetime_index'], freq=config['freq'], dtype=config['dtype'])

        column_for_plot = data_for_predictions_df.iloc[-7 * config['predicts']: -config['predicts'], 0]

        train, test = predictit.data_preprocessing.split(data_for_predictions, predicts=config['predicts'])

        results_dataframe = predict(train)

        for col in results_dataframe.columns:
            results_error_criterion[col] = predictit.evaluate_predictions.compare_predicted_to_test(results_dataframe[col], test, train=train, error_criterion=config['error_criterion'])

        results_dataframe['Test'] = test

        complete_dataframe = pd.concat([column_for_plot, results_dataframe], sort=False)
        complete_dataframe.iloc[-config['predicts'] - 1] = float(column_for_plot.iloc[-1])


        predictit.plot.plotit(complete_dataframe, plot_type=config['plot_type'], show=config['show_plot'], save=config['save_plot'], save_path=config['save_plot_path'], plot_return=False, best_model_name='Test', predicted_column_name=predicted_column_name)

        results_for_printing = json.dumps(results_error_criterion, indent=4)

        print(f"\nOrder - model - {config['error_criterion']}\n\n{results_for_printing[2: -1]}")


if __name__ == "__main__" and config['used_function']:
    if config['used_function'] == 'predict':
        prediction_results = predict()

    elif config['used_function'] == 'predict_multiple_columns':
        prediction_results = predict_multiple_columns()

    elif config['used_function'] == 'compare_models':
        compare_models()

    elif config['used_function'] == 'validate_predictions':
        validate_predictions()
