# -*- coding: utf-8 -*-
"""Session 2: Simple Moving Average Rule with LSTM.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1wtdBupaopWY9gblnqgVFyFCYXX2-Chle

# 1. Import Data 
1.   Install the Alpha Vantage API
2.   [Claim your own API Key](https://www.alphavantage.co/support/#api-key)
3.   [Import data from Time Series Stock APIs by specifying API key and Parameters](https://www.alphavantage.co/documentation/)

Install Alpha Vantage Package
"""



from alpha_vantage.timeseries import TimeSeries
import os

# Prefer a key supplied through the ALPHAVANTAGE_API_KEY environment variable
# so that a personal key does not have to be written into the script; the
# original literal stays as the fallback so existing runs behave as before.
API_key = os.environ.get('ALPHAVANTAGE_API_KEY', 'RY5PVLL8EDHDRLEG')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime 
from datetime import date
import math
import pandas_datareader as web

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

"""Program import data class"""

class Data:
  """Download daily price history for one ticker through Alpha Vantage.

  Attributes:
    API_key: key handed to the Alpha Vantage ``TimeSeries`` client.
    symbol: ticker symbol to request.
  """

  def __init__(self,API_key, symbol):
    self.symbol = symbol
    self.API_key = API_key

  def import_data(self):
    """Fetch the full daily series and return it with short column names.

    Returns:
      The first element of the ``get_daily`` response, with the numbered
      column labels ('1. open', ...) renamed to open/high/low/close/volume.
    """
    short_names = {
      '1. open': 'open',
      '2. high': 'high',
      '3. low': 'low',
      '4. close': 'close',
      '5. volume': 'volume',
    }
    client = TimeSeries(key=self.API_key, output_format='pandas')
    response = client.get_daily(self.symbol, outputsize = 'full')
    # Element 0 of the response is the price table; rename it in place.
    prices = response[0]
    prices.rename(columns=short_names, inplace = True)
    return prices

"""return data with specified API and parameters: 

We use daily prices for Tesla (TSLA) and Apple (AAPL) as case studies.
"""

# Download both tickers with the module-level API_key instead of repeating the
# key literal, so the key only has to be changed in one place.
TSLA=Data(API_key,'TSLA')
df_TSLA=TSLA.import_data()
# Order rows by ascending index so that later row-based splits follow the index order.
df_TSLA.sort_index(ascending=True, inplace=True)
df_TSLA.head()

AAPL=Data(API_key,'AAPL')
df_AAPL=AAPL.import_data()
df_AAPL.sort_index(ascending=True, inplace=True)
df_AAPL.head()

"""# 2. Build Neural Network

LSTM stands for Long Short-Term Memory, a model that is able to store information over a period of time. [Liu et al.'s (2018)](https://ieeexplore.ieee.org/abstract/document/8398183) paper shows that LSTM recurrent neural networks are able to filter and extract feature values and analyze stock data, so we use this Colab to set up the prediction model for the corresponding stock transactions. We also draw inspiration from [Sang and Di Pierro's paper (2019)](https://doi.org/10.1016/j.jfds.2018.10.003) to compare the Return on Investment (RoI) with the traditional Simple Moving Average (SMA) strategy. The pure SMA strategy for trading the two stocks is implemented in [Session 1](https://colab.research.google.com/drive/1KgtvSVkbG4_wAzOMOYAtogS18sDcGekG?usp=sharing).

First, preprocess the data by splitting it into a training set and a testing set.
"""

class data_cleaning:
  """Split a table by row position: first 3/5 for training, the rest for testing."""

  def __init__(self, dataset):
    self.dataset = dataset

  def _split_row(self):
    """Return the row position where the testing part begins."""
    return int(3/5 * self.dataset.shape[0])

  def data_training(self):
    """Return a copy of the rows before the split position."""
    cut = self._split_row()
    return self.dataset.iloc[:cut].copy()

  def data_testing(self):
    """Return ``(testing, index)``: a copy of the rows from the split
    position onwards, and that copy's index."""
    cut = self._split_row()
    held_out = self.dataset.iloc[cut:].copy()
    return held_out, held_out.index

# Split each ticker into its training part and its testing part; the testing
# index is kept separately because it later labels the predictions.
TSLA_cleaning = data_cleaning(dataset=df_TSLA)
TSLA_training = TSLA_cleaning.data_training()
TSLA_training

TSLA_testing, TSLA_predict_date = TSLA_cleaning.data_testing()
TSLA_testing

TSLA_predict_date

AAPL_cleaning = data_cleaning(dataset=df_AAPL)
AAPL_training = AAPL_cleaning.data_training()
AAPL_training

AAPL_testing, AAPL_predict_date = AAPL_cleaning.data_testing()
AAPL_testing

AAPL_predict_date

"""Then we build the LSTM Neural Network to create datasets by LSTM Neural Network's prediction, and plot the actual and predicted price respectively."""

class Stock_LSTM:
  """Train a stacked LSTM on one table and predict one column of another.

  Args:
    dataset1: training table (rows are time steps, columns are features).
    dataset2: testing table with the same columns as ``dataset1``.
    topic: label used in the plot title and legend.
    predict_date: index given to the returned predictions; it must have one
      entry per row of ``dataset2``.
    window: number of past rows fed to the network per sample (default 60,
      the value that was previously hard-coded).
    target_col: positional column that is predicted (default 0, as before).
      NOTE(review): for frames produced by ``Data.import_data`` column 0 is
      presumably 'open', while later cells label the prediction 'close' -
      confirm which column is intended.
  """

  def __init__(self, dataset1, dataset2, topic, predict_date, window=60, target_col=0):
    self.dataset1 = dataset1
    self.dataset2 = dataset2
    self.topic = topic
    self.predict_date = predict_date
    self.window = window
    self.target_col = target_col

  @staticmethod
  def _make_windows(scaled, window, target_col=0):
    """Slice a 2-D array into (samples, targets) for supervised training.

    Sample ``k`` is rows ``k .. k+window-1`` (all columns); its target is the
    value of ``target_col`` in row ``k+window``.
    """
    samples = []
    targets = []
    for i in range(window, scaled.shape[0]):
      samples.append(scaled[i-window:i])
      targets.append(scaled[i, target_col])
    return np.array(samples), np.array(targets)

  @staticmethod
  def _inverse_scale(values, scale, offset):
    """Undo ``scaled = raw * scale + offset`` for one column."""
    return (values - offset) / scale

  def build(self):
    """Fit the network, predict the testing period, plot and return it.

    Returns:
      A one-column DataFrame of predictions in original price units, indexed
      by ``predict_date``.

    Raises:
      ValueError: if the training table has no more rows than ``window``, so
        not even one training sample can be formed.
    """
    window = self.window
    if self.dataset1.shape[0] <= window:
      raise ValueError('training data needs more than %d rows' % window)

    scalar = MinMaxScaler()

    # Fit the scaler on the training data only.
    data_training_scaled = scalar.fit_transform(self.dataset1)
    X_train, y_train = self._make_windows(data_training_scaled, window, self.target_col)

    regressor = Sequential()

    # The feature count is taken from the data instead of being fixed at 5.
    regressor.add(LSTM(units = 50, activation = 'relu', return_sequences = True, input_shape = (X_train.shape[1], X_train.shape[2])))
    regressor.add(Dropout(0.2))

    regressor.add(LSTM(units = 60, activation = 'relu', return_sequences = True))
    regressor.add(Dropout(0.3))

    regressor.add(LSTM(units = 80, activation = 'relu', return_sequences = True))
    regressor.add(Dropout(0.4))

    regressor.add(LSTM(units = 120, activation = 'relu'))
    regressor.add(Dropout(0.5))

    regressor.add(Dense(units = 1))
    regressor.summary()
    # Compiling the RNN
    regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
    regressor.fit(X_train, y_train, epochs=50, batch_size = 64)

    # The first test sample needs `window` rows of history. Those come from
    # the END OF THE TRAINING data (the rows that precede the test period),
    # not from the tail of the test data itself.
    history = self.dataset1.tail(window)
    dt = pd.concat([history, self.dataset2], ignore_index = True)
    # Reuse the scaling learned on the training data; refitting here would
    # put test inputs on a different scale than the one the network saw.
    inputs = scalar.transform(dt)
    X_test, y_test = self._make_windows(inputs, window, self.target_col)

    y_pred = regressor.predict(X_test)

    # Convert back to price units with the fitted scaler's own parameters
    # (scaled = raw * scale_ + min_) rather than a fixed constant.
    scale = scalar.scale_[self.target_col]
    offset = scalar.min_[self.target_col]
    y_pred = self._inverse_scale(y_pred, scale, offset)
    y_test = self._inverse_scale(y_test, scale, offset)

    y_pred = pd.DataFrame(y_pred, index = self.predict_date)
    y_test = pd.DataFrame(y_test, index = self.predict_date)

    plt.figure(figsize=(28,12))
    plt.plot(y_test, color = 'red', label = self.topic + ' Price')
    plt.plot(y_pred, color = 'blue', label = 'Predicted ' + self.topic + ' Price')
    plt.title(self.topic + ' Price Prediction - After 50 epochs and Batch Size = 64')
    plt.xlabel('Time')
    plt.ylabel('Price')
    plt.legend()
    plt.show()
    return y_pred

TSLA_LSTM = Stock_LSTM(TSLA_training,TSLA_testing,'TSLA', TSLA_predict_date)
TSLA_LSTM_data = TSLA_LSTM.build()
TSLA_LSTM_data

# Label the single prediction column 'close' on a copy. Assigning `.columns`
# avoids `set_axis(..., inplace=False)`, whose `inplace` argument no longer
# exists in pandas 2.x.
df_predict_TSLA = TSLA_LSTM_data.copy()
df_predict_TSLA.columns = ['close']
df_predict_TSLA

AAPL_LSTM = Stock_LSTM(AAPL_training,AAPL_testing,'AAPL',AAPL_predict_date)
AAPL_LSTM_data = AAPL_LSTM.build()
AAPL_LSTM_data

df_predict_AAPL = AAPL_LSTM_data.copy()
df_predict_AAPL.columns = ['close']
df_predict_AAPL

"""# 3. Generate buy and sell signals with Visualizations

Program the signal class

https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html

The Simple moving average with window $n$ at time $t$:

(1) $$SMA_t^n=\frac{1}{n}\sum_{i=t-n+1}^{t}p_i$$
"""

class signal:
  """Moving-average crossover signals computed from a 'close' column.

  Args:
    data: DataFrame with a 'close' column.
    short_window: number of rows in the short moving average.
    long_window: number of rows in the long moving average.
  """

  def __init__(self, data, short_window, long_window):
    self.data = data
    self.short_window = short_window
    self.long_window = long_window

  def signals(self):
    """Return the signal table, sorted by index.

    Columns:
      signal: 1.0 where the short average is above the long average, else
        0.0; the first ``short_window`` rows are always 0.0.
      short_ma / long_ma: trailing simple moving averages of 'close'.
      positions: row-to-row change of ``signal`` (+1.0 when it switches on,
        -1.0 when it switches off).

    Rows in which any column is NaN (the warm-up period) are dropped.
    """
    # Sort by the index itself so the index does not need a particular name.
    prices = self.data['close'].sort_index()

    # Trailing windows: the average at row t uses rows t-n+1 .. t only. A
    # centred window would also read rows after t, i.e. prices that are not
    # yet known when the signal for t is produced.
    short_ma = prices.rolling(window=self.short_window, min_periods=self.short_window).mean()
    long_ma = prices.rolling(window=self.long_window, min_periods=self.long_window).mean()

    # Comparisons involving NaN are False, so warm-up rows get 0.0.
    crossed = np.where(short_ma > long_ma, 1.0, 0.0)
    crossed[:self.short_window] = 0.0

    # Build the column from a plain array instead of assigning into a slice
    # of an existing column (chained assignment).
    signals = pd.DataFrame(index=prices.index)
    signals['signal'] = crossed
    signals['short_ma'] = short_ma
    signals['long_ma'] = long_ma
    signals['positions'] = signals['signal'].diff()
    return signals.dropna()

"""Return signals with input data and short and long windows. 

We use daily prices for Tesla (TSLA) and Apple (AAPL) as case studies.

Short_window = 5

Long_window = 10

Initially, we use one of the simplest yet most effective ways to filter the signal, the Simple Moving Average (SMA), calculated by summing the instrument's closing prices over a certain number of single periods (for instance, 15-minute, 30-minute, or 1-hour samples) and dividing the total by the number of periods.
[Kablan and Falzon's paper](https://doi.org/10.5281/zenodo.1328892) and [Silva et al.'s IEEE paper](https://doi.org/10.1109/IJCNN.2014.6889835) suggest choosing 5-min and 10-min as the short and long windows for the crossover simulation.
"""

# Crossover signals on the predicted prices, using a short window of 5 rows
# and a long window of 10 rows for both tickers.
SMA_predict_TSLA = signal(data=df_predict_TSLA, short_window=5, long_window=10)
SMA_predict_TSLA

Signal_predict_TSLA = SMA_predict_TSLA.signals()
Signal_predict_TSLA.head()

SMA_predict_AAPL = signal(data=df_predict_AAPL, short_window=5, long_window=10)
SMA_predict_AAPL

Signal_predict_AAPL = SMA_predict_AAPL.signals()
Signal_predict_AAPL.head()

"""Define signal visualization class"""

class signal_figure:
  """Plot prices, both moving averages and the buy/sell markers.

  Args:
    prices: DataFrame with a 'close' column.
    signals: table with 'short_ma', 'long_ma' and 'positions' columns,
      sharing its index with ``prices``.
    topic: label used in the plot title.
  """

  def __init__(self, prices, signals, topic):
    self.prices = prices
    self.signals = signals
    self.topic = topic

  def signal_figure(self):
    """Draw the figure and return the merged price/signal table.

    The table is the inner join of the 'close' column with the signal table
    on the index, sorted by 'date'.
    """
    merged = self.prices['close'].to_frame().merge(
      self.signals, how='inner', left_index=True, right_index=True)
    merged = merged.sort_values(by='date')

    # Rows where the signal switched on (+1) or off (-1).
    buys = merged.loc[merged['positions'] == 1.0]
    sells = merged.loc[merged['positions'] == -1.0]

    plt.figure(figsize = (18,8))
    plt.plot(merged['close'], color='g', lw=1., label = 'Stock Price')
    plt.plot(merged['short_ma'], color='r', lw=1., label = 'Short Window Moving Average')
    plt.plot(merged['long_ma'], color='b', lw=1., label = 'Long Window Moving Average')
    plt.plot(buys.index, buys['short_ma'], '^', markersize=5, color='green',label = 'buying signal')
    plt.plot(sells.index, sells['long_ma'],'v', markersize=5, color='red',label = 'selling signal')
    plt.xlabel('Date')
    plt.ylabel('Dollars')
    plt.title(self.topic + ' Predicted Stock Price With Buying and Selling Signal Generated By SMA')
    plt.legend()
    plt.show()
    return merged

"""Return plots with input data.

We use daily prices for Tesla (TSLA) and Apple (AAPL) as case studies.

Short_window = 5

Long_window = 10
"""

# Plot each ticker's predicted prices with its signals and keep the merged
# price/signal table that the plotting call returns.
SMA_predict_TSLA_Figure = signal_figure(prices=df_predict_TSLA, signals=Signal_predict_TSLA, topic='TSLA')
SMA_Signal_predict_TSLA = SMA_predict_TSLA_Figure.signal_figure()

SMA_Signal_predict_TSLA.head()

SMA_predict_AAPL_Figure = signal_figure(prices=df_predict_AAPL, signals=Signal_predict_AAPL, topic='AAPL')
SMA_Signal_predict_AAPL = SMA_predict_AAPL_Figure.signal_figure()

SMA_Signal_predict_AAPL.head()

"""# 4. Generate Return of Investment and Portfolio Flows (cash, holding and total)

Program the portfolio class
"""

class portfolio:
  """Simulate trading on +1/-1 position signals and report the results.

  Args:
    data: DataFrame with a 'close' column (trade price) and a 'positions'
      column (1 = buy signal, -1 = sell signal, anything else = no trade).
    topic: label used in the plot titles.
    initial_capital: starting cash.
    max_buy: maximum number of units bought per buy signal.
    max_sell: maximum number of units sold per sell signal.
  """

  def __init__(self,data,topic,initial_capital=10000,max_buy=10000000,max_sell=10000000):
    self.data = data
    self.topic = topic
    self.initial_capital = initial_capital
    self.max_buy = max_buy
    self.max_sell = max_sell

  def _buy(self, i, cash, stock, price):
    """Buy as many whole units as cash and ``max_buy`` allow; return (cash, stock)."""
    # A non-positive price cannot be traded on; treat it as "cannot afford".
    shares = cash // price if price > 0 else 0
    if shares<1:
      print('order %d: total cash %f, not enough to buy 1 share at price %f' % (i, cash, price))
      # Nothing bought: hand the state back unchanged so the caller can
      # always unpack the result.
      return cash, stock
    buy_units = min(shares, self.max_buy)
    cash -= buy_units*price
    stock += buy_units
    print('index %d: buy %d units at price %f, current cash %f, current stock %f,current holding %f' % (i, buy_units, price, cash, stock, stock*price))
    return cash, stock

  def _sell(self, i, cash, stock, price):
    """Sell up to ``max_sell`` units of the current stock; return (cash, stock)."""
    if stock == 0:
      print('index %d: cannot sell anything, currentstock 0' % (i))
      return cash, stock
    sell_units = min(stock, self.max_sell)
    stock -= sell_units
    cash += sell_units*price
    print('index %d: sell %d units at price %f, current cash %f, current stock %f,current holding %f' % (i, sell_units, price, cash, stock, stock*price))
    return cash, stock

  @staticmethod
  def _running_sharpe(returns):
    """Mean/std of all rows BEFORE each row; 0 where that std is 0.

    NOTE(review): the current row is excluded, which reproduces the original
    ``iloc[0:i]`` slicing - confirm whether it should be included.
    """
    history = returns.expanding()
    mean = history.mean().shift(1)
    spread = history.std().shift(1)
    return (mean / spread).mask(spread == 0, 0.0)

  def _plot(self, management):
    """Draw portfolio flows, ROI and Sharpe ratio in three stacked panels."""
    fig, (ax1, ax2, ax3) = plt.subplots(3,1, figsize = (15,20))
    ax1.plot(management['holding'], label='Holdings', color='r')
    ax1.plot(management['cash'], label="Cash", color='g')
    ax1.plot(management['total'], label="Total", color='b')
    ax1.set_title("Visualization of #"+ self.topic +" Portfolio Flows",fontsize=15)
    ax1.set_xlabel('Days',fontsize=10)
    ax1.set_ylabel('Total Holdings',fontsize=10)
    ax1.legend()

    ax2.plot(management["roi"],label="ROI_SMA_algorithm", color="b")
    ax2.plot(management["roi_buy&hold"],label="ROI_buy&hold", color="r")
    ax2.set_title("Comparison of #" + self.topic + " ROI on SMA and Simple Buy&Hold Strategy",fontsize=15)
    ax2.set_ylabel('ROI',fontsize=10)
    ax2.set_xlabel('Days',fontsize=10)
    ax2.legend()

    ax3.plot(management["sharpe"],label="sharpe_SMA_algorithm", color="b")
    ax3.plot(management["sharpe_buy&hold"],label="sharpe_buy&hold", color="r")
    ax3.set_title("Comparison of #" + self.topic +" Sharpe Ratio on SMA and Simple Buy&Hold Strategy",fontsize=15)
    ax3.set_ylabel('Sharpe Ratio',fontsize=10)
    ax3.set_xlabel('Days',fontsize=10)
    ax3.legend()
    # Show the figure explicitly, as the other plotting code in this file does.
    plt.show()

  def portfolios(self, plot=True):
    """Run the simulation and return the input table plus result columns.

    Args:
      plot: draw the three summary panels when True (the default); pass
        False to only compute the table.

    Returns:
      A copy of ``data`` with the columns cash, stock, holding, total, roi,
      roi_buy&hold, sharpe and sharpe_buy&hold appended. ``data`` itself is
      left untouched.
    """
    management = self.data.copy()
    # Plain arrays give positional access regardless of the index type.
    prices = management['close'].to_numpy()
    states = management['positions'].to_numpy()

    cash = self.initial_capital
    stock = 0
    cashes = []
    stocks = []
    holdings = []
    for i, (state, price) in enumerate(zip(states, prices)):
      if state == 1:
        cash, stock = self._buy(i, cash, stock, price)
      elif state == -1:
        cash, stock = self._sell(i, cash, stock, price)
      cashes.append(cash)
      stocks.append(stock)
      # Value the position at the current row's price on every row, not only
      # on rows where a trade happened.
      holdings.append(stock*price)

    management['cash']=cashes
    management['stock']=stocks
    management['holding']=holdings
    management['total']=management['cash']+management['holding']

    management['roi']=(management['total']-self.initial_capital)/self.initial_capital
    # Buy-and-hold benchmark: price change relative to the first row.
    first_close = management["close"].iloc[0] if len(management) else np.nan
    management["roi_buy&hold"]= (management["close"]-first_close)/first_close

    management["sharpe"] = self._running_sharpe(management["roi"])
    management["sharpe_buy&hold"] = self._running_sharpe(management["roi_buy&hold"])

    if plot:
      self._plot(management)

    return management

"""Return plots with input data.

We use daily prices for Tesla (TSLA) and Apple (AAPL) as case studies.

Short_window = 5

Long_window = 10
"""

# Simulate trading on each ticker's signal table with the default capital and
# trade-size limits, then preview the resulting table.
SMA_predict_TSLA_Portfolio = portfolio(data=SMA_Signal_predict_TSLA, topic='TSLA')
TSLA_predict_Portfolio = SMA_predict_TSLA_Portfolio.portfolios()
TSLA_predict_Portfolio.head()

SMA_predict_AAPL_Portfolio = portfolio(data=SMA_Signal_predict_AAPL, topic='AAPL')
AAPL_predict_Portfolio = SMA_predict_AAPL_Portfolio.portfolios()
AAPL_predict_Portfolio.head()

