python/wywongbd/pairstrade-fyp-2019/flask/static/plots/client_demo.py

client_demo.py
# utilities
import glob
import logging
import os
import subprocess
import sys
import traceback
from datetime import date, datetime

import numpy as np
import pandas as pd

pd.set_option('display.max_columns', 500)

# figure plotting
import bokeh.models as bkm
from bokeh.io import show, curdoc
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, RangeTool, DatetimeTickFormatter, LabelSet
from bokeh.plotting import figure, show
from bokeh.client import push_session, pull_session

# bokeh widgets
from bokeh.layouts import column, widgetbox
from bokeh.models.widgets import Button, Select, DateRangeSlider, TableColumn, DataTable

# import backtesting script
sys.path.append('./jupyter_py')
sys.path.append('./process_data')
sys.path.append('./log_helper')
sys.path.append('./model')

from decode_logs import Decoder, get_current_time
from rl_train import run_rl_backtest

# Period index forwarded to run_rl_backtest() for reinforcement-learning runs.
RL_period_idx = 3

# Two first-run flags: [0] guards the initial backtest further down in this
# file, [1] guards the initial widget wiring. Each is flipped to False by the
# block it guards.
FIRST_ITER = [True, True]

if all(FIRST_ITER):
    # Shared, mutable store for every backtesting parameter; the widget
    # callbacks write into it and the backtest runners read from it.
    backtest_params = dict(
        strategy_type="kalman",
        stk_0="AAN",
        stk_1="AER",
        backtest_start="2018-03-20",
        backtest_end="2019-01-03",
        max_start="2014-01-01",
        max_end="2019-01-03",
    )
    
def build_price_and_spread_fig(data, action_df):
    """Build the stacked price chart, spread chart and range selector.

    Args:
        data: Backtest results indexed by column name. The ``date``,
            ``data0``, ``data1`` and ``spread`` columns are required;
            ``upper_limit`` and ``lower_limit`` are drawn only when both are
            present. Must expose ``.columns`` and hold at least one row.
        action_df: Executed trades. When non-empty it is wrapped in a
            ``ColumnDataSource`` and plotted as circles at
            (``date``, ``spread``); the hover tooltip reads the
            ``latest_trade_action``, ``buy_stk``, ``buy_amt``, ``sell_stk``
            and ``sell_amt`` fields.

    Returns:
        A Bokeh ``column`` layout holding the price figure, the spread
        figure and the range-selection figure, in that order.

    Raises:
        IndexError: If ``data`` contains no rows.
    """
    logging.info("build_price_and_spread_fig(): BEGIN ")

    # ========== themes & appearance ============= #
    STK_1_LINE_COLOR = "#053061"
    STK_2_LINE_COLOR = "#67001f"
    STK_1_LINE_WIDTH = 1.5
    STK_2_LINE_WIDTH = 1.5
    SPREAD_LINE_COLOR = "#67001f"
    SPREAD_LINE_WIDTH = 1.5
    LIMIT_LINE_COLOR = "#FFA500"
    WINDOW_SIZE = 10
    TITLE = "PRICE OF {} vs {}".format(backtest_params["stk_0"], backtest_params["stk_1"])
    SPREAD_TITLE = "RULE BASED SPREAD TRADING"
    HEIGHT = 250
    SLIDER_HEIGHT = 150
    WIDTH = 600
    TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

    # ========== data ============= #
    dates = np.array(data['date'], dtype=np.datetime64)
    STK_1_source = ColumnDataSource(data=dict(date=dates, close=data['data0']))
    STK_2_source = ColumnDataSource(data=dict(date=dates, close=data['data1']))

    # Initial zoom covers the last WINDOW_SIZE points. Clamp to the number of
    # rows available so short backtests do not index past the start.
    window_start = dates[-min(WINDOW_SIZE, len(dates))]
    initial_x_range = (window_start, dates[-1])

    # The limit bands are optional; decide once and use the same answer for
    # both the data source and the renderers.
    has_limits = ('upper_limit' in data.columns) and ('lower_limit' in data.columns)
    spread_columns = dict(date=dates, spread=data['spread'])
    if has_limits:
        spread_columns['upper_limit'] = data['upper_limit']
        spread_columns['lower_limit'] = data['lower_limit']
    spread_source = ColumnDataSource(data=spread_columns)

    # ========== price figure ============= #
    normp = figure(plot_height=HEIGHT,
                   plot_width=WIDTH,
                   x_range=initial_x_range,
                   title=TITLE,
                   toolbar_location=None)

    normp.line('date', 'close', source=STK_1_source, line_color=STK_1_LINE_COLOR, line_width=STK_1_LINE_WIDTH)
    normp.line('date', 'close', source=STK_2_source, line_color=STK_2_LINE_COLOR, line_width=STK_2_LINE_WIDTH)
    normp.yaxis.axis_label = 'Price'
    normp.xaxis[0].formatter = DatetimeTickFormatter()

    # ========== spread figure ============= #
    spread_p = figure(tools=TOOLS,
                      toolbar_location=None,
                      plot_height=HEIGHT,
                      plot_width=WIDTH,
                      title=SPREAD_TITLE,
                      x_range=initial_x_range)
    spread_p.xaxis.axis_label = "Backtest Period"
    spread_p.yaxis.axis_label = "Spread"

    # Mark each executed trade on the spread curve, with details on hover.
    # len() is used on purpose: truth-testing a DataFrame is ambiguous.
    if len(action_df) > 0:
        logging.info("BUILDING CIRCLES")
        logging.info("ACTION_DF: {}".format(action_df.head()))
        action_source = ColumnDataSource(action_df)
        circles = spread_p.circle("date", "spread", size=12, source=action_source, fill_alpha=0.8)

        circles_hover = bkm.HoverTool(renderers=[circles], tooltips=[
            ("Action", "@latest_trade_action"),
            ("Stock Bought", "@buy_stk"),
            ("Bought Amount", "@buy_amt"),
            ("Stock Sold", "@sell_stk"),
            ("Sold Amount", "@sell_amt"),
        ])
        spread_p.add_tools(circles_hover)

    # Spread over time, plus the limit bands when available.
    spread_p.line('date', 'spread', source=spread_source, line_color=SPREAD_LINE_COLOR, line_width=SPREAD_LINE_WIDTH)
    if has_limits:
        spread_p.line('date', 'upper_limit', source=spread_source, line_color=LIMIT_LINE_COLOR, line_width=SPREAD_LINE_WIDTH)
        spread_p.line('date', 'lower_limit', source=spread_source, line_color=LIMIT_LINE_COLOR, line_width=SPREAD_LINE_WIDTH)

    spread_p.xaxis[0].formatter = DatetimeTickFormatter()

    # ========== RANGE SELECT TOOL ============= #
    select = figure(title="Drag the middle and edges of the selection box to change the range above",
                    plot_height=SLIDER_HEIGHT, plot_width=WIDTH, y_range=normp.y_range,
                    x_axis_type="datetime", y_axis_type=None,
                    tools="", toolbar_location='above', background_fill_color="#efefef")

    # One range tool per chart: each is bound to its own figure's x_range.
    range_tool = RangeTool(x_range=normp.x_range)
    range_tool.overlay.fill_color = "navy"
    range_tool.overlay.fill_alpha = 0.2

    range_tool_spread = RangeTool(x_range=spread_p.x_range)

    select.line('date', 'close', source=STK_1_source, line_color=STK_1_LINE_COLOR, line_width=STK_1_LINE_WIDTH)
    select.line('date', 'close', source=STK_2_source, line_color=STK_2_LINE_COLOR, line_width=STK_2_LINE_WIDTH)
    select.ygrid.grid_line_color = None
    select.add_tools(range_tool)
    select.add_tools(range_tool_spread)
    select.toolbar.active_multi = range_tool

    logging.info("build_price_and_spread_fig(): END ")
    return column(normp, spread_p, select)

def build_pv_fig(data):
    """Build a line chart of portfolio value against date.

    Args:
        data: Backtest results providing ``date`` and ``portfolio_value``
            columns.

    Returns:
        The Bokeh figure containing the portfolio-value line.
    """
    timestamps = np.array(data['date'], dtype=np.datetime64)
    source = ColumnDataSource(
        data={'date': timestamps, 'portfolio_value': data['portfolio_value']}
    )

    chart = figure(
        plot_height=250,
        plot_width=600,
        title="PORTFOLIO VALUE OVER TIME",
        toolbar_location=None,
    )
    chart.line(
        'date',
        'portfolio_value',
        source=source,
        line_color="#053061",
        line_width=1.5,
    )
    chart.yaxis.axis_label = 'Portfolio Value'
    # Render the x axis ticks as dates rather than raw numbers.
    chart.xaxis[0].formatter = DatetimeTickFormatter()
    return chart

def build_widgets_wb(stock_list, metrics):
    """Build the control widgets and, optionally, the metrics table.

    Initial widget values are read from the module-level ``backtest_params``.

    Args:
        stock_list: Options offered by both stock selectors.
        metrics: Data for the performance table, with ``Metrics`` and
            ``Value`` fields, or ``None`` to omit the table.

    Returns:
        A 6-tuple ``(layout, select_stk_1, select_stk_2, select_strategy,
        backtest_dates, start_bt)``: the assembled row layout followed by the
        individual widgets so the caller can attach callbacks to them.
    """
    WIDGET_WIDTH = 250

    def _param_as_date(key):
        # backtest_params stores dates as "YYYY-MM-DD" strings.
        return datetime.strptime(backtest_params[key], "%Y-%m-%d").date()

    # ========== stock pickers ============= #
    stk_1_picker = Select(
        width=WIDGET_WIDTH,
        title='Select Stock 1:',
        value=backtest_params["stk_0"],
        options=stock_list,
    )
    stk_2_picker = Select(
        width=WIDGET_WIDTH,
        title='Select Stock 2:',
        value=backtest_params["stk_1"],
        options=stock_list,
    )

    # ========== strategy picker ============= #
    strategy_picker = Select(
        width=WIDGET_WIDTH,
        title='Select Strategy:',
        value=backtest_params["strategy_type"],
        options=['kalman', 'distance', 'cointegration', 'reinforcement learning'],
    )

    # ========== backtest date range ============= #
    earliest = _param_as_date("max_start")
    latest = _param_as_date("max_end")
    chosen_start = _param_as_date("backtest_start")
    chosen_end = _param_as_date("backtest_end")

    date_slider = DateRangeSlider(
        width=WIDGET_WIDTH,
        start=earliest,
        end=latest,
        value=(chosen_start, chosen_end),
        step=1,
        title="Backtest Date Range:",
    )

    run_button = Button(label="Backtest", button_type="success", width=WIDGET_WIDTH)

    controls = widgetbox(
        stk_1_picker, stk_2_picker, strategy_picker, date_slider, run_button,
        width=300,
    )

    # ========== optional metrics table ============= #
    if metrics is None:
        logging.info("creating controls without table")
        layout = row(controls)
    else:
        table = DataTable(
            source=ColumnDataSource(metrics),
            columns=[
                TableColumn(field="Metrics", title="Metrics"),
                TableColumn(field="Value", title="Performance"),
            ],
            width=300,
        )
        layout = row(controls, widgetbox(table))

    return layout, stk_1_picker, stk_2_picker, strategy_picker, date_slider, run_button

if FIRST_ITER[0]:
    # First load: run one backtest with the default parameters so the page
    # has figures and a metrics table to show before any user interaction.
    output_dir = "./jupyter_py/output/backtest-" + str(get_current_time())

    # Build the command as an argument list and run it without a shell.
    # The previous shell string ended in a newline, so the appended
    # " --lookback N" landed on a separate shell line and never reached
    # backtest_pair.py. A list also keeps parameter values out of the shell.
    execution_command = [
        "python", "./jupyter_py/backtest_pair.py",
        "--strategy_type", backtest_params["strategy_type"],
        "--output_dir", output_dir,
        "--backtest_start", backtest_params["backtest_start"],
        "--backtest_end", backtest_params["backtest_end"],
        "--stk0", backtest_params["stk_0"],
        "--stk1", backtest_params["stk_1"],
    ]
    # NOTE: "--kalman_estimation_length 200" was previously tried for the
    # kalman strategy and is intentionally left disabled.
    if backtest_params["strategy_type"] == "cointegration":
        execution_command += ["--lookback", "76"]
    elif backtest_params["strategy_type"] == "distance":
        execution_command += ["--lookback", "70"]

    completed = subprocess.run(execution_command)
    if completed.returncode != 0:
        # Keep going as before (the log decoder below will surface missing
        # output), but leave a trace of the failed run.
        logging.warning("backtest_pair.py exited with status %s", completed.returncode)

    # Stock tickers are the CSV file names without the ".csv" suffix.
    stock_list = [
        os.path.basename(csv_path)[:-4]
        for csv_path in glob.glob("./data/nyse-daily-tech/*.csv")
    ]

    # get results from log file
    backtest_df, trades_df = Decoder.get_strategy_status(output_dir)
    metrics_dict = Decoder.get_strategy_performance(str(output_dir))
    metrics_pd = pd.DataFrame.from_dict(metrics_dict, orient='index', columns=['Value']).reset_index()
    metrics_pd.columns = ['Metrics', 'Value']

    # build figures
    spread_fig = build_price_and_spread_fig(backtest_df, trades_df)
    pv_fig = build_pv_fig(backtest_df)
    master_wb, select_stk_1, select_stk_2, select_strategy, backtest_dates, start_bt = build_widgets_wb(stock_list, metrics_pd)
    FIRST_ITER[0] = False
    
def _run_backtest():
    """Zero-argument click handler that delegates to ``run_backtest``."""
    run_backtest()

def run_backtest():
    """Run a backtest for the current ``backtest_params`` and redraw the page.

    For the ``kalman``, ``distance`` and ``cointegration`` strategies this
    launches ``./jupyter_py/backtest_pair.py`` as a subprocess and decodes its
    logs. Any other strategy value is treated as reinforcement learning and
    handled by ``run_rl_backtest``.

    On success the current document is cleared and replaced with freshly
    built widgets and figures, with callbacks re-attached to the new widgets.
    If the RL backtest raises, the error is logged and the current document
    is left untouched.

    Returns:
        None.
    """
    logging.info("received signal")

    # Stock tickers are the CSV file names without the ".csv" suffix.
    stock_list = [
        os.path.basename(csv_path)[:-4]
        for csv_path in glob.glob("./data/nyse-daily-tech/*.csv")
    ]

    if backtest_params["strategy_type"] in ("cointegration", "distance", "kalman"):
        output_dir = "./jupyter_py/output/backtest-" + str(get_current_time())

        # Build the command as an argument list and run it without a shell.
        # The previous shell string ended in a newline, so the appended
        # " --lookback N" landed on a separate shell line and never reached
        # backtest_pair.py. A list also keeps widget-supplied values out of
        # the shell.
        execution_command = [
            "python", "./jupyter_py/backtest_pair.py",
            "--strategy_type", backtest_params["strategy_type"],
            "--output_dir", output_dir,
            "--backtest_start", backtest_params["backtest_start"],
            "--backtest_end", backtest_params["backtest_end"],
            "--stk0", backtest_params["stk_0"],
            "--stk1", backtest_params["stk_1"],
        ]
        # NOTE: "--kalman_estimation_length 200" was previously tried for the
        # kalman strategy and is intentionally left disabled.
        if backtest_params["strategy_type"] == "cointegration":
            execution_command += ["--lookback", "76"]
        elif backtest_params["strategy_type"] == "distance":
            execution_command += ["--lookback", "70"]

        completed = subprocess.run(execution_command)
        if completed.returncode != 0:
            # Keep going as before (the log decoder below will surface
            # missing output), but leave a trace of the failed run.
            logging.warning("backtest_pair.py exited with status %s", completed.returncode)

        # get results from log file
        backtest_df, trades_df = Decoder.get_strategy_status(output_dir)
        metrics_dict = Decoder.get_strategy_performance(str(output_dir))
        metrics_pd = pd.DataFrame.from_dict(metrics_dict, orient='index', columns=['Value']).reset_index()
        metrics_pd.columns = ['Metrics', 'Value']

    else:
        # perform RL backtest
        logging.info("{}".format("PERFORMING RL CALCULATION"))

        try:
            backtest_df, trades_df = run_rl_backtest(backtest_params["stk_0"], backtest_params["stk_1"], RL_period_idx)
        except Exception as e:
            logging.warning("{}".format(e))
            logging.warning("{}".format(traceback.format_exc()))
            # Without results there is nothing to plot. Previously execution
            # continued and failed on `None.columns`; stop here instead and
            # keep the page as it is.
            return

        logging.info("{}".format("FINISH RL CALCULATION"))

        logging.info("done RL")
        # No performance metrics are computed for RL runs; show a placeholder.
        metrics_pd = pd.DataFrame(
            [{'Metrics': 'Sharpe Ratio', 'Value': None}],
            columns=['Metrics', 'Value'],
        )

    logging.info("DONE CALCULATION")
    logging.info("{}".format(backtest_df.columns))
    logging.info("{}".format(trades_df.columns))
    logging.info("{}".format(backtest_df.head()))
    logging.info("{}".format(trades_df.head()))

    # build figures
    spread_fig = build_price_and_spread_fig(backtest_df, trades_df)
    pv_fig = build_pv_fig(backtest_df)
    master_wb, select_stk_1, select_stk_2, select_strategy, backtest_dates, start_bt = build_widgets_wb(stock_list, metrics_pd)

    # The widgets above are brand new objects, so the callbacks have to be
    # attached again; each one mirrors a widget value into backtest_params.
    def update_stk_1(attrname, old, new):
        backtest_params['stk_0'] = select_stk_1.value

    def update_stk_2(attrname, old, new):
        backtest_params['stk_1'] = select_stk_2.value

    def update_strategy(attrname, old, new):
        backtest_params['strategy_type'] = select_strategy.value

    def update_dates(attrname, old, new):
        # NOTE(review): slider changes are not written back to
        # backtest_params, so the date range used by the backtest stays at
        # its configured value. Kept as a no-op to preserve behaviour.
        return None

    select_stk_1.on_change('value', update_stk_1)
    select_stk_2.on_change('value', update_stk_2)
    select_strategy.on_change('value', update_strategy)
    backtest_dates.on_change('value', update_dates)
    start_bt.on_click(_run_backtest)

    # Swap the whole page for the freshly built layout.
    left = column(master_wb, pv_fig)
    grid = row(left, spread_fig)
    curdoc().clear()
    curdoc().add_root(grid)
    logging.info("really done all")
    logging.info("FIRST_ITER: {}".format(FIRST_ITER))
    logging.info("Grid: {}".format(grid))

if FIRST_ITER[1]:
    # Initial wiring: attach callbacks to the widgets built by the first
    # backtest above and publish the layout to the current document.

    def update_stk_1(attrname, old, new):
        """Mirror the first stock selector into ``backtest_params``."""
        backtest_params['stk_0'] = select_stk_1.value

    def update_stk_2(attrname, old, new):
        """Mirror the second stock selector into ``backtest_params``."""
        backtest_params['stk_1'] = select_stk_2.value

    def update_strategy(attrname, old, new):
        """Mirror the strategy selector into ``backtest_params``."""
        backtest_params['strategy_type'] = select_strategy.value

    def update_dates(attrname, old, new):
        """Read the slider range; the value is not stored anywhere yet."""
        val = list(backtest_dates.value)
        # backtest_params['backtest_start'] = str(datetime.datetime.fromtimestamp(val[0]).date())
        # backtest_params['backtest_end'] = str(datetime.datetime.fromtimestamp(val[1]).date())

    # Assemble the page: controls + portfolio value on the left, price and
    # spread charts on the right.
    left = column(master_wb, pv_fig)
    grid = row(left, spread_fig)

    # Register the callbacks.
    start_bt.on_click(run_backtest)
    backtest_dates.on_change('value', update_dates)
    select_strategy.on_change('value', update_strategy)
    select_stk_2.on_change('value', update_stk_2)
    select_stk_1.on_change('value', update_stk_1)

    curdoc().add_root(grid)
    FIRST_ITER[1] = False