Skip to main content

Event-driven Python backtesting engine - Overview - Part 2

·1366 words·7 mins
Anthony Ori
Author
Anthony Ori
~ tinkerer ~

As mentioned in the previous article, we will cover at a high level the code in backtester.py.

Setup
#

At the top we have the import statements,

# trading_engine/backtester.py

import datetime
import queue

from data import HistoricCsvDataHandler
from strategy import BuyAndHoldStrategy, StatArbStrategy
from portfolio import BuyAndHoldPortfolio
from broker import SimulatedExecutionHandler
from utils import *

In the actual execution block,

if __name__ == "__main__":
    # parse and validate the CLI arguments; cli_parser exits via parser.error
    # when the asset class is unsupported or no ticker was given
    parsed_args = cli_parser()
    # collapse the "equities" alias onto "stocks"
    asset_class = asset_class_selector(parsed_args)
    # directory under <cwd>/datasets that holds the data files for this asset class
    data_dir = data_dir_setup(asset_class)

    # rest of code [...]

The first three lines deal with parsing the tickers passed on the command line, identifying the asset class (defaults to stocks), and setting up the data directory to read the .csv files from (in the case of the CSV Data Handler). These functions are defined in utils.py as such:

Details
# trading_engine/utils.py

def cli_parser(args=None):
    """
    Parses and validates the backtester's command line arguments.

    :param args: argument strings to parse; None lets argparse read sys.argv[1:] -> List[str] | None
    :return: Parsed arguments accessible with dot notation. `asset_class` is lower-cased and
             `tickers` holds one list per `--tickers` flag, e.g. [['AAPL', 'MSFT']] -> Namespace-like object
    :raises SystemExit: through parser.error, when the asset class is not supported
                        or when no ticker was provided
    """
    parser = argparse.ArgumentParser(description="*** Event-driven Backtester ***")
    parser.add_argument("--asset_class", type=str, help="Asset type, i.e. 'stocks', 'commodities', 'currencies'",
                        default="stocks")
    parser.add_argument("--tickers", type=str, help="Instrument's ticker, i.e. AAPL, etc.", nargs='*', action='append')

    allowed_assets = ("stocks", "equities")

    # parse once and reuse the namespace instead of re-parsing for every check
    parsed_args = parser.parse_args(args)

    # keep the normalised value on the namespace so callers see the same
    # casing that was validated here (e.g. "Equities" -> "equities")
    parsed_args.asset_class = parsed_args.asset_class.lower()
    if parsed_args.asset_class not in allowed_assets:
        # raises a SystemExit error
        parser.error(f"Asset class '{parsed_args.asset_class}' not currently supported.\n"
                     f"Supported types are: stocks for now (more to come in due time)")

    # a bare `--tickers` yields [[]]: not None, but it still carries no ticker
    if not parsed_args.tickers or not any(parsed_args.tickers):
        parser.error("No ticker provided -- please provide at least one valid ticker\n"
                     "You can add more tickers separated by a space, i.e. AAPL GOOGL, etc.")

    return parsed_args


def asset_class_selector(parsed_args):
    """
    Resolves the asset class from the parsed arguments, treating "equities" as an alias of "stocks".

    :param parsed_args: object exposing an `asset_class` attribute -> Namespace-like object
    :return: "stocks" when the asset class is "equities", otherwise the asset class unchanged -> str
    """
    selected = parsed_args.asset_class
    return "stocks" if selected == "equities" else selected


def data_dir_setup(asset_class):
    """
    Builds the directory the Data Handler reads from, rooted at <cwd>/datasets.

    "stocks" and "etfs" live one level deeper, under the "equities" folder;
    every other asset class sits directly under "datasets".

    :param asset_class: name of the asset class folder -> str
    :return: source directory the Data handler will use -> Path or Path-like object
    """
    datasets_root = Path.cwd() / "datasets"

    if asset_class in ("stocks", "etfs"):
        return datasets_root / "equities" / asset_class

    return datasets_root / asset_class

To make sense of the data_dir_setup function, it helps to see the directory structure it expects:

/
├── datasets
│   ├── bonds
│   ├── commodities
│   ├── currencies
│   └── equities
│       ├── etfs
│       └── stocks

We’ll focus on the stocks folder for the rest of the series, but the above will work with other asset types, if those other directories are populated with relevant .csv or .txt data, in the format <ticker>.csv.

if __name__ == "__main__":
    # [...]    

    # main, and only, event queue (FIFO)
    events = queue.Queue()

    # input symbols
    # --tickers is declared with action='append', so parsed_args.tickers is a
    # list of lists; [0] picks the tickers given with the first --tickers flag
    symbols = symbols_filterer(parsed_args.tickers[0], data_dir)

    # engine components
    # every component receives the same `events` queue
    bars = HistoricCsvDataHandler(events, data_dir, symbols)
    buy_hold_strategy = BuyAndHoldStrategy(bars, events)
    # third argument evaluates to the string "1960-01-01"
    # NOTE(review): presumably the portfolio start date -- confirm against BuyAndHoldPortfolio
    buy_hold_portfolio = BuyAndHoldPortfolio(bars, events, datetime.datetime(1960, 1, 1).strftime("%Y-%m-%d"))
    broker = SimulatedExecutionHandler(events)

In the above we define a Queue object. This is from the standard library and internally handles synchronisation by blocking/locking out competing threads. This queue object will be passed around to each of the engine components so they can independently add events to the queue.

Next is the symbols_filterer which is another function defined in utils.py that takes the tickers passed by the user from the command line, and filters out tickers that are not present in the data_dir:

# trading_engine/utils.py

[...]

def symbols_filterer(tickers, datasource):
    """
    Checks if the symbols provided are available in the datasource,
    and return a filtered list of only the valid ones. Valid tickers
    being ones that are available in the local datasource.

    Matching is case-insensitive: both the requested tickers and the file
    names are upper-cased before comparison. The result keeps the order in
    which the tickers were requested and contains each ticker at most once.

    Currently only supports datasource as a local directory.

    :param tickers: list of symbols -> List[str]
    :param datasource: repository where data is to be retrieved from; must provide iterdir() -> Path-like object
    :return: filtered list of the valid tickers ['AAPL', 'GOOGL'], or ['AMD'] when none is valid -> List[str]
    """

    # file names are compared upper-cased, so the requested tickers must be as well
    requested = [ticker.upper() for ticker in tickers]

    # a set gives O(1) membership tests and collapses AAPL.csv / AAPL.txt into one entry
    available = {ticker_file.stem.split(".")[0].upper() for ticker_file in datasource.iterdir()}

    # dict.fromkeys drops repeated tickers while preserving the order the user typed them in
    filtered_list = [ticker for ticker in dict.fromkeys(requested) if ticker in available]

    # none of the tickers the user provided are available or valid; provide a safe default
    if not filtered_list:
        print("None of the tickers provided are valid or available.\nPlease Try again with others.\n"
              "Defaulting to 'AMD'\n")
        filtered_list.append('AMD')

    return filtered_list

So you might run the following in the command line:

$ python backtester.py --tickers AAPL MSFT NONSENSE

but only Apple and Microsoft will be considered since NONSENSE is not a ticker available in the local datasource.

The last part shows the engine components themselves.

Execution
#

if __name__ == "__main__":
    # [...]
    
    # main execution loop
    # outer loop: one pass per data bar, until the data handler reports no more data
    while True:
        if bars.continue_backtest:
            # load the next bar from the data handler
            bars.update_bars()
        else:
            # data exhausted: report the results and leave the outer loop
            print("End of backtesting...")
            buy_hold_portfolio.create_equity_curve_dataframe()
            print("\n **** Portfolio statistics **** \n")
            for stat in buy_hold_portfolio.output_summary_stats():
                print(stat)
            break

        # inner loop: drain the event queue before asking for the next bar
        while True:
            try:
                # non-blocking get: raises queue.Empty instead of waiting when nothing is queued
                event = events.get(False)
            except queue.Empty:
                # queue drained -> back to the outer loop
                break
            else:
                if event is not None:
                    # route each event to the component that handles its type
                    if event.type == "MARKET":
                        buy_hold_strategy.calculate_signals(event)
                        buy_hold_portfolio.update_timeindex(event)

                    elif event.type == "SIGNAL":
                        buy_hold_portfolio.update_signal(event)

                    elif event.type == "ORDER":
                        broker.execute_order(event)

                    elif event.type == "FILL":
                        buy_hold_portfolio.update_fill(event)

The outer loop runs until there’s no more backtesting data. On each pass, the inner loop drains the event queue: it keeps pulling events until the queue is empty, then hands control back to the outer loop so the next bar can be loaded. In a backtesting environment the outer loop will have an end, but in a live-trading environment, the outer loop will also run infinitely or with a very long-lived condition according to the trading/investment strategy.

While the bars.continue_backtest condition is true, bars.update_bars() will add a MarketEvent to the queue when a new data bar “arrives” from the data feed (to simulate how a market feed “drips” data progressively). Said MarketEvent will trigger the chain of events in the else condition of the inner loop.

In the next article we will work on the data layer in more detail, but before that we will look at tests for the utility functions in utils.py mentioned earlier. We don’t need to do this now, but it helps get into the frame of mind of writing tests for code as you’re working on it.

Testing the utility functions in the Setup section
#

I’ll use pytest as test runner. It looks for the tests folder from the root of the project, thus:

/
├── tests
│   ├── __init__.py
│   └── test_utils.py

The convention is to name the test file after the application code it tests, so for utils.py you should have a test file named test_utils.py:

# trading_engine/tests/test_utils.py

import pytest

from types import SimpleNamespace
from utils import *


@pytest.mark.parametrize('args', [
    ['--tickers', 'AMD'],
])
def test_asset_class_parser_default(args):
    """Without --asset_class on the command line, the parser falls back to 'stocks'."""
    parsed = cli_parser(args)  # no asset_class arg provided
    assert parsed.asset_class == 'stocks'

The test above checks that the backtester defaults to stocks as asset class when the arg --asset_class is not provided in the command line.

The idea of automated tests is that they should be descriptive, have a clear scope, and should run fast. They provide a sanity check and improve developer confidence because you can fix bugs or develop new features with the assurance that if something breaks you can look at the test suite to narrow down what is broken or reveal gaps in the application logic or test coverage.

Now that we’ve tested the default condition of the Command Line Interface (CLI) parser, we can test two other paths: one where we provide a supported asset class, and another where the asset class is not supported, i.e. any other asset class that is not stocks:

# use the full `--tickers` flag: `--ticker` only works through argparse's
# prefix abbreviation and would break if a second `--ticker*` option were added
@pytest.mark.parametrize('args', [
    ["--asset_class", "stocks", "--tickers", "AMD"],
    ["--asset_class", "equities", "--tickers", "AMD"],
])
def test_asset_class_parser_with_supported_assets(args):
    """A supported asset class is accepted and returned as provided."""
    parser = cli_parser(args)
    # args[1] is the value that follows the --asset_class flag
    assert parser.asset_class == args[1]


# a valid ticker is always supplied so that SystemExit can only come from the
# asset class check, not from the "no ticker provided" check
@pytest.mark.parametrize('args', [
    ['--asset_class', 'stonks', '--tickers', 'AMD'],
    ['--asset_class', 'super stonks', '--tickers', 'AMD'],
    ['--asset_class', 'currencies', '--tickers', 'AMD'],
    ['--asset_class', 'commodities', '--tickers', 'AMD'],
])
def test_asset_class_parser_with_unsupported_assets(args):
    """An unsupported asset class makes the parser exit through parser.error."""
    with pytest.raises(SystemExit):
        cli_parser(args)

And so forth with other functions within utils.py. By the end of the exercise you should have something like this:

Details
# trading_engine/tests/test_utils.py

import pytest

from types import SimpleNamespace
from utils import *


@pytest.mark.parametrize('args', [
    ['--tickers', 'AMD'],
])
def test_asset_class_parser_default(args):
    """Without --asset_class on the command line, the parser falls back to 'stocks'."""
    parsed = cli_parser(args)  # no asset_class arg provided
    assert parsed.asset_class == 'stocks'


# use the full `--tickers` flag: `--ticker` only works through argparse's
# prefix abbreviation and would break if a second `--ticker*` option were added
@pytest.mark.parametrize('args', [
    ["--asset_class", "stocks", "--tickers", "AMD"],
    ["--asset_class", "equities", "--tickers", "AMD"],
])
def test_asset_class_parser_with_supported_assets(args):
    """A supported asset class is accepted and returned as provided."""
    parser = cli_parser(args)
    # args[1] is the value that follows the --asset_class flag
    assert parser.asset_class == args[1]


# a valid ticker is always supplied so that SystemExit can only come from the
# asset class check, not from the "no ticker provided" check
@pytest.mark.parametrize('args', [
    ['--asset_class', 'stonks', '--tickers', 'AMD'],
    ['--asset_class', 'currencies', '--tickers', 'AMD'],
    ['--asset_class', 'commodities', '--tickers', 'AMD'],
])
def test_asset_class_parser_with_unsupported_assets(args):
    """An unsupported asset class makes the parser exit through parser.error."""
    with pytest.raises(SystemExit):
        cli_parser(args)


# SimpleNamespace takes its attributes as keyword arguments; passing a dict
# positionally raises TypeError on Python versions before 3.13
@pytest.mark.parametrize('parsed_args, expected', [
    (SimpleNamespace(asset_class='equities'), 'stocks'),  # the alias is mapped onto stocks
    (SimpleNamespace(asset_class='stocks'), 'stocks'),
    (SimpleNamespace(asset_class='stonks'), 'stonks'),
    (SimpleNamespace(asset_class='options'), 'options'),
    (SimpleNamespace(asset_class='bonds'), 'bonds'),
])
def test_asset_class_selector(parsed_args, expected):
    """'equities' resolves to 'stocks'; every other asset class passes through unchanged."""
    asset_class = asset_class_selector(parsed_args)
    assert asset_class == expected


@pytest.mark.parametrize('asset_class', [
    'stocks',
    'etfs',
])
def test_data_dir_setup_equities(asset_class):
    """Stocks and ETFs resolve to a folder nested under 'equities'."""
    resolved = data_dir_setup(asset_class)
    leaf, parent = resolved.name, resolved.parent.name
    assert leaf == asset_class
    assert parent == "equities"


@pytest.mark.parametrize('asset_class', [
    'currencies',
    'commodities',
    'bonds',
    'options',
])
def test_data_dir_setup_non_equities(asset_class):
    """Non-equity asset classes resolve to a folder directly under 'datasets'."""
    resolved = data_dir_setup(asset_class)
    leaf, parent = resolved.name, resolved.parent.name
    assert leaf == asset_class
    assert parent == "datasets"


def test_symbols_filterer_with_unavailable_ticker(tmp_path):
    """Tickers without a matching data file are dropped from the result."""
    # build a throwaway datasource (pytest's tmp_path fixture) so the test does not
    # depend on the working directory or on which files exist in the local datasets
    for file_name in ('AMD.csv', 'INTC.csv'):
        (tmp_path / file_name).touch()

    tickers = ['AMD', 'INTC', 'INVALID_TICKER']
    symbols_list = symbols_filterer(tickers, tmp_path)

    assert "INVALID_TICKER" not in symbols_list
    # sorted: the order of the returned tickers is not part of this check
    assert sorted(symbols_list) == ['AMD', 'INTC']


def test_symbols_filterer_with_available_ticker(tmp_path):
    """Tickers that have a data file in the datasource are kept."""
    # build a throwaway datasource (pytest's tmp_path fixture) so the test does not
    # depend on the working directory or on which files exist in the local datasets
    for file_name in ('AMD.csv', 'INTC.csv', 'MSFT.csv'):
        (tmp_path / file_name).touch()

    tickers = ['AMD', 'INTC', 'MSFT']
    symbols_list = symbols_filterer(tickers, tmp_path)

    assert "MSFT" in symbols_list
    # sorted: the order of the returned tickers is not part of this check
    assert sorted(symbols_list) == ['AMD', 'INTC', 'MSFT']

Feel free to share the article on socials: