From 5b7e9956b2c71b6a5224223d7251cf58dd973004 Mon Sep 17 00:00:00 2001 From: Pedro Capelastegui Date: Mon, 24 Sep 2018 15:47:03 +0100 Subject: [PATCH] closes #3; (#4) --- CONTRIBUTING.md | 52 + anticipy/__init__.py | 3 + anticipy/app.py | 124 ++ anticipy/forecast.py | 1173 +++++++++++++++ anticipy/forecast_models.py | 1549 +++++++++++++++++++ anticipy/forecast_plot.py | 266 ++++ anticipy/model_utils.py | 291 ++++ anticipy/utils_test.py | 122 ++ setup.py | 6 +- tests/__init__.py | 1 + tests/data/candy_production.csv | 549 +++++++ tests/data/df_test_naive.csv | 79 + tests/data/df_test_naive2.csv | 58 + tests/data/test_normalize.csv | 45 + tests/test_forecast.py | 2513 +++++++++++++++++++++++++++++++ tests/test_forecast_model.py | 900 +++++++++++ tests/test_forecast_plot.py | 157 ++ tests/test_model_utils.py | 192 +++ 18 files changed, 8077 insertions(+), 3 deletions(-) create mode 100644 CONTRIBUTING.md create mode 100644 anticipy/__init__.py create mode 100644 anticipy/app.py create mode 100644 anticipy/forecast.py create mode 100644 anticipy/forecast_models.py create mode 100644 anticipy/forecast_plot.py create mode 100644 anticipy/model_utils.py create mode 100644 anticipy/utils_test.py create mode 100644 tests/__init__.py create mode 100755 tests/data/candy_production.csv create mode 100644 tests/data/df_test_naive.csv create mode 100644 tests/data/df_test_naive2.csv create mode 100644 tests/data/test_normalize.csv create mode 100644 tests/test_forecast.py create mode 100644 tests/test_forecast_model.py create mode 100644 tests/test_forecast_plot.py create mode 100644 tests/test_model_utils.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..172bf4a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,52 @@ +# Contributing + +Contributions are welcomed! + +When contributing to this repository, please first discuss the change you wish to make via GitHub +issue before making a change. This saves everyone from wasted effort in the event that the proposed +changes need some adjustment before they are ready for submission. + +## Pull Request Process + +1. If your changes include multiple commits, please squash them into a single commit. Stack Overflow + and various blogs can help with this process if you're not already familiar with it. +2. Update the README.md where relevant. +3. You may merge the Pull Request in once you have the sign-off of two other developers, or if you + do not have permission to do that, you may request the second reviewer to merge it for you. + +## Contributor Code of Conduct + +As contributors and maintainers of this project, and in the interest of fostering an open and +welcoming community, we pledge to respect all people who contribute through reporting issues, +posting feature requests, updating documentation, submitting pull requests or patches, and other +activities. + +We are committed to making participation in this project a harassment-free experience for everyone, +regardless of level of experience, gender, gender identity and expression, sexual orientation, +disability, personal appearance, body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery +* Personal attacks +* Trolling or insulting/derogatory comments +* Public or private harassment +* Publishing other's private information, such as physical or electronic addresses, without explicit + permission +* Other unethical or unprofessional conduct. 
+ +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, +code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. By +adopting this Code of Conduct, project maintainers commit themselves to fairly and consistently +applying these principles to every aspect of managing this project. Project maintainers who do not +follow or enforce the Code of Conduct may be permanently removed from the project team. + +This code of conduct applies both within project spaces and in public spaces when an individual is +representing the project or its community. + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an +issue or contacting one or more of the project maintainers. + +This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), +version 1.2.0, available at +[http://contributor-covenant.org/version/1/2/0/](http://contributor-covenant.org/version/1/2/0/) \ No newline at end of file diff --git a/anticipy/__init__.py b/anticipy/__init__.py new file mode 100644 index 0000000..b8d1eaf --- /dev/null +++ b/anticipy/__init__.py @@ -0,0 +1,3 @@ +import pkg_resources +__version__ = pkg_resources.require(__name__)[0].version +del pkg_resources diff --git a/anticipy/app.py b/anticipy/app.py new file mode 100644 index 0000000..e8e8723 --- /dev/null +++ b/anticipy/app.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +# +# License: This module is released under the terms of the LICENSE file +# contained within this applications INSTALL directory + +""" + __high_level_module_description_here__ +""" + +# -- Coding Conventions +# http://www.python.org/dev/peps/pep-0008/ - Use the Python style guide +# http://sphinx.pocoo.org/rest.html - Use Restructured Text for docstrings + +# -- Public Imports +import logging +import pandas as pd +import os +import forecast +import forecast_plot +import argparse + +# -- Private Imports + +# -- Globals +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +# -- Exception classes + + +# -- Functions +def logger_info(msg, data): + # Convenience function for easier log typing + logger.info(msg + '\n%s', data) + + +def run_forecast_app(path_in, path_out=None, forecast_years=2.0, + col_name_y='y', col_name_weight='weight', + col_name_x='x', col_name_date='date', + col_name_source='source', + include_all_fits=False + ): + assert path_in is not None and os.path.exists(path_in), 'path_in needs to be a string pointing to a valid file path' + assert not os.path.isdir(path_in) + + file_name = os.path.basename(path_in) + file_name_p1 = file_name.split('.')[0] + + logger_info('file_name', file_name) + logger_info('file_name p1', file_name_p1) + + if path_out is None: + path_out = path_in + assert os.path.exists(path_out) + + path_folder = os.path.dirname(path_out) + + logger_info('dir name', path_folder) + + path_data = os.path.join(path_folder, file_name_p1+'_fcast.csv') + path_metadata = os.path.join(path_folder, file_name_p1+'_metadata.csv') + path_plot = os.path.join(path_folder, file_name_p1 + '_fcast.png') + + logger_info('path_data', path_data) + logger_info('path_metadata', path_metadata) + logger_info('path_plot', path_plot) + + df_y = pd.read_csv(path_in) + + if col_name_date in df_y: # Need to parse date + df_y[col_name_date] = df_y[col_name_date].pipe(pd.to_datetime) + + df_y = forecast.normalize_df(df_y, col_name_y, col_name_weight, col_name_x, col_name_date, + col_name_source) + + dict_result 
= forecast.run_forecast(df_y, extrapolate_years=forecast_years, simplify_output=False, + include_all_fits=include_all_fits) + + df_result = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + df_result.to_csv(path_data, index=False) + df_metadata.to_csv(path_metadata, index=False) + + try: + forecast_plot.plot_forecast_save(df_result, path_plot, width=1920, height=1080) + except AssertionError: + logger.info("Couldn't generate plot - R not installed") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--path_in', help='Path of input .csv file') + parser.add_argument('--path_out', help='Path of output folder - defaults to folder of path_in', default=None) + parser.add_argument('--forecast_years', help='Years in forecast interval', default=2.0, type=float) + parser.add_argument('--col_name_y', help='Name of column for y', default='y') + parser.add_argument('--col_name_date', help='Name of column for date', default='date') + parser.add_argument('--col_name_weight', help='Name of column for weight', default='weight') + parser.add_argument('--col_name_source', help='Name of column for y', default='source') + parser.add_argument('--col_name_x', help='Name of column for x', default='x') + parser.add_argument('--include_all_fits', help='If true, output includes non-optimal models', action='store_true') + + args = parser.parse_args() + logger.info('Input: path_in= %s', args.path_in) + logger.info('Input: path_out= %s', args.path_out) + logger.info('Input: col_name_y= %s', args.col_name_y) + logger.info('Input: col_name_date= %s', args.col_name_date) + logger.info('Input: col_name_x= %s', args.col_name_x) + logger.info('Input: col_name_weight= %s', args.col_name_weight) + logger.info('Input: col_name_source= %s', args.col_name_source) + logger.info('Input: include_all_fits= %s', args.include_all_fits) + + run_forecast_app(args.path_in, args.path_out, args.forecast_years, + args.col_name_y, args.col_name_weight, args.col_name_x, args.col_name_date, + args.col_name_source, args.include_all_fits) + + # run_forecast_app('/Users/pec21/Downloads/file1.csv','/Users/pec21/Downloads/', + # col_name_y='occup_erl', col_name_source='bend_name') + + +# -- Main +if __name__ == '__main__': + main() diff --git a/anticipy/forecast.py b/anticipy/forecast.py new file mode 100644 index 0000000..84a5331 --- /dev/null +++ b/anticipy/forecast.py @@ -0,0 +1,1173 @@ +# -*- coding: utf-8 -*- +# +# License: This module is released under the terms of the LICENSE file +# contained within this applications INSTALL directory + +""" +Functions to run forecast +""" + +# -- Coding Conventions +# http://www.python.org/dev/peps/pep-0008/ - Use the Python style guide +# http://sphinx.pocoo.org/rest.html - Use Restructured Text for docstrings + +# -- Public Imports +import logging +import numpy as np +import pandas as pd +import scipy +from scipy import optimize +import itertools + + +# -- Private Imports +from anticipy import forecast_models, model_utils + +# -- Globals +from anticipy.model_utils import detect_freq + +logger = logging.getLogger(__name__) + + +# -- Exception classes + +# -- Functions +def logger_info(msg, data): + # Convenience function for easier log typing + logger.info(msg + '\n%s', data) + + +# Utility functions +def to_str_function_list(l_function): + if l_function is None: + return None + return [f.name if f is not None else None for f in l_function] + +def is_null_model(f_model): + return f_model.name == 'null' + + +def 
_is_multi_ts(a): + return a.ndim > 1 and a.shape[1] > 1 + + +def _has_different_res_weights(res_weights): + # Check if a residuals parameter is a vector with length > 1 + return res_weights is not None and hasattr(res_weights, "__getitem__") and len(res_weights) > 1 + + +# TODO: replace get_residuals with this +def get_residuals(params, model, a_x, a_y, a_date, a_weights=None, filter_null_residuals=None, df_actuals=None): + """ + Given a time series, a model function and a set of parameters, get the residuals + + :param params: parameters for model function + :type params: numpy array + :param model: model function. Usage: model(a_x, a_date, params) + :type model: function + :param a_x: X axis for model function. + :type a_x: float array + :param a_y: Input time series values, to compare to the model function + :type a_y: float array + :param a_date: Dates for the input time series + :type a_date: datetime array + :param a_weights: weights for each individual sample + :type a_weights: numpy array + + :return: array with residuals, same length as a_x, a_y + :rtype: float array + """ + # Note: remove this assert for performance + assert a_y.ndim == 1 + # Note: none of the input arrays should include NaN values + # We do not check this with asserts due to performance - this function is in the optimization loop + + y_predicted = model(a_x, a_date, params, df_actuals=df_actuals) + residuals = (a_y - y_predicted) + if a_weights is not None: # Do this only if different residual weights + residuals = residuals * a_weights + result = np.abs(residuals) + return result + + +def optimize_least_squares(model, a_x, a_y, a_date, a_weights=None, f_t_scaling=None, df_actuals=None): + """ + Given a time series and a model function, find the set of parameters that minimises residuals + + :param model: model function, to be fitted against the actuals + :type model: function + :param a_x: + :type a_x: float array + :param a_y: + :type a_y: float array + :param a_date: + :type a_date: datetime array + :param res_weights: + :type res_weights: + :param use_t_scaling: + :type use_t_scaling: + :param bounds: + :type bounds: 2-tuple of array_like + :return: + | table(success, params, cost, optimality, iterations, status, jac_evals, message): + | - success (bool): True if successful fit + | - params (list): Parameters of fitted model + | - cost (float): Value of cost function + | - optimality(float) + | - iterations (int) : Number of function evaluations + | - status (int) : Status code + | - jac_evals(int) : Number of Jacobian evaluations + | - message (str) : Output message + :rtype: pandas.DataFrame + """ + assert a_y.ndim == 1 + + # Check that input is sorted - not required - taken care by normalize_df() + # assert np.all(np.diff(a_x) >= 0), 'Input not sorted on x axis' + + # Ask the model to provide an initial guess + initial_guess = model.f_init_params(a_x, a_y) + + bounds = model.f_bounds(a_x, a_y) + + assert forecast_models.validate_initial_guess(initial_guess, bounds), \ + 'Initial guess outside of bounds: {} - {}, {}'.format(model, initial_guess, bounds) + + # In multi-ts scenarios, we apply this filter to ignore residuals for null y_values + filter_null_residuals = ~np.isnan(a_y) + if np.all(filter_null_residuals): + filter_null_residuals = None + + # t_scaling: we use this to assign different weight to residuals based on date # TODO: Implement scaling functions + if f_t_scaling: + a_weights_tmp = f_t_scaling(a_x) + a_weights = a_weights_tmp if a_weights is None else a_weights*a_weights_tmp + + # Set up 
arguments for get_residuals + f_model_args = (model, a_x, a_y, a_date) + + result = scipy.optimize.least_squares(get_residuals, initial_guess, + args=f_model_args, + kwargs={'a_weights': a_weights, 'df_actuals':df_actuals}, + # method='lm', + method='trf', + x_scale='jac', + # verbose=1, + bounds=bounds + ) + dict_result_df = { + 'optimality': result['optimality'], + 'success': result['success'], + 'cost': result['cost'], + 'iterations': result['nfev'], + 'jac_evals': result['njev'], + 'status': result['status'], + 'message': result['message'], + 'params': [result['x']] + } + df_result = pd.DataFrame(data=dict_result_df, index=pd.Index([0])) + df_result = df_result[['success', 'params', 'cost', 'optimality', 'iterations', 'status', 'jac_evals', 'message']] + return df_result + + +def _get_df_fit_model(source, model, weights, actuals_x_range, freq, + is_fit, cost, aic_c, params, status): + if params is None: + params = np.array([]) + df_result = ( + pd.DataFrame(columns=['source', 'model', 'weights', 'actuals_x_range', 'freq', + 'is_fit', 'cost', 'aic_c', 'params_str', 'status', 'source_long', 'params'], + data=[[source, model, weights, actuals_x_range, freq, + is_fit, cost, aic_c, np.array_str(params, precision=1), status, + '{}:{}:{}:{}'.format(source, weights, freq, actuals_x_range), + params + ]]) + ) + return df_result + + +def _get_empty_df_result_optimize(source, model, status, weights, freq, actuals_x_range): + source_long = '{}:{}:{}:{}'.format(source, weights, freq, actuals_x_range) + return pd.DataFrame(columns=['source', 'model', 'success', 'params_str', 'cost', 'optimality', 'iterations', + 'status', 'jac_evals', 'message', 'source_long', 'params'], + data=[[source, model, False, '[]', np.NaN, np.NaN, np.NaN, status, np.NaN, status, + source_long, []]]) + + +def normalize_df(df_y, + col_name_y='y', + col_name_weight='weight', + col_name_x='x', + col_name_date='date', + col_name_source='source'): + """ + Converts an input dataframe for run_forecast() into a normalized format suitable for fit_model() + + :param df_y: + :type df_y: pandas.DataFrame + :param col_name_y: + :type col_name_y: str + :param col_name_weight: + :type col_name_weight: str + :param col_name_x: + :type col_name_x: str + :param col_name_date: + :type col_name_date: str + """ + + assert df_y is not None + if df_y.empty: + return None + + if isinstance(df_y, pd.Series): + df_y = df_y.to_frame() + assert isinstance(df_y, pd.DataFrame) + assert col_name_y in df_y.columns, 'Dataframe needs to have a column named "{}"'.format(col_name_y) + df_y = df_y.copy() + + # Rename columns to normalized values + rename_col_dict = { + col_name_y:'y', + col_name_weight:'weight', + col_name_x:'x', + col_name_date:'date', + col_name_source:'source' + } + df_y = df_y.copy().rename(rename_col_dict, axis=1) + + # Placeholder - need to replace all references to col_name_z with z + col_name_y = 'y' + col_name_weight = 'weight' + col_name_x = 'x' + col_name_date = 'date' + col_name_source = 'source' + + # Ensure y column is float + df_y[col_name_y] = df_y[col_name_y].astype(float) + + multiple_sources = col_name_source in df_y.columns + l_sources = df_y[col_name_source].drop_duplicates() if multiple_sources else ['test_source'] + + l_df_results = [] + for source in l_sources: + df_y_tmp = df_y.loc[df_y[col_name_source] == source].copy() if multiple_sources else df_y + # Setup date, x columns + if col_name_date not in df_y.columns and isinstance(df_y.index, pd.DatetimeIndex): # use index as i_date + df_y_tmp[col_name_date] = 
df_y_tmp.index + elif col_name_date in df_y.columns: # Ensure that date column is timestamp dtype + df_y_tmp[col_name_date] = df_y_tmp[col_name_date].pipe(pd.to_datetime) + + #if isinstance(df_y_tmp.index, pd.DatetimeIndex): + # We don't need a date index after this point + df_y_tmp = df_y_tmp.reset_index(drop=True) + + if col_name_x not in df_y_tmp.columns: + if col_name_date in df_y_tmp.columns: + # Need to extract numeric index from a_date + df_date_interp = ( + df_y_tmp[[col_name_date]].drop_duplicates().pipe(model_utils.interpolate_df).rename_axis(col_name_x).reset_index() + ) + df_y_tmp = ( + df_date_interp.merge(df_y_tmp) + ) + else: # With no date, extract column x from a numeric index + df_y_tmp[col_name_x] = df_y_tmp.index + + l_df_results += [df_y_tmp] + + # Rename columns to normalized values + rename_col_dict = { + col_name_y: 'y', + col_name_weight: 'weight', + col_name_x: 'x', + col_name_date: 'date', + col_name_source: 'source' + } + + df_result = pd.concat(l_df_results, sort=False, ignore_index=True) #.rename(rename_col_dict, axis=1) + + # Sort columns, filter unused columns + df_result = df_result[[c for c in ['date', 'source', 'x', 'y', 'weight'] if c in df_result.columns]] + sort_columns = ['source','x'] if 'source' in df_result.columns else ['x'] + df_result = df_result.sort_values(sort_columns).reset_index(drop=True) + return df_result + + +def fit_model(model, df_y, freq='W', source='test', df_actuals=None): + """ + Given a time series and a model, optimize model parameters and return + + :param model: + :type model: function + :param df_y: + | Dataframe with the following columns: + | - y: + | - date: (optional) + | - weight: (optional) + | - x: (optional) + :type df_y: pandas.DataFrame + :param source: + :type source: + :param freq: 'W' or 'D' . Used only for metadata + :type freq: str + :return: table (source, model_name, y_weights , freq, is_fit, aic_c, params) + :rtype: pandas.DataFrame + + This function calls optimize_least_squares() to perform the optimization loop. It performs some cleaning up of input + and output parameters. + """ + col_name_y = 'y' + col_name_weight = 'weight' + col_name_x = 'x' + col_name_date = 'date' + + assert df_y is not None and isinstance(df_y, pd.DataFrame) and col_name_y in df_y.columns + + # Setup + f_model_name = model.name + n_params = model.n_params + + df_y = df_y.copy() + # Filter out any sample where df_y is null + df_y = df_y.loc[~df_y[col_name_y].pipe(pd.isna)] + + # Filter out any sample where a_weights is 0 + if col_name_weight in df_y.columns: + df_y[col_name_weight] = df_y[col_name_weight].fillna(0) + df_y = df_y.loc[df_y[col_name_weight] >= 0] + + # Metadata + if col_name_weight not in df_y.columns: + weights = '1' + else: + weights = '{}-{}'.format(df_y[col_name_weight].min(), df_y[col_name_weight].max()) + + # Residual normalization + if df_y[col_name_x].duplicated().any(): + df_k = df_y.groupby(col_name_x).size().rename('k_weight_normalize').reset_index() + df_y = df_y.merge(df_k) + if col_name_weight not in df_y: + df_y[col_name_weight] = 1.0 + # Adjust residual weight based on number of values per sample + # E.g. 
a sample with 2 values in the input series will multiply residuals by 0.5 + df_y[col_name_weight] = df_y[col_name_weight]/df_y['k_weight_normalize'] + + # Get input arrays + a_y = df_y[col_name_y].values + a_x = model_utils.apply_a_x_scaling(df_y[col_name_x].values, model) + a_weights = df_y[col_name_weight].values if col_name_weight in df_y.columns else None + # Need to convert series to DatetimeIndex + i_date = pd.DatetimeIndex(df_y[col_name_date]) if col_name_date in df_y.columns else None + + # Metadata + cost = np.NaN + is_fit = False + params = [] + # Get first and last actuals date, for metadata. If no a_date, use a_x instead. + date_start_actuals = i_date.min().date() if i_date is not None else a_x.min() + date_end_actuals = i_date.max().date() if i_date is not None else a_x.max() + actuals_x_range = '{}::{}'.format(date_start_actuals, date_end_actuals) + + if df_y.empty: + logger.info('Cannot fit - empty df_y: %s', source) + status = 'EMPTY_TS' + df_result = _get_df_fit_model(source, model.name, weights, actuals_x_range, freq, + is_fit, cost, np.NaN, None, status) + df_result_optimize = _get_empty_df_result_optimize(source, model, status, weights, freq, actuals_x_range) + + elif a_x.size < n_params + 2: + logger.info('Not enough samples in source %s for %s: %s (needs %s)', + source, f_model_name, a_x.size, n_params + 2) + status = 'TS_TOO_SHORT' + df_result = _get_df_fit_model(source, model.name, weights, actuals_x_range, freq, + is_fit, cost, np.NaN, None, status) + df_result_optimize = _get_empty_df_result_optimize(source, model, status, weights, freq, actuals_x_range) + else: # Get results + model = forecast_models.simplify_model(model, a_x, a_y, i_date) + + if model.n_params==0: + # 0-parameter model, cannot be fit + #logger.info('Model has 0 parameters - no fitting required') + + a_residuals = get_residuals(None, model, a_x, a_y, i_date, a_weights, df_actuals=df_actuals) + cost = 0.5*np.nansum(a_residuals**2) + is_fit = True + params = np.array([]) + status = 'FIT' + + # Process results + + aic_c = model_utils.get_aic_c(cost, len(df_y), n_params) + + df_result = _get_df_fit_model(source, model.name, weights, actuals_x_range, freq, + is_fit, cost, aic_c, params, status) + + dict_result_df = { + 'optimality': 0., + 'success': True, + 'cost':cost, + 'iterations': 0., + 'jac_evals': 0., + 'status': 0, + 'message': 'Naive model fitted', + 'params': '-' + } + df_result_optimize = pd.DataFrame(data=dict_result_df, index=pd.Index([0])) + df_result_optimize = df_result_optimize[ + ['success', 'params', 'cost', 'optimality', 'iterations', 'status', 'jac_evals', 'message']] + df_result_optimize['source'] = source + df_result_optimize['source_long'] = df_result.source_long.iloc[0] + df_result_optimize['model'] = model + df_result_optimize['params_str'] = df_result.params_str.iloc[0] + df_result_optimize = df_result_optimize[ + ['source', 'model', 'success', 'params_str', 'cost', 'optimality', 'iterations', + 'status', 'jac_evals', 'message', 'source_long', 'params']] + else: + df_result_optimize = optimize_least_squares(model, a_x, a_y, i_date, a_weights, df_actuals=df_actuals) + cost = df_result_optimize.cost.iloc[0] + is_fit = df_result_optimize.success.iloc[0] + params = df_result_optimize.params.iloc[0] + status = 'FIT' if is_fit else 'NO-FIT' + + # Process results + if status in ['FIT','NO-FIT']: + aic_c = model_utils.get_aic_c(cost, len(df_y), n_params) + else: + aic_c = np.NaN + + df_result = _get_df_fit_model(source, model.name, weights, actuals_x_range, freq, + is_fit, cost, 
aic_c, params, status) + + df_result_optimize['source'] = source + df_result_optimize['source_long'] = df_result.source_long.iloc[0] + df_result_optimize['model'] = model + df_result_optimize['params_str'] = df_result.params_str.iloc[0] + df_result_optimize = df_result_optimize [['source','model','success','params_str','cost','optimality','iterations', + 'status','jac_evals','message','source_long','params']] + + dict_result = {'metadata':df_result, 'optimize_info':df_result_optimize} + return dict_result + + +def extrapolate_model(model, params, date_start_actuals, date_end_actuals, freq='W', extrapolate_years=2.0, + x_start_actuals=0., df_actuals=None): + """ + Given a model and a set of parameters, generate model output for a date range plus a number of additional years. + + :param model: + :type model: + :param params: + :type params: + :param date_start_actuals: + :type date_start_actuals: + :param date_end_actuals: + :type date_end_actuals: + :param freq: + :type freq: + :param extrapolate_years: + :type extrapolate_years: + :return: + :rtype: + """ + s_x = model_utils.get_s_x_extrapolate(date_start_actuals, date_end_actuals, model=model, freq=freq, + extrapolate_years=extrapolate_years, x_start_actuals=x_start_actuals) + a_y_forecast = model(s_x.values, s_x.index, params, df_actuals=df_actuals) + s_y_forecast = pd.Series(data=a_y_forecast, index=s_x.index, name='y') + df_y_forecast = pd.DataFrame(s_y_forecast) + return df_y_forecast + + +def get_list_model(l_model_trend, l_model_season, season_add_mult='both'): + if l_model_season is None or len(l_model_season) < 1: + l_model_tmp = l_model_trend + elif l_model_trend is None or len(l_model_trend) < 1: + l_model_tmp = l_model_season + else: + l_model_tmp = [] + if season_add_mult != 'mult': # 'add' or 'both' + l_model_tmp += [model_trend+model_season for model_trend, model_season in + itertools.product(l_model_trend, l_model_season)] + if season_add_mult != 'add': # 'mult' or 'both' + l_model_tmp += [model_trend*model_season for model_trend, model_season in + itertools.product(l_model_trend, l_model_season)] + + l_model_tmp = pd.Series(l_model_tmp).drop_duplicates().tolist() + return l_model_tmp + + +def get_df_actuals_clean(df_actuals, source, source_long): + """ + + :param df_actuals: dataframe in normalized format, with columns y and optionally x, date, weight + :type df_actuals: + :param source: + :type source: + :param source_long: + :type source_long: + :return: + :rtype: + """ + # Add actuals as entries in result dicts + df_actuals = df_actuals.copy() # .rename_axis('date') + if 'date' not in df_actuals.columns: + df_actuals = df_actuals.rename({'x': 'date'}, axis=1) + df_actuals = df_actuals[[c for c in ['date', 'weight', 'y'] if c in df_actuals.columns]] + + df_actuals['model']='actuals' + df_actuals['source'] = source + df_actuals['source_long'] = source_long + df_actuals['is_actuals'] = True + if not 'weight' in df_actuals.columns: + df_actuals['weight'] = 1.0 + return df_actuals + + +def _get_df_fcast_clean(df_fcast, source, source_long,model): + # TODO: cleanup + # This removes any forecast samples with null values, e.g. 
from naive models + df_fcast = df_fcast.loc[ ~df_fcast.y.pipe(pd.isnull)] + df_fcast = df_fcast.copy().rename_axis('date').reset_index() + df_fcast['source'] = source + df_fcast['source_long'] = source_long + df_fcast['model'] = model + df_fcast['is_actuals'] = False + df_fcast['weight'] = 1.0 + return df_fcast + + +""" +# TODO: api improvements: +- change default df format to have columns: x,y, date, weight +- currently, we assume a datetimeindex + +""" + + +def run_forecast_from_input_list(l_dict_input): + # Run forecasts from a list of dictionaries with keyword arguments + + # Handle both scalars and list-likes + s_input = pd.Series(l_dict_input) + + l_dict_result = [] + for dict_input in l_dict_input: + dict_result_tmp = run_forecast(**dict_input) + l_dict_result += [dict_result_tmp] + + # Generate output + return aggregate_forecast_dict_results(l_dict_result) + + +def run_forecast(df_y, l_model_trend=None, l_model_season=None, + date_start_actuals=None, source_id='src', + col_name_y='y', col_name_weight='weight', + col_name_x='x', col_name_date='date', + col_name_source='source', + extrapolate_years=0, season_add_mult='add', + include_all_fits=False, + simplify_output=True, + do_find_steps_and_spikes=False, + find_outliers=False, + l_season_yearly=None, + l_season_weekly=None, + verbose=None, + l_model_naive=None + ): + """ + Generate forecast for one or more input time series + + :return: + :rtype: + :param df_y: + :type df_y: + :param l_model_trend: + :type l_model_trend: + :param l_model_season: + :type l_model_season: + :param date_start_actuals: + :type date_start_actuals: + :param source_id: + :type source_id: + :param col_name_y: + :type col_name_y: + :param col_name_weight: + :type col_name_weight: + :param col_name_x: + :type col_name_x: + :param col_name_date: + :type col_name_date: + :param col_name_source: + :type col_name_source: + :param return_all_models: + | If True, result includes non-fitting models, with null AIC and an empty forecast df. + | Otherwise, result includes only fitting models, and for time series where no fitting model is available, + | a 'no-best-model' entry with null AIC and an empty forecast df is added. + :type return_all_models: bool + :param return_all_fits: If True, result includes all models for each input time series. Otherwise, only the + best model is included. + :type return_all_fits: bool + :param extrapolate_years: + :type extrapolate_years: float + :param season_add_mult: 'add', 'mult', or 'both'. Whether forecast seasonality will be additive, multiplicative, + or the best fit of the two. + :type season_add_mult: str + :param fill_gaps_y_values: If True, gaps in time series will be filled with NaN values + :type fill_gaps_y_values: bool + :param freq: 'W' or 'D' . Sampling frequency of the output forecast: weekly or daily. + :type freq: str + :param do_find_steps_and_spikes: if True, find steps and spikes, create fixed models and add them + to the list of models + :type do_find_steps_and_spikes: bool + :param find_outliers: + :type find_outliers: + :param include_all_fits: + :type include_all_fits: + :param simplify_output: If False, return dict with forecast and metadata. Otherwise, return only forecast. 
+ :type simplify_output: bool + :return: + :rtype: + """ + # TODO: Add check for non-duplicate source ids + l_dict_result = [] + + df_y = normalize_df(df_y, col_name_y, col_name_weight, col_name_x, col_name_date, col_name_source) + if df_y is None: # Empty input + return None + + if 'source' not in df_y.columns: + return run_forecast_single(df_y, + l_model_trend, + l_model_season, + date_start_actuals, + source_id, + extrapolate_years, + season_add_mult, + include_all_fits, + simplify_output, + do_find_steps_and_spikes, + find_outliers, + l_season_yearly, + l_season_weekly, + l_model_naive=l_model_naive + ) + else: + for src_tmp in df_y.source.drop_duplicates(): + if verbose: + logger.info('Running forecast for source: %s', src_tmp) + df_y_tmp = df_y.loc[df_y.source==src_tmp].reset_index(drop=True) + dict_result_tmp = run_forecast_single(df_y_tmp, + l_model_trend, + l_model_season, + date_start_actuals, + src_tmp, + extrapolate_years, + season_add_mult, + include_all_fits, + False, # Simplify output + do_find_steps_and_spikes, + find_outliers, + l_season_yearly, + l_season_weekly, + l_model_naive=l_model_naive + ) + l_dict_result += [dict_result_tmp] + # Generate output + dict_result = aggregate_forecast_dict_results(l_dict_result) + if simplify_output: + return dict_result.get('forecast') + else: + return dict_result + + +def aggregate_forecast_dict_results(l_dict_result): + l_df_data = [] + l_df_metadata = [] + l_df_optimize_info = [] + # Forecast with prediction interval + l_df_forecast = [] + + for dict_result in l_dict_result: + l_df_data += [dict_result['data']] + l_df_metadata += [dict_result['metadata']] + l_df_optimize_info += [dict_result['optimize_info']] + l_df_forecast += [dict_result['forecast']] + + # Generate output + df_data = pd.concat(l_df_data, sort=False, ignore_index=True) + df_metadata = pd.concat(l_df_metadata, sort=False, ignore_index=True) + df_optimize_info = pd.concat(l_df_optimize_info, sort=False, ignore_index=True) + df_forecast = pd.concat(l_df_forecast, sort=False, ignore_index=True) + + return {'forecast': df_forecast, 'data': df_data, 'metadata': df_metadata, 'optimize_info': df_optimize_info} + +def run_forecast_single(df_y, + l_model_trend=None, + l_model_season=None, + date_start_actuals=None, + source_id='src', + extrapolate_years=0, + season_add_mult='add', + include_all_fits=False, + simplify_output=True, + do_find_steps_and_spikes=False, + find_outliers=False, + l_season_yearly=None, + l_season_weekly=None, + l_model_naive=None + ): + """ + + :param df_y: + :type df_y: + :param l_model_trend: + :type l_model_trend: + :param l_model_season: + :type l_model_season: + :param date_start_actuals: + :type date_start_actuals: + :param source_id: + :type source_id: + :param col_name_y: + :type col_name_y: + :param col_name_weight: + :type col_name_weight: + :param col_name_x: + :type col_name_x: + :param col_name_date: + :type col_name_date: + :param return_all_models: + | If True, result includes non-fitting models, with null AIC and an empty forecast df. + | Otherwise, result includes only fitting models, and for time series where no fitting model is available, + | a 'no-best-model' entry with null AIC and an empty forecast df is added. + :type return_all_models: bool + :param return_all_fits: If True, result includes all models for each input time series. Otherwise, only the + best model is included. + :type return_all_fits: bool + :param extrapolate_years: + :type extrapolate_years: float + :param season_add_mult: 'add', 'mult', or 'both'. 
Whether forecast seasonality will be additive, multiplicative, + or the best fit of the two. + :type season_add_mult: str + :param fill_gaps_y_values: If True, gaps in time series will be filled with NaN values + :type fill_gaps_y_values: bool + :param freq: 'W' or 'D' . Sampling frequency of the output forecast: weekly or daily. + :type freq: str + :param do_find_steps_and_spikes: if True, find steps and spikes, create fixed models and add them + to the list of models + :type do_find_steps_and_spikes: bool + :return: + :rtype: + """ + l_df_data = [] + l_df_metadata = [] + l_df_optimize_info = [] + + # Each element in l_fcast_input describes all model configurations for a source time series + source = source_id + + if 'date' in df_y.columns: + freq = detect_freq(df_y.date) + else: + freq = None + + df_y=df_y.copy() + df_y_unfiltered = df_y.copy() + + if date_start_actuals is not None and 'date' in df_y.columns: # Filter: only actuals after date_start_actuals + df_y = df_y.loc[df_y.date >= date_start_actuals] + + date_start_actuals = df_y.date.min() if 'date' in df_y.columns else df_y.x.min() + date_end_actuals = df_y.date.max() if 'date' in df_y.columns else df_y.x.max() + + # If we find outliers, we add a model with dummy variables for the outliers + if find_outliers: + model_outliers, outlier_mask = forecast_models.get_model_outliers(df_y) + if outlier_mask is not None: + if 'weight' in df_y.columns: + df_y['weight'] = df_y['weight'] * outlier_mask + else: + df_y['weight'] = outlier_mask + assert np.issubdtype(df_y.weight.astype(float), np.float64) + else: + model_outliers = None + + # Add actuals to output + # Get weight for metadata + if 'weight' not in df_y.columns: + df_y['weight']=1 + weights = '1' + else: + weights = '{}-{}'.format(df_y['weight'].min(), df_y['weight'].max()) + + # Get long source_id + if isinstance(date_start_actuals, pd.datetime): + date_start_actuals_short = date_start_actuals.date() + date_end_actuals_short = date_end_actuals.date() + else: + date_start_actuals_short = date_start_actuals + date_end_actuals_short = date_end_actuals + actuals_x_range = '{}::{}'.format(date_start_actuals_short, date_end_actuals_short) + source_long = '{}:{}:{}:{}'.format(source, weights, freq, actuals_x_range) + df_actuals = get_df_actuals_clean(df_y, source, source_long) + l_df_data+=[df_actuals] + + if l_model_trend is None: + # By default, try linear and piecewise linear + l_model_trend = [ + #forecast_models.model_naive, + forecast_models.model_linear, + forecast_models.model_linear+forecast_models.model_ramp] + l_model_season_add = None + l_model_season_mult = None + if l_model_season is None: + if 'date' in df_y.columns: + s_date_tmp = df_y.date + if 'weight' in df_y.columns: + s_date_tmp = s_date_tmp.loc[df_y.weight>0] + + l_model_season_add = forecast_models.get_l_model_auto_season(s_date_tmp,season_add_mult='add', + l_season_yearly=l_season_yearly, + l_season_weekly=l_season_weekly, + ) + l_model_season_mult = forecast_models.get_l_model_auto_season(s_date_tmp,season_add_mult='mult', + l_season_yearly=l_season_yearly, + l_season_weekly=l_season_weekly, + ) + else: + l_model_season_add = l_model_season + l_model_season_mult = l_model_season + + + l_model_add = get_list_model(l_model_trend, l_model_season_add, 'add') + l_model_mult = get_list_model(l_model_trend, l_model_season_mult, 'mult') + + if season_add_mult == 'add': + l_model = l_model_add + elif season_add_mult == 'mult': + l_model = l_model_mult + else: # both + l_model = 
np.unique([l_model_add+l_model_mult]).tolist() + # logger_info('debug l_Model',l_model) + if l_model_naive is not None: + l_model = l_model_naive+l_model + + # if model_outliers is not None: + # l_model_outlier = [forecast_models.model_null, model_outliers] + # l_model = get_list_model(l_model, l_model_outlier, 'add') + + if do_find_steps_and_spikes: + a_y = df_y.y.values + a_x = df_y.y + + a_date = df_y.date if 'date' in df_y.columns else None + + steps, spikes = forecast_models.find_steps_and_spikes(a_x, a_y, a_date) + if steps: + steps_summed = reduce(lambda x, y: x + y, steps) + steps_summed.name = '{}_fixed_steps'.format(len(steps)) + l_model = [model + steps_summed for model in l_model] + if spikes: + spikes_mult = reduce(lambda x, y: x * y, spikes) + spikes_mult.name = '{}_fixed_spikes'.format(len(spikes)) + # filter values during the spike + a_y_filt = spikes_mult(a_x, a_date, []) + df_y[a_y_filt == 0] = np.nan + + # exclude samples with weight = 0 + df_y = df_y.loc[df_y.weight > 0] + date_start_actuals = df_y.date.min() if 'date' in df_y.columns else df_y.x.min() + x_start_actuals = df_y.x.min() + + df_actuals_cols = [c for c in ['date','x'] if c in df_y.columns] + + df_actuals_interpolated = ( # Fills gaps, used for extrapolation + df_y_unfiltered + .merge(df_y_unfiltered[df_actuals_cols].drop_duplicates('x').pipe(model_utils.interpolate_df), how='right') + .sort_values(['x']).reset_index(drop=True) + ) + # Update weight column in df_actuals_interpolated + df_actuals_interpolated = df_actuals_interpolated.drop(columns=['weight'],errors='ignore') + df_actuals_interpolated = df_actuals_interpolated.merge(df_y[['x','weight']],how='left') + df_actuals_interpolated['weight']=df_actuals_interpolated.weight.fillna(0) + + # Note - In the above steps, we first remove any samples with weight = 0 + # from the data used for fitting + # then we fill gaps in dates from the table used for extrapolating. 
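+    # Illustration with hypothetical values - a series with one missing sample at x=2:
+    #   input x:      0, 1, 3      ->  interpolated x:  0, 1, 2, 3
+    #   input y:      5., 6., 8.   ->  merged y:        5., 6., NaN, 8.
+    #   input weight: 1, 1, 1      ->  merged weight:   1, 1, 0, 1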
+ # The filled gaps have NaN values in the y column, 0 weight + + for model in l_model: + + dict_fit_model = fit_model(model, df_y, freq, source, df_actuals=df_y_unfiltered) + df_metadata_tmp = dict_fit_model['metadata'] + df_optimize_info = dict_fit_model['optimize_info'] + + l_df_metadata += [df_metadata_tmp] + l_df_optimize_info += [df_optimize_info] + source_long = df_metadata_tmp.source_long.iloc[0] + params = df_metadata_tmp.params.iloc[0] + + if df_metadata_tmp.is_fit.iloc[0]: # If model is fit + + # date_start_actuals = df_y.date.min() + # date_end_actuals = df_y.date.max() + + df_data_tmp = extrapolate_model(model, params, + date_start_actuals,date_end_actuals, + freq, extrapolate_years, x_start_actuals = x_start_actuals, + df_actuals=df_actuals_interpolated) + + df_data_tmp = _get_df_fcast_clean(df_data_tmp, source_id, source_long, model.name) + + l_df_data += [df_data_tmp] + + # Generate output + df_data = pd.concat(l_df_data, sort=False, ignore_index=True) + df_metadata = pd.concat(l_df_metadata, sort=False, ignore_index=True) + df_optimize_info = pd.concat(l_df_optimize_info, sort=False, ignore_index=True) + + # Determine best fits + df_best_fit = ( + df_metadata.loc[df_metadata.is_fit] + .sort_values('aic_c') + .groupby('source', as_index=False).first() + [['source_long', 'model']] + ) + df_best_fit['is_best_fit'] = True + + df_metadata = df_metadata.merge(df_best_fit, how='left') + df_metadata['is_best_fit'] = df_metadata['is_best_fit'].fillna(False).astype(bool) + df_data = df_data.merge(df_best_fit, how='left').reset_index(drop=True) + df_data['is_best_fit'] = df_data['is_best_fit'].fillna(False).astype(bool) + + if not include_all_fits: + df_metadata = df_metadata.loc[df_metadata.is_best_fit].reset_index(drop=True) + df_data = df_data.loc[df_data.is_best_fit | df_data.is_actuals].reset_index(drop=True) + + df_forecast = df_data.pipe(get_pi, n=100) + dict_result = {'forecast': df_forecast, 'data': df_data, 'metadata': df_metadata, 'optimize_info': df_optimize_info} + + if simplify_output: + return df_forecast + else: + return dict_result + + +# TODO: Better define return_all_fits, return_all_models. Document and provide clear use cases +# TODO: Improve test, make shorter +def run_l_forecast(l_fcast_input, + col_name_y='y', col_name_weight='weight', + col_name_x='x', col_name_date='date', + col_name_source='source', + extrapolate_years=0, season_add_mult='add', + include_all_fits=False, + do_find_steps_and_spikes=False, + find_outliers=False): + """ + Generate forecasts for a list of SolverConfig objects, each including a time series, model functions, and other + configuration parameters. + + :param l_fcast_input: List of forecast input configurations. Each element includes a time series, + candidate forecast models for trend and seasonality, and other configuration parameters. For each input + configuration, a forecast time series will be generated. + :type l_fcast_input: list of ForecastInput + :param return_all_models: + | If True, result includes non-fitting models, with null AIC and an empty forecast df. + | Otherwise, result includes only fitting models, and for time series where no fitting model is available, + | a 'no-best-model' entry with null AIC and an empty forecast df is added. + :type return_all_models: bool + :param return_all_fits: If True, result includes all models for each input time series. Otherwise, only the + best model is included. 
+ :type return_all_fits: bool + :param extrapolate_years: + :type extrapolate_years: float + :param season_add_mult: 'add', 'mult', or 'both'. Whether forecast seasonality will be additive, multiplicative, + or the best fit of the two. + :type season_add_mult: str + :param fill_gaps_y_values: If True, gaps in time series will be filled with NaN values + :type fill_gaps_y_values: bool + :param freq: 'W' or 'D' . Sampling frequency of the output forecast: weekly or daily. + :type freq: str + :return: + | dict(data,metadata) + | data: dataframe(date, source, model, y) + | metadata: dataframe('source', 'model', 'res_weights', 'freq', 'is_fit', 'cost', 'aic_c', 'params', 'status') + :rtype: dict + + """ + # TODO: Add check for non-duplicate source ids + l_df_data = [] + l_df_metadata = [] + l_df_optimize_info = [] + + # We can take solver_config_list that are a list or a single forecast_input + if type(l_fcast_input) is not list: + l_fcast_input = [l_fcast_input] + + l_dict_result = [] + for fcast_input in l_fcast_input: + dict_result = run_forecast(fcast_input.df_y, fcast_input.l_model_trend, fcast_input.l_model_season, + fcast_input.date_start_actuals, fcast_input.source_id, + col_name_y, col_name_weight, + col_name_x, col_name_date, + col_name_source, + extrapolate_years, season_add_mult, + include_all_fits, simplify_output=False, + do_find_steps_and_spikes=do_find_steps_and_spikes, + find_outliers=find_outliers) + l_dict_result += [dict_result] + + # Generate output + return aggregate_forecast_dict_results(l_dict_result) + + +# Forecast configuration + +# TODO: Rename to ForecastInput +class ForecastInput: + """ + Class that encapsulates input variables for forecast.run_forecast() + """ + + def __init__(self, source_id, df_y, l_model_trend=None, l_model_season=None, + weights_y_values=1.0, date_start_actuals=None): + self.source_id = source_id + self.df_y = df_y + self.l_model_trend = l_model_trend if l_model_trend is not None else [forecast_models.model_linear] + self.l_model_season = l_model_season + self.weights_y_values = weights_y_values + self.date_start_actuals = date_start_actuals + + def __str__(self): + str_result = ( + 'SolverConfig: {source_id} ; {df_y_shape} ; {weights_y_values};' + ' {l_model_trend}; {l_model_season} ; {date_start_actuals}' + ).format(source_id=self.source_id, df_y_shape=self.df_y.shape, + l_model_trend=to_str_function_list(self.l_model_trend), + l_model_season=to_str_function_list(self.l_model_season), + weights_y_values=self.weights_y_values, date_start_actuals=self.date_start_actuals) + return str_result + + def __repr__(self): + return self.__str__() + + # TODO: REMOVE + @classmethod + def create(cls, source_id, df_y, l_model_trend, l_model_season=None, + weights_y_values=1.0, date_start_actuals=None): + return cls(source_id, df_y, pd.Series(l_model_trend), l_model_season, + weights_y_values, date_start_actuals) + + +""" +Draft for a parallel computing version: +run_forecast_parallel(n) +- take solver_config_list, split into n parts +- open n processes for run_forecast, each with 1/n of solver_config_list +- merge outputs: a dict with a pd.concat() of each output dataframe +- challenge: pickling objects: solver_config_list, pandas dataframe +- potential solution: have solver_config_list replace dataframes with file paths + +""" + + +def get_pi(df_forecast, n=100): + if 'source' in df_forecast.columns and df_forecast.source.nunique() > 1: + df_result = ( + df_forecast + .groupby('source', as_index=False) + .apply(_get_pi_single_source, n) + 
.sort_values(['source', 'is_actuals', 'date']) + .reset_index(drop=True) + ) + else: + df_result = _get_pi_single_source(df_forecast, n) + return df_result + + +# TODO: Test +def _get_pi_single_source(df_forecast, n=100): + # n: Number of bootstrapped samples for prediction interval + + if 'is_best_fit' in df_forecast.columns: + df_forecast = df_forecast.loc[df_forecast.is_actuals | df_forecast.is_best_fit].copy() + else: + df_forecast = df_forecast.copy() + + if 'source' in df_forecast.columns: + l_cols = ['date', 'source'] + else: + l_cols = ['date'] + + # logger_info('DEBUG - df_forecast', df_forecast.head(1)) + if 'is_weight' in df_forecast.columns and df_forecast.is_weight.any(): + + # Filter out dates for outliers with weight=0 + df_filtered_dates = ( + df_forecast.loc[df_forecast.is_weight & df_forecast.y > 0] + [['date', 'source']] + ) + + # Take filtered actuals + df_actuals_unfiltered = df_forecast.loc[df_forecast.is_actuals & ~df_forecast.is_weight & + ~df_forecast.y.isnull()] + df_actuals = (df_actuals_unfiltered[['date', 'y']] + .merge(df_filtered_dates, how='inner') + .rename({'y': 'actuals'}, axis=1) + ) + date_last_actuals = df_actuals.date.max() + else: # No weight data - use all actuals rows + df_actuals_unfiltered = df_forecast.loc[df_forecast.is_actuals & ~df_forecast.y.isnull()] + df_actuals = (df_actuals_unfiltered[['date', 'y']] + .rename({'y': 'actuals'}, axis=1) + ) + date_last_actuals = df_actuals.date.max() + # Compute residuals for filtered actuals + df_residuals_tmp = df_forecast.loc[~df_forecast.is_actuals & ~df_forecast.y.pipe(pd.isnull)][l_cols+['model', 'y']] + + df_residuals = df_residuals_tmp.merge(df_actuals, how='inner') + df_residuals['res'] = df_residuals['actuals'] - df_residuals['y'] + + # Filter out null values, e.g. 
due to null actuals + df_residuals = df_residuals.loc[~df_residuals.res.isnull()] + + if df_residuals.empty: # May happen if no forecast could be generated + logger.warning('No forecast data for source %s', df_forecast.source.head(1).iloc[0]) + return df_actuals_unfiltered[l_cols + ['is_actuals', 'model', 'y']] + + # Generate table with prediction interval + df_forecast_pi = ( + df_forecast + .loc[~df_forecast.is_actuals & (df_forecast.date > date_last_actuals)] + [l_cols+['model','y']] + ) + + s_residuals_tmp = df_residuals.res + a_forecast_point = df_forecast_pi.y.values + + length = a_forecast_point.size + + a_sample = s_residuals_tmp.sample(length * n, replace=True).values.reshape(n, length) + a_sample = np.cumsum(a_sample, axis=1) + + a_q5 = np.percentile(a_sample, 5, axis=0) + a_q95 = np.percentile(a_sample, 95, axis=0) + a_q80 = np.percentile(a_sample, 80, axis=0) + a_q20 = np.percentile(a_sample, 20, axis=0) + + df_forecast_pi['q5'] = a_q5 + df_forecast_pi.y + df_forecast_pi['q20'] = a_q20 + df_forecast_pi.y + df_forecast_pi['q80'] = a_q80 + df_forecast_pi.y + df_forecast_pi['q95'] = a_q95 + df_forecast_pi.y + df_forecast_pi['is_actuals'] = False + + # Past forecast samples, no prediction interval + df_forecast_past = ( + df_forecast + .loc[~df_forecast.is_actuals & (df_forecast.date <= date_last_actuals)] + [l_cols+['model', 'is_actuals', 'y']] + ) + + df_actuals_unfiltered = df_actuals_unfiltered[l_cols + ['is_actuals', 'model', 'y']] + df_pi_result = pd.concat([df_actuals_unfiltered, df_forecast_past, df_forecast_pi, ], sort=False, ignore_index=True) + + return df_pi_result diff --git a/anticipy/forecast_models.py b/anticipy/forecast_models.py new file mode 100644 index 0000000..8b46d1f --- /dev/null +++ b/anticipy/forecast_models.py @@ -0,0 +1,1549 @@ +# -*- coding: utf-8 -*- +# +# License: This module is released under the terms of the LICENSE file +# contained within this applications INSTALL directory + +""" +Defines the ForecastModel class, which encapsulates model functions used in forecast model fitting, as well as +their number of parameters and initialisation parameters. +""" + +# TODO: Check parameter initialisation + +# TODO: It may be convenient to have different model functions for addition and subtraction, +# e.g. to return 1 or 0 by default + +# -- Coding Conventions +# http://www.python.org/dev/peps/pep-0008/ - Use the Python style guide +# http://sphinx.pocoo.org/rest.html - Use Restructured Text for docstrings + +# -- Public Imports +import logging +import numpy as np +import pandas as pd +import itertools + +# -- Private Imports +from anticipy import model_utils + +# -- Globals +logger = logging.getLogger(__name__) +dict_fourier = { + 'period': 365.25, # days in year + 'harmonics': 10 # TODO: evaluate different harmonics values +} + + +# -- Exception classes + +# -- Functions +def logger_info(msg, data): + # Convenience function for easier log typing + logger.info(msg + '\n%s', data) + + +def _is_multi_ts(a): + return a.ndim > 1 and a.shape[1] > 1 + + +def _get_f_init_params_default(n_params): + # Generate a default function for initialising model parameters: use random values between 0 and 1 + return lambda a_x=None, a_y=None, a_date=None, is_mult=False: np.random.uniform(low=0.001, high=1, size=n_params) + + +def _get_f_bounds_default(n_params): + # Generate a default function for model parameter boundaries. 
Default boundaries are (-inf, inf) + return lambda a_x=None, a_y=None, a_date=None: (n_params * [-np.inf], n_params * [np.inf]) + + +def _get_f_add_2_f_models(forecast_model1, forecast_model2): + def f_add_2_f_models(a_x, a_date, params, is_mult=False, **kwargs): + params1 = params[0:forecast_model1.n_params] + params2 = params[forecast_model1.n_params:] + return ( + forecast_model1.f_model(a_x, a_date, params1, is_mult=False, **kwargs) + + forecast_model2.f_model(a_x, a_date, params2, is_mult=False, **kwargs) + ) + + return f_add_2_f_models + + +def _get_f_mult_2_f_models(forecast_model1, forecast_model2): + def f_mult_2_f_models(a_x, a_date, params, is_mult=False, **kwargs): + params1 = params[0:forecast_model1.n_params] + params2 = params[forecast_model1.n_params:] + return ( + forecast_model1.f_model(a_x, a_date, params1, is_mult=True, **kwargs) * + forecast_model2.f_model(a_x, a_date, params2, is_mult=True, **kwargs) + ) + + return f_mult_2_f_models + + +def _get_f_add_2_f_init_params(f_init_params1, f_init_params2): + def f_add_2_f_init_params(a_x, a_y, a_date=None, is_mult=False): + return np.concatenate([f_init_params1(a_x, a_y, a_date, is_mult=False), + f_init_params2(a_x, a_y, a_date, is_mult=False)]) + return f_add_2_f_init_params + +def _get_f_mult_2_f_init_params(f_init_params1, f_init_params2): + def f_mult_2_f_init_params(a_x, a_y, a_date=None, is_mult=False): + return np.concatenate([f_init_params1(a_x, a_y, a_date, is_mult=True), + f_init_params2(a_x, a_y, a_date, is_mult=True)]) + return f_mult_2_f_init_params + + +def _get_f_concat_2_bounds(forecast_model1, forecast_model2): + def f_add_2_f_bounds(a_x, a_y, a_date=None): + return np.concatenate((forecast_model1.f_bounds(a_x, a_y, a_date), + forecast_model2.f_bounds(a_x, a_y, a_date)), axis=1) + + return f_add_2_f_bounds + + +# def _get_add_2_bounds(f_bounds1, f_bounds2): +# return np.concatenate((f_bounds1, f_bounds2), axis=1) + + +# -- Classes + +class ForecastModel: + """ + Class that encapsulates model functions for use in forecasting, as well as + their number of parameters and functions for parameter initialisation. + + A ForecastModel instance is initialized with a model name, a number of model parameters, and a model function. + Class instances are callable - when called as a function, their internal model function is used. The main purpose + of ForecastModel objects is to generate predicted values for a time series, given a set of parameters. + These values can be compared to the original series to get an array of residuals:: + + y_predicted = model(a_x, a_date, params) + residuals = (a_y - y_predicted) + + This is used in an optimization loop to obtain the optimal parameters for the model. + + The reason for using this class instead of raw model functions is that ForecastModel supports function composition:: + + model_sum = fcast_model1 + fcast_model2 # fcast_model 1 and 2 are ForecastModel instances, and so is model_sum + a_y1 = fcast_model1(a_x, a_date, params1) + fcast_model2(a_x, a_date, params2) + params = np.concatenate([params1, params2]) + a_y2 = model_sum(a_x, a_date, params) + a_y1 == a_y2 # True + + Forecast models can be added or multiplied, with the + and * operators. Multiple levels of composition are + supported:: + + model = (model1 + model2) * model3 + + Model composition is used to aggregate trend and seasonality model components, among other uses. 
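+
+    For illustration, a minimal composition sketch (parameter values here are arbitrary)::
+
+        import numpy as np
+        from anticipy import forecast_models
+
+        model = forecast_models.model_linear + forecast_models.model_constant
+        model.n_params                      # 3: two linear parameters, then one constant parameter
+        a_x = np.arange(10.)
+        params = np.array([0.5, 1., 10.])   # concatenated in composition order
+        a_y = model(a_x, None, params)      # a_date=None is fine for non date-aware models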
+ + Model functions have the following signature: + + - f(a_x, a_date, params, is_mult) + - a_x : array of floats + - a_date: array of dates, same length as a_x. Only required for date-aware models, e.g. for weekly seasonality. + - params: array of floats - model parameters - the optimisation loop updates this to fit our actual values. Each + model function uses a fixed number of parameters. + - is_mult: boolean. True if the model is being used with multiplicative composition. Required because + some model functions (e.g. steps) have different behaviour + when added to other models than when multiplying them. + - returns an array of floats - with same length as a_x - output of the model defined by this object's + modelling function f_model and the current set of parameters + + By default, model parameters are initialized as random values between 0 and 1. + It is possible to define a parameter initialization function that picks initial values + based on the original time series. + This is passed during ForecastModel creation with the argument f_init_params. + Parameter initialization is compatible with model composition: + the initialization function of each component will be used for that component's parameters. + + Parameter initialisation functions have the following signature: + + - f_init_params(a_x, a_y, is_mult) + - a_x: array of floats - same length as time series + - a_y: array of floats - time series values + - returns an array of floats - with length equal to this object's n_params value + + By default, model parameters have no boundaries. + However, it is possible to define a boundary function for a model, + that sets boundaries for each model parameter, based on the input time series. + This is passed during ForecastModel creation with the argument f_bounds. + Boundary definition is compatible with model composition: + the boundary function of each component will be used for that component's parameters. + + Boundary functions have the following signature: + + - f_bounds(a_x, a_y, a_date) + - a_x: array of floats - same length as time series + - a_y: array of floats - time series values + - a_date: array of dates, same length as a_x. Only required for date-aware models, e.g. for weekly seasonality. + - returns a tuple of 2 arrays of floats. The first defines minimum parameter boundaries, and the second + the maximum parameter boundaries. + + Our input time series should meet the following constraints: + + - Minimum required samples depends on number of model parameters + - May include null values + - May include multiple values per sample + - A date array is only required if the model is date-aware + + Class Usage:: + + model_x = ForecastModel(name, n_params, f_model, f_init_params) + model_name = model_x.name # Get model name + n_params = model_x.n_params # Get number of model parameters + f_init_params = model_x.f_init_params # Get parameter initialisation function + init_params = f_init_params(t_values, y_values) # Get initial parameters + f_model = model_x.f_model # Get model fitting function + y = f_model(a_x, a_date, parameters) # Get model output + + The following pre-generated models are available. They are available as attributes from this module: + + .. csv-table:: Forecast models + :header: "name", "params", "formula","notes" + :widths: 20, 10, 20, 40 + + "model_null",0, "y=0", "Does nothing. Used to disable components (e.g. 
seasonality)" + "model_constant",1, "y=A", "Constant model" + "model_linear",2, "y=Ax + B", "Linear model" + "model_linear_nondec",2, "y=Ax + B", "Non decreasing linear model. With boundaries to ensure model slope >=0" + "model_quasilinear",3, "y=A*(x^B) + C", "Quasilinear model" + "model_exp",2, "y=A * B^x", "Exponential model" + "model_step",2, "y=0 if x=A", "Step model" + "model_two_steps",4, "see model_step", "2 step models. Parameter initialization is aware of # of steps." + "model_sigmoid_step",3, "y = A + (B - A) / (1 + np.exp(- D * (x - C)))", "Sigmoid step model" + "model_sigmoid",3, "y = A + (B - A) / (1 + np.exp(- D * (x - C)))", "Sigmoid model" + "model_season_wday",7, "see desc.", "Weekday seasonality model. Assigns a constant value to each weekday" + "model_season_wday",6, "see desc.", "6-param weekday seasonality model. As above, with one constant set to 0." + "model_season_wday_2",2, "see desc.", "Weekend seasonality model. Assigns a constant to each of weekday/weekend" + "model_season_month",12, "see desc.", "Month seasonality model. Assigns a constant value to each month" + "model_season_fourier_yearly",10, "see desc", "Fourier yearly seasonality model" + + """ + + def __init__(self, name, n_params, f_model, f_init_params=None, f_bounds=None): + """ + Create ForecastModel + + :param name: + :type name: + :param n_params: + :type n_params: + :param f_model: + :type f_model: + :param f_init_params: + :type f_init_params: + :param f_bounds: + :type f_bounds: + """ + self.name = name + self.n_params = n_params + self.f_model = f_model + if f_init_params is not None: + self.f_init_params = f_init_params + else: + # Default initial parameters: random values between 0 and 1 + self.f_init_params = _get_f_init_params_default(n_params) + + if f_bounds is not None: + self.f_bounds = f_bounds + else: + self.f_bounds = _get_f_bounds_default(n_params) + + # TODO - REMOVE THIS - ASSUME NORMALIZED INPUT + def _get_f_init_params_validated(f_init_params): + # Adds argument validation to a parameter initialisation function + def f_init_params_validated(a_x=None, a_y=None, a_date=None, is_mult=False): + if a_x is not None and pd.isnull(a_x).any(): + raise ValueError('a_x cannot have null values') + return f_init_params(a_x, a_y, a_date, is_mult) + + return f_init_params_validated + + # Add logic to f_init_params that validates input + self.f_init_params = _get_f_init_params_validated(self.f_init_params) + + def __call__(self, a_x, a_date, params, is_mult=False, **kwargs): + # assert len(params)==self.n_params + return self.f_model(a_x, a_date, params, is_mult, **kwargs) + + def __str__(self): + return self.name + + def __repr__(self): + return 'ForecastModel:{}'.format(self.name) + + def __add__(self, forecast_model): + # Check for nulls + if self.name == 'null': + return forecast_model + if forecast_model.name == 'null': + return self + name = '({}+{})'.format(self.name, forecast_model.name) + n_params = self.n_params + forecast_model.n_params + f_model = _get_f_add_2_f_models(self, forecast_model) + f_init_params = _get_f_add_2_f_init_params(self.f_init_params, forecast_model.f_init_params) + f_bounds = _get_f_concat_2_bounds(self, forecast_model) + return ForecastModel(name, n_params, f_model, f_init_params, + f_bounds=f_bounds) + + def __radd__(self, other): + return self.__add__(other) + + def __mul__(self, forecast_model): + if self.name == 'null': + return forecast_model + if forecast_model.name == 'null': + return self + name = '({}*{})'.format(self.name, forecast_model.name) + 
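+        # Multiplicative composition: as with __add__, the two parameter vectors are
+        # concatenated, but the composed f_model evaluates each sub-model with
+        # is_mult=True and multiplies the results.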
n_params = self.n_params + forecast_model.n_params + f_model = _get_f_mult_2_f_models(self, forecast_model) + f_init_params = _get_f_mult_2_f_init_params(self.f_init_params, forecast_model.f_init_params) + f_bounds = _get_f_concat_2_bounds(self, forecast_model) + return ForecastModel(name, n_params, f_model, f_init_params, f_bounds) + + def __rmul__(self, other): + return self.__mul__(other) + + def __eq__(self, other): + if isinstance(self, other.__class__): + return self.name == other.name + return NotImplemented + + def __ne__(self, other): + x = self.__eq__(other) + if x is not NotImplemented: + return not x + return NotImplemented + + def __hash__(self): + return hash(self.name) + + def __lt__(self, other): + return self.name < other.name + +# - Null model: 0 + +def _f_model_null(a_x, a_date, params, is_mult=False, **kwargs): + # This model does nothing - used to disable model components (e.g. seasonality) when adding/multiplying + # multiple functions + return float(is_mult) # Returns 1 if multiplying, 0 if adding + + +model_null = ForecastModel('null', 0, _f_model_null) + + +# - Constant model: :math:`Y = A` + +def _f_model_constant(a_x, a_date, params, is_mult=False, **kwargs): + [A] = params + y = np.full(len(a_x), A) + return y + + +def _f_init_params_constant(a_x=None, a_y=None, a_date=None, is_mult=False): + if a_y is None: + return np.random.uniform(0, 1, 1) + else: + return np.nanmean(a_y) + np.random.uniform(0, 1, 1) + + +model_constant = ForecastModel('constant', 1, _f_model_constant, _f_init_params_constant) + + +# - Naive model: Y = Y(x-1) +# Note: This model requires passing the actuals data - it is not fitted by regression +# We still pass it to forecast.fit_model() for consistency with the rest of the library + +def _f_model_naive(a_x, a_date, params, is_mult=False, df_actuals=None): + if df_actuals is None: + raise ValueError('model_naive requires a df_actuals argument') + df_out_tmp = pd.DataFrame({'date':a_date,'x':a_x}) + df_out = ( + df_actuals.drop_duplicates('x') # This is not really intended to work with multiple values per sample + .merge(df_out_tmp, how='outer') + ) + df_out['y'] = df_out.y.shift(1).fillna(method='ffill').fillna(method='bfill') + df_out = df_out.loc[df_out.x.isin(a_x)] + #df_out = df_out_tmp.merge(df_out, how='left') # TODO: CHECK THAT X,DATE order is preserved + # TODO: df_out = df_out.merge(df_out_tmp, how='right') + return df_out.y.values + +model_naive = ForecastModel('naive',0, _f_model_naive) + +# - Seasonal naive model +# Note: This model requires passing the actuals data - it is not fitted by regression +# We still pass it to forecast.fit_model() for consistency with the rest of the library + +def _fillna_wday(df): + df = df.copy() + df['wday'] = df.date.dt.weekday + df_tmp = df[['date', 'x']].copy() + for wday in np.arange(0, 7): + wday_name = 'wday_{}'.format(wday) + df_tmp[wday_name] = ( + df.y.where((df.wday) == wday, np.NaN) # for each wday column, set to null all values from other weekdays + .fillna(method='ffill') # fill nulls with last weekly sample + .shift(1) # shift so that model for each sample is last non-null weekly sample + .where(df.wday == wday, np.NaN) # set values for other weekdays to null, so we can aggregate with sum + ) + # logger_info('debug: df_tmp: ', df_tmp) + + # Aggregate: add all weekly columns together, keep null only if all columns are null + def aggregate_wday(s_tmp): + if np.all(np.isnan(s_tmp)): + return np.NaN + else: + return np.nansum(s_tmp) + + df['y_out'] = df_tmp.loc[:, 
df_tmp.columns.str.startswith('wday_')].apply(aggregate_wday, axis=1) + return df + + +def _f_model_snaive_wday(a_x, a_date, params, is_mult=False, df_actuals=None): + if df_actuals is None: + raise ValueError('model_snaive_wday requires a df_actuals argument') + + df_actuals_model = _fillna_wday(df_actuals.drop_duplicates('x')) + + df_last_week = df_actuals_model.drop_duplicates('wday', keep='last')[['wday', 'y']] + df_last_week['y_out'] = df_last_week['y'] + df_last_week = df_last_week[['wday', 'y_out']] + + # logger_info('df_actuals_model:', df_actuals_model) + # logger_info('df_last_week:', df_last_week) + + df_out_tmp = pd.DataFrame({'date': a_date, 'x': a_x}) + df_out_tmp['wday'] = df_out_tmp.date.dt.weekday + + # logger_info('df_out_tmp:', df_out_tmp) + + df_out_actuals = ( + df_actuals_model.merge(df_out_tmp, how='left') + ) + df_out_extrapolated = ( + df_out_tmp.loc[~df_out_tmp.date.isin(df_actuals_model.date)] + .merge(df_last_week) + .sort_values('date').reset_index(drop=True) + ) + + df_out = pd.concat([df_out_actuals, df_out_extrapolated], sort=False) + + # logger_info('df_out_actuals:', df_out_actuals) + # logger_info('df_out_extrapolated:', df_out_extrapolated) + + # Note: the line below causes trouble when samples are filtered from a_x, a_date due to find_outliers + df_out = df_out.loc[df_out.x.isin(a_x)] + + # logger_info('df_out:', df_out) + + return df_out.y_out.values + +model_snaive_wday = ForecastModel('snaive_wday', 0, _f_model_snaive_wday) + + +# - Spike model: :math:`Y = A`, when x_min <= X < x_max +def _f_model_spike(a_x, a_date, params, is_mult=False, **kwargs): + [A, x_min, x_max] = params + if is_mult: + c = 1 + else: + c = 0 + y = np.concatenate(( + np.full(int(x_min), c), + np.full(int(x_max - x_min), A), + np.full(len(a_x) - int(x_max), c) + )) + return y + + +# TODO: test f_init_params for all models +def _f_init_params_spike(a_x=None, a_y=None, a_date=None, is_mult=False): + """ params are spike height, x start, x end """ + # if not a_y.any(): + if a_y is None: + return [1] + np.random.uniform(0, 1, 1) + [2] + else: + diffs = np.diff(a_y) + # if diffs: + if True: + diff = max(diffs) + x_start = np.argmax(diffs) + x_end = x_start + 1 + return np.array([diff, x_start, x_end]) + # else: + # rand = np.random.randint(1, len(a_y) - 1) + + return np.array([1, rand, rand + 1]) + + +model_spike = ForecastModel('spike', 3, _f_model_spike, _f_init_params_spike) + + +# - Spike model for dates - dates are fixed for each model + +def _f_model_spike_date(a_x, a_date, params, date_start, date_end, is_mult=False): + [A] = params + mask_spike = (a_date >= date_start) * (a_date < date_end) + if is_mult: + y = mask_spike * A + ~mask_spike + else: + y = mask_spike * A + + return y + + +def _f_init_params_spike(a_x=None, a_y=None, a_date=None, is_mult=False): + """ params are spike height, x start, x end """ + if a_y is None: + return np.concatenate([np.array([1]) + np.random.uniform(0, 1, 1)]) + else: + diffs = np.diff(a_y) + # if diffs: + if True: + diff = max(diffs) + return np.array([diff]) + # else: + # rand = np.random.randint(1, len(a_y) - 1) + # return [1] + + +def get_model_spike_date(date_start, date_end): + f_model = ( + lambda a_x, a_date, params, is_mult=False, **kwargs: + _f_model_spike_date(a_x, a_date, params, date_start, date_end, is_mult) + ) + model_spike_date = ForecastModel('spike_date[{},{}]'.format(pd.to_datetime(date_start).date(), + pd.to_datetime(date_end).date()), + 1, f_model, _f_init_params_spike) + return model_spike_date + + +# - Linear model: 
:math:`Y = A*x + B` + +def _f_model_linear(a_x, a_date, params, is_mult=False, **kwargs): + (A, B) = params + y = A * a_x + B + return y + + +def _f_init_params_linear(a_x=None, a_y=None, a_date=None, is_mult=False): + if a_y is None: + return np.random.uniform(low=0, high=1, size=2) + else: # TODO: Improve this + if a_x is not None: + a_x_size = np.unique(a_x).size-1 + else: + a_x_size = a_y.size-1 + A = (a_y[-1]-a_y[0])/a_x_size + B = a_y[0] + # Uniform low= 0*m, high = 1*m + return np.array([A, B]) + + +model_linear = ForecastModel('linear', 2, _f_model_linear, _f_init_params_linear) + + +def f_init_params_linear_nondec(a_x=None, a_y=None, a_date=None, is_mult=False): + params = _f_init_params_linear(a_x, a_y, a_date) + if params[0] < 0: + params[0] = 0 + return params + + +def f_bounds_linear_nondec(a_x=None, a_y=None, a_date=None): + # first param should be between 0 and inf + return [0, -np.inf], [np.inf, np.inf] + + +model_linear_nondec = ForecastModel('linear', 2, _f_model_linear, + f_init_params=f_init_params_linear_nondec, + f_bounds=f_bounds_linear_nondec) + + +# - QuasiLinear model: :math:`Y = A t^{B} + C` + +def _f_model_quasilinear(a_x, a_date, params, is_mult=False, **kwargs): + (A, B, C) = params + y = A * np.power(a_x, B) + C + return y + + +model_quasilinear = ForecastModel('quasilinear', 3, _f_model_quasilinear) + + +# - Exponential model: math:: Y = A * B^t +def _f_model_exp(a_x, a_date, params, is_mult=False, **kwargs): + (A, B) = params + y = A * np.power(B, a_x) + return y + + +model_exp = ForecastModel('exponential', 2, _f_model_exp) + + +def f_init_params_exp_dec(a_x=None, a_y=None, a_date=None, is_mult=False): + """ B param must be <= 1 to have exponential decreasing """ + params = _get_f_init_params_default(2)(a_x, a_y, a_date) + return params + + +def f_bounds_exp_dec(a_x=None, a_y=None, a_date=None): + # first param should be between 0 and inf + return [-np.inf, -1], [np.inf, 1] + + +model_exp_dec = ForecastModel('exponential_dec', 2, _f_model_exp, + f_init_params=f_init_params_exp_dec, + f_bounds=f_bounds_exp_dec) + + +# - Step function: :math:`Y = {0, if x < A | B, if x >= A}` +# A is the time of step, and B is the step +def _f_step(a_x, a_date, params, is_mult=False, **kwargs): + (A, B) = params + if is_mult: + y = 1 + (B - 1) * np.heaviside(a_x - A, 1) + else: + y = B * np.heaviside(a_x - A, 1) + return y + +# TODO: Implement initialisation for multiplicative composition +def _f_init_params_step(a_x=None, a_y=None, a_date=None, is_mult=False): + if a_y is None: + return np.random.uniform(0, 1, 2) + else: + if a_y.ndim > 1: + a_y = a_y[:, 0] + df = pd.DataFrame({'b': a_y}) + # max difference between consecutive values + df['diff'] = df.diff().abs() + # if is_mult, replace above line with something like np.concatenate([[np.NaN],a_y[:-1]/a_y[1:]]) + a = df.nlargest(1, 'diff').index[0] + b = df['diff'].iloc[a] + return np.array([a, b * 2]) + + +model_step = ForecastModel('step', 2, _f_step, _f_init_params_step) + + +# - Spike model for dates - dates are fixed for each model + +def _f_model_step_date(a_x, a_date, params, date_start, is_mult=False): + [A] = params + mask_step = (a_date>=date_start).astype(float) + if is_mult: + # y = mask_step*A + ~mask_step + y = mask_step * (A - 1) + 1 + else: + y = mask_step * A + + return y + + +# TODO: Implement initialisation for multiplicative composition +def _f_init_params_step_date(a_x=None, a_y=None, a_date=None, is_mult=False): + if a_y is None: + return np.random.uniform(0, 1, 1) + else: + if a_y.ndim > 1: + a_y = 
a_y[:, 0] + df = pd.DataFrame({'b': a_y}) + # max difference between consecutive values + df['diff'] = df.diff().abs() + # if is_mult, replace above line with something like np.concatenate([[np.NaN],a_y[:-1]/a_y[1:]]) + a = df.nlargest(1, 'diff').index[0] + b = df['diff'].iloc[a] + return np.array([b * 2]) + + +def get_model_step_date(date_start): + date_start = pd.to_datetime(date_start) + f_model = ( + lambda a_x, a_date, params, is_mult=False, **kwargs: + _f_model_step_date(a_x, a_date, params, date_start, is_mult) + ) + model_step_date = ForecastModel('step_date[{}]'.format(date_start.date()), + 1, f_model, _f_init_params_step_date) + return model_step_date + + +# Two step functions +def _f_n_steps(n, a_x, a_date, params, is_mult=False): + if is_mult: + y = 1 + else: + y = 0 + + for i in range(0, n + 1, 2): + A, B = params[i: i + 2] + if is_mult: + y = y * _f_step(a_x, a_date, (A, B), is_mult) + else: + y = y + _f_step(a_x, a_date, (A, B), is_mult) + return y + + +def _f_two_steps(a_x, a_date, params, is_mult=False, **kwargs): + return _f_n_steps(n=2, a_x=a_x, a_date=a_date, params=params, is_mult=is_mult) + + +def _f_init_params_n_steps(n=2, a_x=None, a_y=None, a_date=None, is_mult=False): + if a_y is None: + return np.random.uniform(0, 1, n * 2) + else: + # max difference between consecutive values + if a_y.ndim > 1: + a_y = a_y[:, 0] + df = pd.DataFrame({'b': a_y}) + df['diff'] = df.diff().abs() + # if is_mult, replace above line with something like np.concatenate([[np.NaN],a_y[:-1]/a_y[1:]]) + a = df.nlargest(n, 'diff').index[0:n].values + b = df['diff'].iloc[a].values + params = [] + for i in range(0, n): + params += [a[i], b[i]] + return np.array(params) + + +def _f_init_params_two_steps(a_x=None, a_y=None, a_date=None, is_mult=False): + return _f_init_params_n_steps(n=2, a_x=a_x, a_y=a_y, a_date=a_date, is_mult=is_mult) + + +model_two_steps = ForecastModel('two_steps', 2 * 2, _f_two_steps, _f_init_params_two_steps) + + +# - Sigmoid step function: :math:`Y = {A + (B - A) / (1 + np.exp(- D * (a_x - C)))}` +# Spans from A to B, C is the position of the step in x axis +# and D is how steep the increase is +def _f_sigmoid(a_x, a_date, params, is_mult=False, **kwargs): + (B, C, D) = params + if is_mult: + A = 1 + else: + A = 0 + # TODO check if a_x is negative + y = A + (B - A) / (1 + np.exp(- D * (a_x - C))) + return y + + +def _f_init_params_sigmoid_step(a_x=None, a_y=None, a_date=None, is_mult=False): + if a_y is None: + return np.random.uniform(0, 1, 3) + else: + if a_y.ndim > 1: + a_y = a_y[:, 0] + df = pd.DataFrame({'y': a_y}) + # max difference between consecutive values + df['diff'] = df.diff().abs() + c = df.nlargest(1, 'diff').index[0] + b = df.loc[c, 'y'] + d = b * b + return b, c, d + + +def _f_init_bounds_sigmoid_step(a_x=None, a_y=None, a_date=None): + if a_y is None: + return [-np.inf, -np.inf, 0.], 3 * [np.inf] + + if a_y.ndim > 1: + a_y = a_y[:, 0] + if a_x.ndim > 1: + a_x = a_x[:, 0] + diff = max(a_y) - min(a_y) + b_min = -2 * diff + b_max = 2 * diff + c_min = min(a_x) + c_max = max(a_x) + d_min = 0. 
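+    # The steepness parameter D is bounded below by 0; the direction of the step is
+    # already determined by the sign of B.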
+ d_max = np.inf + return [b_min, c_min, d_min], [b_max, c_max, d_max] + + +# In this model, parameter initialization is aware of number of steps +model_sigmoid_step = ForecastModel('sigmoid_step', 3, _f_sigmoid, _f_init_params_sigmoid_step, + f_bounds=_f_init_bounds_sigmoid_step) + +model_sigmoid = ForecastModel('sigmoid', 3, _f_sigmoid) + + +# Ramp functions - used for piecewise linear models + +# example : model_linear_pw2 = model_linear + model_ramp +# example 2: model_linear_p23 = model_linear + model_ramp + model_ramp + +# - Ramp function: :math:`Y = {0, if x < A | B, if x >= A}` +# A is the time of step, and B is the step +def _f_ramp(a_x, a_date, params, is_mult=False, **kwargs): + (A, B) = params + if is_mult: + y = 1 + (a_x - A) * (B) * np.heaviside(a_x - A, 1) + else: + y = (a_x - A) * B * np.heaviside(a_x - A, 1) + return y + + +def _f_init_params_ramp(a_x=None, a_y=None, a_date=None, is_mult=False): + # TODO: set boundaries: a_x (0.2, 0.8) + if a_y is None: + if a_x is not None: + nfirst_last = int(np.ceil(0.15 * a_x.size)) + a = np.random.uniform(a_x[nfirst_last],a_x[-nfirst_last-1],1) + else: + a = np.random.uniform(0, 1, 1) + b = np.random.uniform(0, 1, 1) + + return np.concatenate([a, + b]) + else: + df = pd.DataFrame({'b': a_y}) # TODO: FILTER A_Y BY 20-80 PERCENTILE IN A_X + if a_x is not None: + # + df['x']=a_x + # Required because we support input with multiple samples per x value + df = df.drop_duplicates('x') + df=df.set_index('x') + # max difference between consecutive values -- this assumes no null values in series + df['diff2'] = df.diff().diff().abs() + + # We ignore the last 15% of the time series + skip_samples = int(np.ceil(df.index.size * 0.15)) + + a = (df + .head(-skip_samples) + .tail(-skip_samples) + .nlargest(1, 'diff2').index[0] + ) + b = df['diff2'].loc[a] + # TODO: replace b with estimation of slope in segment 2 minus slope in segment 1 - see init_params_linear + # logger.info('DEBUG: init params ramp2: %s - %s ', a.tolist(),b.tolist()) + return np.array([a, b]) + + +def _f_init_bounds_ramp(a_x=None, a_y=None, a_date=None): + if a_x is None: + a_min = -np.inf + a_max = np.inf + else: + #a_min = np.min(a_x) + nfirst_last = int(np.ceil(0.15 * a_x.size)) + a_min = a_x[nfirst_last] + a_max = a_x[-nfirst_last] + #a_min = np.percentile(a_x, 15) + #a_max = np.percentile(a_x,85) + if a_y is None: + b_min = -np.inf + b_max = np.inf + else: + # df = pd.DataFrame({'b': a_y}) # TODO: FILTER A_Y BY 20-80 PERCENTILE IN A_X + # #max_diff2 = np.max(df.diff().diff().abs()) + # max_diff2 = np.max(np.abs(np.diff(np.diff(a_y)))) + # + # b_min = -2*max_diff2 + # b_max = 2*max_diff2 + + b_min = -np.inf + b_max = np.inf + # logger_info('DEBUG: BOUNDS:',(a_min, b_min,a_max, b_max)) + return ([a_min, b_min], [a_max, b_max]) + + +model_ramp = ForecastModel('ramp', 2, _f_ramp, _f_init_params_ramp, _f_init_bounds_ramp) + + +# - Weekday seasonality + +def _f_model_season_wday(a_x, a_date, params, is_mult=False, **kwargs): + # Weekday seasonality model, 6 params + params_long = np.concatenate([[float(is_mult)], params]) # params_long[0] is default series value, + return params_long[a_date.weekday] + + +model_season_wday = ForecastModel('season_wday', 6, _f_model_season_wday) + + +# - Month seasonality +def _f_init_params_season_month(a_x=None, a_y=None, a_date=None, is_mult=False): + if a_y is None or a_date is None: + return np.random.uniform(low=-1, high=1, size=11) + else: # TODO: Improve this + l_params_long = [np.mean(a_y[a_date.month==i]) for i in np.arange(1,13)] + 
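+        # December (the last month) is used as the baseline level: the remaining 11
+        # parameters are additive offsets from it, or ratios of it if is_mult is True.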
l_baseline = l_params_long[-1] + l_params = l_params_long[:-1] + if not is_mult: + l_params_add = l_params-l_baseline + return l_params_add + else: + l_params_mult = l_params/l_baseline + return l_params_mult + +def _f_model_season_month(a_x, a_date, params, is_mult=False, **kwargs): + # Month of December is taken as default level, has no parameter + params_long = np.concatenate([[float(is_mult)], params]) # params_long[0] is default series value, + return params_long[a_date.month-1] + +model_season_month = ForecastModel('season_month', 11, _f_model_season_month, _f_init_params_season_month) + +model_season_month_old = ForecastModel('season_month_old', 11, _f_model_season_month) + + +def _f_model_yearly_season_fourier(a_x, a_date, params, is_mult=False, **kwargs): + # Infer the time series frequency to calculate the Fourier parameters + + period = dict_fourier['period'] + harmonics = dict_fourier['harmonics'] + + return _f_model_season_fourier(a_date, params, period, harmonics, is_mult) + + +date_origin = pd.datetime(1970, 1, 1) + + +def _f_model_season_fourier(a_date, params, period, harmonics, is_mult=False): + # convert to days since epoch + t = (a_date - date_origin).days.values + i = np.arange(1,harmonics+1) + a_tmp = i.reshape(i.size,1)* t + k = (2.0 * np.pi / period) + y = np.concatenate([np.sin(k*a_tmp), np.cos(k*a_tmp)]) + + # now multiply by the params + y = np.matmul(params, y) + return y + + +def _f_init_params_fourier_n_params(n_params, a_x=None, a_y=None, a_date=None, is_mult=False): + if a_y is None: + params = np.random.uniform(0.001, 1, n_params) + else: + # max difference in time series + diff = a_y.max() - a_y.min() + params = diff * np.random.uniform(0.001, 1, n_params) + return params + + +def _f_init_params_fourier(a_x=None, a_y=None, a_date=None, is_mult=False): + n_params = 2 * dict_fourier['harmonics'] + return _f_init_params_fourier_n_params( + n_params, a_x=a_x, a_y=a_y, a_date=a_date, is_mult=is_mult) + + +def _f_init_bounds_fourier_nparams(n_params, a_x=None, a_y=None, a_date=None): + if a_y is None: + return n_params * [-np.inf], n_params * [np.inf] + if a_y.ndim > 1: + a_y = a_y[:, 0] + + diff = a_y.max() - a_y.min() + return n_params * [-2 * diff], n_params * [2 * diff] + + +def _f_init_bounds_fourier_yearly(a_x=None, a_y=None, a_date=None): + n_params = 2 * dict_fourier['harmonics'] + return _f_init_bounds_fourier_nparams(n_params, a_x, a_y, a_date) + + +model_season_fourier_yearly = ForecastModel( + name='season_fourier_yearly', + n_params=2 * dict_fourier['harmonics'], + f_model=_f_model_yearly_season_fourier, + f_init_params=_f_init_params_fourier, + f_bounds=_f_init_bounds_fourier_yearly) + + +def get_fixed_model(forecast_model, params_fixed, is_mult=False): + if len(params_fixed) != forecast_model.n_params: + err = 'Wrong number of fixed parameters' + raise ValueError(err) + return ForecastModel(forecast_model.name + '_fixed', 0, + f_model=lambda a_x, a_date, params, is_mult=is_mult, **kwargs: + forecast_model.f_model( + a_x=a_x, a_date=a_date, params=params_fixed, is_mult=is_mult)) + + +def get_iqr_thresholds(s_diff, low=0.25, high=0.75): + # Get thresholds based on inter quantile range + q1 = s_diff.quantile(low) + q3 = s_diff.quantile(high) + iqr = q3 - q1 + thr_low = q1 - 1.5 * iqr + thr_hi = q3 + 1.5 * iqr + return thr_low, thr_hi + +def get_model_outliers_withgap(df, window=3): + # TODO: ADD CHECK, TO PREVENT REDUNDANT OPS IN DF WITHOUT GAPS + + df_nogap = df.pipe(model_utils.interpolate_df, include_mask=True) + mask_step, mask_spike = 
get_model_outliers(df_nogap) + + ## TODO: FOR EACH OF MASK STEP, MASK SPIKE, IF IT IS NONE, RETURN NONE + if mask_spike is None and mask_step is None: + return None,None + if mask_spike is not None: + df_nogap['mask_spike'] = mask_spike + if mask_step is not None: + df_nogap['mask_step'] = mask_step + df_nogap['step_in_filled_gap'] = df_nogap.mask_step * df_nogap.is_gap_filled + df_nogap['mask_step_patch'] = df_nogap.step_in_filled_gap.shift(-1).fillna(0) + + df_nogap = df_nogap.loc[~df_nogap.is_gap_filled] + + if mask_step is not None: + df_nogap['mask_step_patch'] = df_nogap.mask_step_patch.shift(1).fillna(0) + df_nogap['mask_step'] = df_nogap.mask_step + df_nogap.mask_step_patch + + logger_info('df 1 - no gap:', df_nogap) + + if mask_step is not None: + mask_step = df_nogap.mask_step.values + if mask_spike is not None: + mask_spike = df_nogap.mask_spike.values + return mask_step, mask_spike + + # todo - clean up, return + + + +# TODO: Add option - estimate_outl_size +# TODO: Add option - sigmoid steps +# TODO: ADD option - gaussian spikes +def get_model_outliers(df, window=3): + """ + + :param df: + :type df: + :param window: + :type window: + :return: + :rtype: + Note: due to the way the thresholds are defined, we require 6+ samples in series to find a spike. + """ + is_mult = False + + dfo = df.copy() # dfo - df for outliers + with_dates = 'date' in df.columns # If df has datetime index, use date logic in steps/spikes + x_col = 'date' if with_dates else 'x' + + if df[x_col].duplicated().any(): + raise ValueError('Input cannot have multiple values per sample') + + # logger_info('debug 0 :', dfo) + + dfo['dif'] = dfo.y.diff() # .fillna(0) + + # TODO: If df has weight column, use only samples with weight=1 for IQR + + thr_low, thr_hi = get_iqr_thresholds(dfo.dif) + # Identify changes of state when diff value exceeds thresholds + dfo['ischange'] = ((dfo.dif < thr_low) | (dfo.dif > thr_hi)).astype(int) + + dfo['ischange_group'] = ( + (dfo.ischange) + .rolling(window, win_type=None, center=True).max() + .fillna(0).astype(int) + ) + + dfo['dif_filt'] = (dfo.dif * dfo.ischange).fillna(0) + dfo['dif_filt_abs'] = dfo.dif_filt.abs() + + dfo['ischange_cumsum'] = dfo.ischange.cumsum() + dfo['change_group'] = dfo.ischange_group.diff().abs().fillna(0).astype(int).cumsum() + + df_mean_gdiff = ( + dfo.loc[dfo.ischange.astype(bool)].groupby('change_group')['dif_filt'].mean() + .rename('mean_group_diff').reset_index() + ) + + df_mean_gdiff_abs = ( + dfo.loc[dfo.ischange.astype(bool)].groupby('change_group')['dif_filt_abs'].mean() + .rename('mean_group_diff_abs').reset_index() + ) + + dfo = dfo.merge(df_mean_gdiff, how='left').merge(df_mean_gdiff_abs, how='left') + dfo.mean_group_diff = dfo.mean_group_diff.fillna(0) + dfo.mean_group_diff_abs = dfo.mean_group_diff_abs.fillna(0) + + dfo['is_step'] = (dfo.mean_group_diff < thr_low) | (dfo.mean_group_diff > thr_hi) + dfo['is_spike'] = (dfo.mean_group_diff_abs - dfo.mean_group_diff) > (thr_hi - thr_low) / 2 + dfo['ischange_cumsum'] = dfo.ischange.cumsum() + + # logger_info('DF_OUTL: ',dfo) + + df_outl = ( + dfo.loc[dfo.ischange.astype(bool)].groupby('change_group') + .apply(lambda x:pd.Series({'outl_start':x.head(1)[x_col].iloc[0],'outl_end':x.tail(1)[x_col].iloc[0]})) + .reset_index() + ) + + if df_outl.empty: # No outliers - nothing to do + return None, None + + df_outl = df_outl.merge(dfo[['change_group', 'is_spike', 'is_step']].drop_duplicates()) + + dfo = dfo.merge(df_outl, how='left') + dfo['outl_start'] = dfo.outl_start.fillna(0).astype(int) + 
dfo['outl_end'] = dfo.outl_end.fillna(0).astype(int) + + dfo = dfo # .reset_index() + + df_spikes = df_outl.loc[df_outl.is_spike] + df_steps = df_outl.loc[df_outl.is_step] + + l_model_outl = [] + l_mask_step = [] + l_mask_spike = [] + + for g in df_spikes.change_group: + s_spike = df_spikes.loc[df_spikes.change_group == g].iloc[0] + if with_dates: + mask_spike_tmp = ~((dfo.date>=pd.to_datetime(s_spike.outl_start)) & + (dfo.date=s_spike.outl_start) & + (dfo.x.values Q3 + 3 * IQR + # Q1 and Q3 are the 25th (1st) and 75th (3rd) quartiles, and + # IQR is the inter-quartile range + q1 = df['diff'].quantile(0.25) + q3 = df['diff'].quantile(0.75) + iqr = q3 - q1 + low_thresh = q1 - 1.5 * iqr + high_thresh = q3 + 1.5 * iqr + + # df['is_change'] = 0 + step_filt = (df['diff'] < low_thresh) | (df['diff'] > high_thresh) + df['is_change'] = step_filt.astype(int) + + if not any(step_filt): + return [], [] + + # df.loc[step_filt, 'is_change'] = 1 + + # Now that we have found the outliers in differences, + # group consecutive steps together + + # get only the diffs that correspond to changes + df['diff'] = df['diff'] * df['is_change'] + df[['diff_sum', 'change_sum']] = df[['diff', 'is_change']].rolling( + window, win_type=None, center=True).sum() + + # we have steps, we may need to aggregate + # We split the array with zeros. + # This means that we treat all nearby changes as one, + # within `window` values + split = np.split(df['change_sum'], + np.where(df['change_sum'] == 0.)[0]) + # get rid of zero only series + split = [i for i in split if i.any()] + + # Now we have a list of series with the changes + changes_list = [] + for s in split: + change_s = df.iloc[s.index] + change_max_occur = change_s[change_s.is_change == 1].index.max() + change_min_occur = change_s[change_s.is_change == 1].index.min() + diff = change_s['diff'].sum() + duration = change_max_occur - change_min_occur + + if low_thresh <= diff <= high_thresh: # Change is a spike + change_type = 'spike' + # we keep the starting point as x + x = change_min_occur + # get the average change for the values of the change that + # are in the changing threshold + diff = change_s.loc[(change_s.is_change == 1) & + ((change_s['diff'] < low_thresh) | + (change_s['diff'] > high_thresh)), 'diff'].abs().mean() + + else: # Change is a step + # here we have a different starting point + x = (change_max_occur + change_min_occur - 1) / 2.0 + change_type = 'step' + + d = {'change_type': change_type, + 'duration': duration, + 'diff': diff, + 'x': x} + changes_list += [d] + + # Sort by absolute difference, in descending order + sorted_changes_list = sorted(changes_list, key=lambda ch: abs(ch['diff']), + reverse=True) + + # Rule of thumb: the maximum number of changes + # is the square root of the time series length + max_max_changes = int(np.floor(np.sqrt(len(a_y)))) + # If we have a max_changes input value, select the ones with higher diff + if (not max_changes) or (max_changes > max_max_changes): + max_changes = max_max_changes + + changes_list = sorted_changes_list[:max_changes] + + steps = [] + spikes = [] + for c in changes_list: + # get models + if c['change_type'] == 'spike': + # spike = create_fixed_spike(c['diff'], x=c['x'], + # duration=c['duration']) + spike = create_fixed_spike_ignored(x=c['x'], + duration=c['duration']) + spikes += [spike] + elif c['change_type'] == 'step': + step = create_fixed_step(diff=c['diff'], x=c['x']) + steps += [step] + else: + raise ValueError('Invalid change type: ' + c['change_type']) + + return steps, spikes + + +def 
create_fixed_step(diff, x): + fixed_params = [x, diff] + return get_fixed_model(model_step, fixed_params) + + +def create_fixed_spike(diff, x, duration): + fixed_params = [diff, x, x + duration] + return get_fixed_model(model_spike, fixed_params) + + +def create_fixed_spike_ignored(x, duration): + fixed_params = [0, x, x + duration] + return get_fixed_model(model_spike, fixed_params, is_mult=True) + + +# Dummy variable models + +def get_model_dummy(name, dummy, **kwargs): + """ + Generate a model based on a dummy variable. + + :param name: + :type name: + :param dummy: + | Can be a function or a list-like. + | If a function, it must be of the form f_dummy(a_x, a_date), and return a numpy array of floats + | with the same length as a_x and values that are either 0 or 1. + | If a list-like of numerics, it will be converted to a f_dummy function as described above, which will + | have values of 1 when a_x has one of the values in the list, and 0 otherwise. + | If a list-like of date-likes, it will be converted to a f_dummy function as described above, which will + | have values of 1 when a_date has one of the values in the list, and 0 otherwise. + :type dummy: function, or list-like of numerics or datetime-likes + :param kwargs: + :type kwargs: + :return: + | A model that returns A when dummy is 1, and 0 (or 1 if is_mult==True) otherwise. + :rtype: ForecastModel + + + """ + return ForecastModel(name, 1, get_f_model_dummy(dummy), **kwargs) + + +def _validate_f_dummy(f_dummy): + # Ensures that behaviour of f_dummy matches specs + # Must return array of floats, same length as a_x, with values either 0. or 1. + def validate_for_dummy(a_dummy): + assert isinstance(a_dummy, np.ndarray) + assert (np.setdiff1d(a_dummy, np.array([0., 1.])).size) == 0 + + # validate_for_dummy(f_dummy(np.arange(0, 10), None)) # Crashes with f_dummy 's that require dates + validate_for_dummy(f_dummy(np.arange(0, 10), pd.date_range('2018-01-01', '2018-01-10'))) + + +def get_f_model_dummy(dummy): + """ + Generate a model function for a dummy variable defined by f_dummy + + :param dummy: + :type dummy: function or list-like of numerics or dates + :return: model function based on dummy variable, to use on a ForecastModel + :rtype: function + """ + + if callable(dummy): # If dummy is a function, use it + f_dummy = dummy + else: + f_dummy = get_f_dummy_from_list(dummy) # If dummy is a list, convert to function + + _validate_f_dummy(f_dummy) + + def f_model_check(a_x, a_date, params, is_mult=False, **kwargs): + # Uses internal f_check to assign 0 or 1 to each sample + # If f_dummy(x)==1, return A + # If f_dummy(x)==0, return 0 (or 1 if is_mult) + [A] = params + mask = f_dummy(a_x, a_date) + if not is_mult: + a_result = A * mask + else: + a_result = (A - 1.) 
* mask + 1 + return a_result + + return f_model_check + + +def get_f_dummy_from_list(list_check): + """ + Generate a f_dummy function that defines a dummy variable, can be used for dummy models + + :param list_check: Input list + :type list_check: list-like of numerics or datetime-likes + :return: f_dummy + :rtype: function + """ + # Generate a f_dummy function that defines a dummy variable, can be used for dummy models + s_check = pd.Series(list_check) + if pd.api.types.is_numeric_dtype(s_check): + list_check_numeric = s_check + + def f_dummy_list_numeric(a_x, a_date): + # return a_x in check_numeric + return np.isin(a_x, list_check_numeric).astype(float) + + return f_dummy_list_numeric + else: + try: + list_check_date = pd.to_datetime(s_check) + + def f_dummy_list_date(a_x, a_date): + # return a_x in check_numeric + return np.isin(a_date, list_check_date).astype(float) + + return f_dummy_list_date + except: + raise ValueError('list_dummy must be a list-like with numeric or date-like values: %s', list_check) + + +model_season_wday_2 = get_model_dummy('season_wday_2', lambda a_x, a_date, **kwargs: (a_date.weekday < 5).astype(float)) + +# Example dummy model - checks if it is Christmas +model_dummy_christmas = get_model_dummy('dummy_christmas', + lambda a_x, a_date, **kwargs: ((a_date.month == 12) & (a_date.day == 25)).astype(float)) + +# Example dummy model - checks if it is first day of month +model_dummy_month_start = get_model_dummy('dummy_month_start', + lambda a_x, a_date, **kwargs: (a_date.day == 1).astype(float)) + + +# Utility functions + +def fix_params_fmodel(forecast_model, l_params_fixed): + """ + Given a forecast model and a list of floats, modify the model so that some of its parameters become fixed + + :param forecast_model: + :type forecast_model: + :param l_params_fixed: List of floats with same length as number of parameters in model. For each element, a + non-null value means that the parameter in that position is fixed to that value. A null value means that + the parameter in that position is not fixed. 
+ :type l_params_fixed: list + :return: A forecast model with a number of parameters equal to the number of null values in l_params_fixed, + with f_model modified so that some of its parameters gain fixed values equal to the non-null values in l_params + :rtype: + """ + assert len(l_params_fixed) == forecast_model.n_params + + l_params_fixed = np.array(l_params_fixed) + + a_null = np.isnan(l_params_fixed) + i_null = np.nonzero(a_null) + + name = '{}_fixed_{}'.format(forecast_model.name, str(l_params_fixed).replace('nan', ':') + ) + n_params = len(i_null[0]) + + def f_model_fixed(a_x, a_date, params, is_mult=False, **kwargs): + params_long = l_params_fixed + params_long[i_null] = params + return forecast_model.f_model(a_x, a_date, params_long, is_mult) + + def f_init_params_fixed(a_x=None, a_y=None, a_date=None, is_mult=False): + # return params short + params_init = forecast_model.f_init_params(a_x, a_y, a_date, is_mult) + params_init_short = np.array(params_init)[i_null] + return params_init_short + + def f_bounds_fixed(a_x=None, a_y=None, a_date=None): + # return f_bounds short + bounds_min, bounds_max = forecast_model.f_bounds(a_x, a_y, a_date) + bounds_min_short = np.array(bounds_min)[i_null] + bounds_max_short = np.array(bounds_max)[i_null] + return bounds_min_short, bounds_max_short + + model_result = ForecastModel(name, n_params, f_model_fixed, f_init_params_fixed, f_bounds_fixed) + return model_result + + +def simplify_model(f_model, a_x=None, a_y=None, a_date=None): + """ + Check a model's bounds, and update model to make parameters fixed if their min and max bounds are equal + + :param f_model: + :type f_model: + :param a_x: + :type a_x: + :param a_y: + :type a_y: + :param a_date: + :type a_date: + :return: + :rtype: + """ + bounds_min, bounds_max = f_model.f_bounds(a_x, a_y, a_date) + bounds_diff = np.array(bounds_max) - np.array(bounds_min) + i_diff_zero = np.nonzero(bounds_diff == 0) + # For any parameter, if bounds_min == bounds_max, that parameter becomes fixed + + if i_diff_zero[0].size == 0: + return f_model + else: # We make parameters fixed if their min and max bounds are equal + params_fixed = np.full(f_model.n_params, np.NaN) + params_fixed[i_diff_zero, ] = bounds_max[i_diff_zero, ] + f_model = fix_params_fmodel(f_model, params_fixed) + logger.info('Some min and max bounds are equal - generating fixed model: %s', f_model.name) + return f_model + + +def validate_initial_guess(initial_guess, bounds): + initial_guess = np.array(initial_guess) + bounds_min, bounds_max = bounds + return np.all((initial_guess >= bounds_min) & (initial_guess <= bounds_max)) + + +def get_l_model_auto_season(a_date, min_periods=1.5, season_add_mult='add', + l_season_yearly=None, l_season_weekly=None): + """ + Generates a list of candidate seasonality models for an series of timestamps + + :param a_date: + :type a_date: + :param min_periods: + :type min_periods: + :param is_mult: + :type is_mult: + :return: + :rtype: + """ + s_date = pd.Series(a_date).sort_values().drop_duplicates() + min_date_delta = s_date.diff().min() + max_date_delta = s_date.max() - s_date.min() + + if pd.isna(min_date_delta) or pd.isna(max_date_delta): + return [model_null] + + use_season_yearly = ( + (max_date_delta > pd.Timedelta(min_periods * 365, unit='d')) & # Need more than a full year + (min_date_delta <= pd.Timedelta(92, unit='d')) # Need at least quarterly samples + ) + + use_season_weekly = ( + (max_date_delta > pd.Timedelta(min_periods * 7, unit='d')) & # Need more than a full week + (min_date_delta <= 
pd.Timedelta(1, unit='d')) # Need at least daily samples + ) + + l_season_yearly_default = [ + # model_season_month, + model_season_fourier_yearly, + model_null] if l_season_yearly is None else l_season_yearly + l_season_weekly_default = [model_season_wday, model_null] if l_season_weekly is None else l_season_weekly + + if use_season_weekly: + l_season_weekly = l_season_weekly_default + else: + l_season_weekly = [model_null] + + if use_season_yearly: + l_season_yearly = l_season_yearly_default + # TODO: add season_yearly_fourier + # TODO: add holiday list + else: + l_season_yearly = [model_null] + + l_result = [model_null] + for s_w, s_y in itertools.product(l_season_weekly, l_season_yearly): + + model_season_add = s_w + s_y + model_season_mult = s_w * s_y + + if season_add_mult in ['add'] and model_season_add != model_null: + l_result += [model_season_add] + if season_add_mult in ['mult'] and model_season_mult != model_null and \ + model_season_mult not in l_result: + l_result += [model_season_mult] + + return l_result + + """ + todo: RENAME SEASON MODELS + - season_yearly_month, season_yearly_fourier, season_yearly_quarter + - season_weekly_wday, season_weekly_wkd + + """ diff --git a/anticipy/forecast_plot.py b/anticipy/forecast_plot.py new file mode 100644 index 0000000..de61920 --- /dev/null +++ b/anticipy/forecast_plot.py @@ -0,0 +1,266 @@ +# -*- coding: utf-8 -*- +# +# License: This module is released under the terms of the LICENSE file +# contained within this applications INSTALL directory + +""" + __high_level_module_description_here__ +""" + +# -- Coding Conventions +# http://www.python.org/dev/peps/pep-0008/ - Use the Python style guide +# http://sphinx.pocoo.org/rest.html - Use Restructured Text for docstrings + +# -- Public Imports +from tempfile import NamedTemporaryFile +import os +import matplotlib.pyplot as plt +import logging +import numpy as np + +# -- Globals +logger = logging.getLogger(__name__) + +# ---- R Globals + +r_utils = ( + """ + require(scales) + require(stringr) + + get_label_f = function(div=1, mult=1, curr='',unit='', digits=1){ # ... includes digits parameter, passed to string format() + # Returns formatting functions for scale labels + function(x, ...) { + paste0(curr, format(x*mult/div, digits=digits, ..., big.mark = ",", scientific = FALSE, trim = TRUE),unit) %>% + str_replace(paste0(curr,'-'),paste0('-',curr)) + } + } + # Scale for thousands of units + s_y_k = scale_y_continuous(labels=get_label_f(div=1000, unit='k', digits=1)) + # Scale for millions of units + s_y_m = scale_y_continuous(labels=get_label_f(div=10^6, unit='M', digits=4)) + """) + + +# -- Functions + +# ----- Utility functions +def logger_info(msg, data): + # Convenience function for easier log typing + logger.info(msg + '\n%s', data) + + +def df_string_to_unicode(df): + # In a dataframe, convert any string columns to unicode strings + df = df.copy() + columns_str = df.dtypes == basestring + if not columns_str.any(): + return df + for col in df.columns[columns_str]: + df[col] = df[col].astype('unicode') + return df + + +def to_feather(df, file_path): + # Save dataframe as feather file. Formats strings on unicode, for compatibility with R. Drops index. 
+ df.reset_index(drop=True).pipe(df_string_to_unicode).to_feather(file_path) + + +def pix_to_in(width_px=None, height_px=None, dpi=300): + # Utility function to use pixel dimensions rather than ggplot's physical dims + dpi = float(dpi) + + width_in = width_px / dpi if width_px is not None else np.NaN + height_in = height_px / dpi if height_px is not None else np.NaN + # print width_in, height_in + return width_in, height_in + + +def has_pi (df_fcast): + return 'q5' in df_fcast.columns + +# ---- Plotting functions + + +def _plot_forecast_create(df_fcast, width=None, height=None, title=None, dpi=70, col_name_y='y', + col_name_source='source', col_name_date='date', col_name_model='model', scale=None): + """ + Creates ggplot object from forecast dataframe + + :param df_fcast: + | Forecast Dataframe with the following columns: + | - date (timestamp) + | - model (str) : ID for the forecast model + | - y (float) : Value of the time series in that sample + | - is_actuals (bool) : True for actuals samples, False for forecasted samples + :type df_fcast: pandas.DataFrame + :param title: Plot title + :type title: str + :param scale: Scale of y axis: If 'k', show thousands, and if 'M', show millions + :type scale: str + :return: The plot + :rtype: matplotlib plot instance + """ + # Default palette from ggplot + act_col = '#00BFC4' + for_col = '#F8766D' + plt.style.use('ggplot') + figsize = (width / dpi, height / dpi) + + # Clean actuals - weights do not get plotted + df_fcast = df_fcast.loc[df_fcast.model != 'weight'] + + # create the DatetimeIndex + df_fcast = df_fcast.set_index('date') + + if 'source' in df_fcast.columns: + just_one = False + sources = df_fcast.loc[df_fcast['is_actuals'], 'source'].unique() + num_plots = len(sources) + nrows = int(np.ceil(np.sqrt(num_plots))) + ncols = int(np.ceil(1. 
* num_plots / nrows)) + else: + # Only one set of actuals and forecast needed + just_one = True + sources = ['y'] + nrows = 1 + ncols = 1 + + fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, dpi=dpi, squeeze=False) + fig.canvas.set_window_title(title) + + x = 0 + y = 0 + for src in sources: + ax = axes[x, y] + + # Filter the specific source is subplots + if just_one: + source_filt = True + else: + source_filt = df_fcast['source'] == src + + actuals, = ax.plot(df_fcast.loc[source_filt & df_fcast['is_actuals'], :].index, + df_fcast.loc[source_filt & df_fcast['is_actuals'], 'y'], + color=act_col, marker='o', linestyle='None', label='Actuals') + forecast, = ax.plot(df_fcast.loc[source_filt & ~df_fcast['is_actuals'], :].index, + df_fcast.loc[source_filt & ~df_fcast['is_actuals'], 'y'], + color=for_col, marker='None', linestyle='solid', label='Forecast') + + # Fill area between 5th and 95th prediction interval + if ('q5' in df_fcast.columns) and ('q95' in df_fcast.columns): + where_to_fill = (source_filt & + (~df_fcast['is_actuals']) & + (~df_fcast['q5'].isnull()) & + (~df_fcast['q95'].isnull())) + ax.fill_between(df_fcast.index, df_fcast['q5'], df_fcast['q95'], + where=where_to_fill, + facecolor=for_col, alpha=0.2) + + if ('q20' in df_fcast.columns) and ('q80' in df_fcast.columns): + # Fill area between 20th and 80th prediction interval + where_to_fill_2 = (source_filt & + (~df_fcast['is_actuals']) & + (~df_fcast['q20'].isnull()) & + (~df_fcast['q80'].isnull())) + ax.fill_between(df_fcast.index, df_fcast['q20'], df_fcast['q80'], + where=where_to_fill_2, + facecolor=for_col, alpha=0.2) + + if not just_one: + # Set the title of each subplot as per source name + ax.set_title(src) + + ax.legend(handles=[actuals, forecast], + labels=['Actuals', 'Forecast'], loc='upper left') + + y += 1 + if y >= ncols: + # New row + y = 0 + x += 1 + + # Now make the rest of the graphs invisible + while x < nrows: + while y < ncols: + axes[x, y].set_visible(False) + y += 1 + # New row + y = 0 + x += 1 + + return plt.Figure + + +def plot_forecast_save(df_fcast, file_path, width=None, height=None, title=None, dpi=70, col_name_y='y', + col_name_source='source', col_name_date='date', col_name_model='model', + scale=None, device='png', + transparent_bg=False): + """ + Generates matplotlib plot and saves as file + + :param df_fcast: + | Forecast Dataframe with the following columns: + | - date (timestamp) + | - model (str) : ID for the forecast model + | - y (float) : Value of the time series in that sample + | - is_actuals (bool) : True for actuals samples, False for forecasted samples + :type df_fcast: pandas.DataFrame + :param file_path: File path for output + :type file_path: str + :param width: Image width, in pixels + :type width: int + :param height: Image height, in pixels + :type height: int + :param title: Plot title + :type title: str + :param dpi: Image dpi + :type dpi: Image dpi + :param device: 'png' or 'pdf' + :type device: str + """ + + fig = _plot_forecast_create(df_fcast, width, height, title, dpi, col_name_y, col_name_source, + col_name_date, col_name_model, scale) + + dirname, fname = os.path.split(file_path) + if not os.path.exists(dirname): + logger.error('Path missing {}'.format(file_path)) + os.makedirs(dirname) + plt.savefig(file_path, dpi=dpi) + + +def plot_forecast(df_fcast, width=None, height=None, title=None, dpi=70, scale=None, device='png', + col_name_y='y', col_name_source='source', col_name_date='date', col_name_model='model', + transparent_bg=False): + """ + Generates 
plot and shows in an ipython notebook + + :param df_fcast: + | Forecast Dataframe with the following columns: + | - date (timestamp) + | - model (str) : ID for the forecast model + | - y (float) : Value of the time series in that sample + | - is_actuals (bool) : True for actuals samples, False for forecasted samples + :type df_fcast: pandas.DataFrame + :param width: Image width, in pixels + :type width: int + :param height: Image height, in pixels + :type height: int + :param title: Plot title + :type title: str + :param dpi: Image dpi + :type dpi: Image dpi + :return: Ipython image, to display in a notebook + :rtype: Ipython.display.Image + """ + try: + from IPython.display import Image + except ImportError: + logger.info('IPython not available, skipping...') + return None + + file_plot = NamedTemporaryFile() + plot_forecast_save(df_fcast, file_plot.name, width, height, title, dpi, scale, device, + col_name_y, col_name_source, col_name_date, col_name_model, transparent_bg) + return Image(filename=file_plot.name, format='png') diff --git a/anticipy/model_utils.py b/anticipy/model_utils.py new file mode 100644 index 0000000..90481cf --- /dev/null +++ b/anticipy/model_utils.py @@ -0,0 +1,291 @@ +# -*- coding: utf-8 -*- +# +# License: This module is released under the terms of the LICENSE file +# contained within this applications INSTALL directory + +""" +Utility functions for model generation +""" + +# -- Coding Conventions +# http://www.python.org/dev/peps/pep-0008/ - Use the Python style guide +# http://sphinx.pocoo.org/rest.html - Use Restructured Text for docstrings + +# -- Public Imports +import logging +import math +import numpy as np +import pandas as pd + +# -- Private Imports + +# -- Globals + + +logger = logging.getLogger(__name__) + +dict_wday_name = { + 0: 'W-MON', + 1: 'W-TUE', + 2: 'W-WED', + 3: 'W-THU', + 4: 'W-FRI', + 5: 'W-SAT', + 6: 'W-SUN', +} + + +# -- Exception classes + +# -- Functions +def logger_info(msg, data): + # Convenience function for easier log typing + logger.info(msg + '\n%s', data) + + +def array_transpose(a): + """ + Transpose a 1-D numpy array + + :param a: An array with shape (n,) + :type a: numpy.Array + :return: The original array, with shape (n,1) + :rtype: numpy.Array + """ + return a[np.newaxis, :].T + + +# TODO: rework to support model composition +def model_requires_scaling(model): + """ + Given a :py:class:`nsa.forecast.forecast_models.ForecastModel` return True if the function requires + scaling a_x + + :param model: A get_model_ function from :py:mod:`nsa.forecast.model.periodic_models` or + :py:mod:`nsa.forecast.model.aperiodic_models` + :type model: function + :return: True if function is logistic or sigmoidal + :rtype: bool + """ + requires_scaling = model is not None and model.name in [ + 'logistic', + 'sigmoid' + ] + return requires_scaling + + +def apply_a_x_scaling(a_x, model=None, scaling_factor=100.0): + """ + Modify a_x for forecast_models that require it + + :param a_x: x axis of time series + :type a_x: numpy array + :param model: a :py:class:`nsa.forecast.forecast_models.ForecastModel` + :type model: function or None + :param scaling_factor: Value used for scaling t_values for logistic models + :type scaling_factor: float + :return: a_x with scaling applied, if required + :rtype: numpy array + """ + if model_requires_scaling(model): # todo: check that this is still useful + a_x = a_x / scaling_factor + return a_x + + +dict_freq_units_per_year = {'A': 1.0, 'Y': 1.0, 'D': 365.0, 'W': 52.0, 'M': 12, 'Q': 4, 'H': 24 * 365.0} + + +def 
get_s_x_extrapolate(date_start_actuals, date_end_actuals, model=None, freq='W', extrapolate_years=2.5, + shifted_origin=0, scaling_factor=100.0, x_start_actuals=0.): + """ + Return t_values series with DateTimeIndex, covering the date range for the actuals, plus a forecast period. + + + :param date_start_actuals: date or numeric index for first actuals sample + :type date_start_actuals: str, datetime, int or float + :param date_end_actuals: date or numeric index for last actuals sample + :type date_end_actuals: str, datetime, int or float + :param extrapolate_years: + :type extrapolate_years: float + :param model: + :type model: function + :param freq: Time unit between samples. Supported units are 'W' for weekly samples, or 'D' for daily samples. + (untested) Any date unit or time unit accepted by numpy should also work, see + https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.datetime.html#arrays-dtypes-dateunits + :type freq: str or int + :param shifted_origin: Offset to apply to a_x + :type shifted_origin: int + :param scaling_factor: Value used for scaling a_x for certain model functions + :type scaling_factor: float + :return: Series of floats with DateTimeIndex. To be used as (a_date, a_x) input for a model function. + :rtype: pandas.Series + + The returned series covers the actuals time domain plus a forecast period lasting extrapolate_years, in years. + The number of additional samples for the forecast period is time_resolution * extrapolate_years, rounded down + """ + if isinstance(date_start_actuals, str) or isinstance(date_start_actuals, pd.datetime): # Use dates if available + date_start_actuals = pd.to_datetime(date_start_actuals) + date_end_actuals = pd.to_datetime(date_end_actuals) + + if freq is None: # Default frequency + freq='W' + + freq_short = freq[0:1] # Changes e.g. W-MON to W + # freq_units_per_year = 52.0 if freq_short=='W' else 365.0 # Todo: change to dict to support more frequencies + freq_units_per_year = dict_freq_units_per_year.get(freq_short, 365.0) + extrapolate_units = extrapolate_years*freq_units_per_year + date_end_forecast = date_end_actuals+pd.to_timedelta(extrapolate_units, unit=freq_short) + + index = pd.date_range(date_start_actuals, date_end_forecast, freq=freq, name='date') + else: # Otherwise, use numeric index - we extrapolate future samples equal to 100*extrapolate_years + index = pd.Index(np.arange(date_start_actuals, date_end_actuals+100*extrapolate_years)) + + s_x = pd.Series(index=index, data=np.arange(x_start_actuals, x_start_actuals+index.size))+shifted_origin + if model_requires_scaling(model): + s_x = s_x / scaling_factor + + return s_x + + +# Forecast Selection Functions + +def get_aic_c(fit_error, n, n_params): + """ + This function implements the corrected Akaike Information Criterion (AICc), taking as input + a given fit error and data/model degrees of freedom. We assume that the residuals of the candidate model + are distributed according to independent identical normal distributions with zero mean. Hence, we can use + define the AICc as + + .. math:: + + AICc = AIC + \\frac{2k(k+1)}{n-k-1} = 2k + n \\log\\left(\\frac{E}{n}\\right) + \\frac{2k(k+1)}{n-k-1}, + + where :math:`k` and :math:`n` denotes the model and data degrees of freedom respectively, and :math:`E` + denotes the residual error of the fit. 
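+
+    For example, with :math:`n = 100` samples, :math:`k = 3` parameters and a residual error
+    :math:`E = 10`, this gives roughly :math:`100\log(0.1) + 6 + 24/96 \approx -224`; lower
+    AICc values indicate a better trade-off between fit quality and model complexity.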
+ + :param fit_error: Residual error of the fit + :type fit_error: float + :param n: Data degrees of freedom + :type n: int + :param n_params: Model degrees of freedom + :type n_params: int + :return: Corrected Akaike Information Criterion (AICc) + :rtype: float + + Note: + + - see AIC in `Wikipedia article on the AIC `_. + + """ + # First, deal with corner cases that can blow things up with division by zero + if (n <= n_params + 1) or (n == 0): + aux = n - n_params - 1 + raise ValueError( + 'ERROR: Time series too short for AIC_C: (n = ' + str(n) + ', n - n_params - 1 = ' + str(aux) + ')') + elif fit_error == 0.0: + if n_params == 1: + aicc = -float("inf") + else: + # This can lead to suboptimal model selection when we have multiple perfect fits - we use a patch instead + # aicc = -float("inf") + fit_error = 10 ** -320 + aicc = n * math.log(fit_error / n) + 2 * n_params + (2 * n_params * (n_params + 1) / (n - n_params - 1)) + + else: + # Actual calculation of the AICc + aicc = n * math.log(fit_error / n) + 2 * n_params + (2 * n_params * (n_params + 1) / (n - n_params - 1)) + + # logger.info('DEBUG: getting aicc, fit_error: %s, n: %s, n_params: %s, aicc: %s', fit_error, n, n_params, aicc) + return aicc + + +def get_s_aic_c_best_result_key(s_aic_c): + # Required because aic_c can be -inf, that value is not compatible with pd.Series.argmin() + if s_aic_c.empty or s_aic_c.isnull().all(): + return None + if (s_aic_c.values == -np.inf).any(): + (key_best_result,) = (s_aic_c == -np.inf).nonzero() + key_best_result = s_aic_c.index[key_best_result.min()] + else: + key_best_result = s_aic_c.argmin() + return key_best_result + + +def detect_freq(a_date): + if isinstance(a_date, pd.DataFrame): + if 'date' not in a_date.columns: + return None + else: + a_date = a_date.date + s_date = pd.Series(a_date).sort_values().drop_duplicates() + min_date_delta = s_date.diff().min() + if pd.isnull(min_date_delta): + return None + elif min_date_delta == pd.Timedelta(1, unit='h'): + return 'H' + elif min_date_delta == pd.Timedelta(7, unit='D'): + # Weekly seasonality - need to determine day of week + min_date_wday = s_date.min().weekday() + return dict_wday_name.get(min_date_wday, 'W') + elif min_date_delta >= pd.Timedelta(28, unit='d') and \ + min_date_delta <= pd.Timedelta(31, unit='d'): + # MS is month start, M is month end. We use MS if all dates match first of month + if s_date.dt.day.max() == 1: + return 'MS' + else: + return 'M' + elif min_date_delta >= pd.Timedelta(89, unit='d') and \ + min_date_delta <= pd.Timedelta(92, unit='d'): + return 'Q' + elif min_date_delta >= pd.Timedelta(365, unit='d') and \ + min_date_delta <= pd.Timedelta(366, unit='d'): + # YS is month start, Y is month end. 
+        if s_date.dt.day.max() == 1 and s_date.dt.month.max() == 1:
+            return 'YS'
+        else:
+            return 'Y'
+    elif min_date_delta >= pd.Timedelta(23, unit='h'):
+        # and min_date_delta <= pd.Timedelta(1, unit='d')
+        return 'D'
+    else:
+        return None
+
+
+def interpolate_df(df, include_mask=False):
+    # In a dataframe with date gaps, replace gaps with interpolation
+    if 'date' not in df.columns:  # interpolate by x column
+        if df.x.diff().nunique() <= 1:
+            return df
+        else:
+            df_result = (
+                df.set_index('x')
+                .reindex(pd.RangeIndex(df.x.min(), df.x.max()+1, name='x'))
+                .interpolate()
+                .reset_index()
+            )
+
+    else:  # df has date column - interpolate by date
+        s_date_diff = df.date.diff()
+        if s_date_diff.pipe(pd.isnull).all():
+            s_date_diff_first = None
+        else:
+            s_date_diff_first = s_date_diff.loc[s_date_diff.first_valid_index()]
+        freq = detect_freq(df)
+        # If the spacing between samples is constant, no interpolation is required.
+        # Exception: in sparse series with date gaps, we can randomly get gaps that are constant but
+        # don't match any real period, e.g. 8 days
+
+        if s_date_diff.nunique() <= 1 and not (freq == 'D' and s_date_diff_first > pd.to_timedelta(1, 'day')):
+            # TODO: Add additional check for e.g. 2-sample series with 8-day gap
+            return df
+        df_result = (
+            df.set_index('date')
+            .asfreq(freq)
+            .interpolate()
+            .reset_index()
+        )
+    if 'x' in df.columns:
+        df_result['x'] = df_result['x'].astype(df.x.dtype)
+    if include_mask:
+        df_result['is_gap_filled'] = ~df_result.x.isin(df.x)
+    return df_result
diff --git a/anticipy/utils_test.py b/anticipy/utils_test.py
new file mode 100644
index 0000000..49c21bf
--- /dev/null
+++ b/anticipy/utils_test.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+#
+# License: This module is released under the terms of the LICENSE file
+# contained within this application's INSTALL directory
+
+"""
+    Class and functions to test pandas dataframes and series
+"""
+
+# -- Coding Conventions
+# http://www.python.org/dev/peps/pep-0008/ - Use the Python style guide
+# http://sphinx.pocoo.org/rest.html - Use Restructured Text for docstrings
+
+# -- Public Imports
+import unittest
+import numpy as np
+import pandas as pd
+import pandas.util.testing as pdt
+import logging
+
+# -- Globals
+logger = logging.getLogger(__name__)
+
+
+# -- Exception classes
+
+# -- Functions
+def logger_info(msg, data):
+    # Convenience function for easier log typing
+    logger.info(msg + '\n%s', data)
+
+
+def _is_dtype_categorical(x):
+    if type(x) is pd.DataFrame:
+        # Slightly faster than x.dtypes == 'category'
+        return x.dtypes.apply(lambda x: x.name == 'category')
+    else:
+        # Used because x.dtype == 'category' doesn't always work
+        return x.dtype.name == 'category'
+
+
+# -- Classes
+class PandasTest(unittest.TestCase):
+
+    def assert_frame_equal(self, left, right, ignore_index=False, compare_as_strings=False,
+                           ignore_column_order=False, **kwargs):
+        """
+        Checks that 2 dataframes are equal
+
+        :param left: first dataframe to compare
+        :type left: pandas.DataFrame
+        :param right: second dataframe to compare
+        :type right: pandas.DataFrame
+        :param ignore_index: if True, reset both indexes before comparing
+        :type ignore_index: bool
+        :param compare_as_strings: if True, cast both dataframes to str before comparing
+        :type compare_as_strings: bool
+        :param ignore_column_order: if True, reorder the columns of right to match left before comparing
+        :type ignore_column_order: bool
+        :param kwargs: additional arguments, passed to pandas.util.testing.assert_frame_equal
+
+        """
+        l = left
+        r = right
+        if ignore_index:
+            l = l.reset_index(drop=True)
+            r = r.reset_index(drop=True)
+        if compare_as_strings:
+            l = l.astype(str)
+            r = r.astype(str)
+        if ignore_column_order:
+            r = r.pdu_reorder(l.columns)
+        pdt.assert_frame_equal(l, r, **kwargs)
+
+    def assert_frame_not_equal(self, left, right, ignore_index=False, **kwargs):
+        if ignore_index:
+            with
self.assertRaises(AssertionError): + pdt.assert_frame_equal(left.reset_index(drop=True), right.reset_index(drop=True), **kwargs) + else: + with self.assertRaises(AssertionError): + pdt.assert_frame_equal(left, right, **kwargs) + + def assert_series_equal(self, left, right, ignore_index=False, compare_as_strings=False, ignore_name=True, + **kwargs): + """ + Checks that 2 series are equal + + :param left: + :type left: + :param right: + :type right: + :param ignore_index: + :type ignore_index: + :param compare_as_strings: + :type compare_as_strings: + :param kwargs: + :type kwargs: + """ + l = left + r = right + pdt._check_isinstance(l, r, pd.Series) + if ignore_index: + l = l.reset_index(drop=True) + r = r.reset_index(drop=True) + if compare_as_strings: + l = l.astype(str) + r = r.astype(str) + if ignore_name: + l = l.rename(None) + r = r.rename(None) + + if _is_dtype_categorical(l) or _is_dtype_categorical(r): + self.assertTrue(_is_dtype_categorical(l)) + self.assertTrue(_is_dtype_categorical(r)) + self.assertTrue(r.equals(l)) + self.assertEqual(l.cat.ordered, r.cat.ordered) + else: + pdt.assert_series_equal(l, r, **kwargs) + + def assert_array_equal(self, left, right): + np.testing.assert_array_equal(left, right) + +# -- Main diff --git a/setup.py b/setup.py index 0492f9e..07a52aa 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -__version__ = "0.0.1" +__version__ = "0.0.2" # -- Edit Start zip_safe = False @@ -8,13 +8,13 @@ modules = [] dependencies = [ - 'pandas>=0.20.3', + 'matplotlib>=2.2.3', 'numpy>=1.13.3', + 'pandas>=0.20.3', 'scipy>=1.0.0', ] extras_require={ -'r':['rpy2>=2.8.3'], } dependency_links = [ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..f5c41f0 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +__all__=[] \ No newline at end of file diff --git a/tests/data/candy_production.csv b/tests/data/candy_production.csv new file mode 100755 index 0000000..015b3f2 --- /dev/null +++ b/tests/data/candy_production.csv @@ -0,0 +1,549 @@ +observation_date,IPG3113N +1972-01-01,85.6945 +1972-02-01,71.8200 +1972-03-01,66.0229 +1972-04-01,64.5645 +1972-05-01,65.0100 +1972-06-01,67.6467 +1972-07-01,69.0429 +1972-08-01,70.8370 +1972-09-01,75.0462 +1972-10-01,106.9289 +1972-11-01,105.5962 +1972-12-01,105.9673 +1973-01-01,91.2997 +1973-02-01,77.2700 +1973-03-01,69.6110 +1973-04-01,70.2986 +1973-05-01,71.6822 +1973-06-01,74.8635 +1973-07-01,72.0464 +1973-08-01,73.1748 +1973-09-01,80.5915 +1973-10-01,102.9200 +1973-11-01,109.2524 +1973-12-01,105.2210 +1974-01-01,88.6985 +1974-02-01,83.6098 +1974-03-01,77.2300 +1974-04-01,67.3209 +1974-05-01,74.6196 +1974-06-01,79.5858 +1974-07-01,66.0568 +1974-08-01,71.1864 +1974-09-01,70.1750 +1974-10-01,99.2212 +1974-11-01,101.1201 +1974-12-01,86.8930 +1975-01-01,67.0117 +1975-02-01,52.6964 +1975-03-01,50.6689 +1975-04-01,59.7613 +1975-05-01,60.8277 +1975-06-01,63.3629 +1975-07-01,62.3089 +1975-08-01,66.9021 +1975-09-01,66.3200 +1975-10-01,96.3411 +1975-11-01,105.6285 +1975-12-01,102.1819 +1976-01-01,87.9578 +1976-02-01,75.1878 +1976-03-01,62.0101 +1976-04-01,64.4758 +1976-05-01,70.5454 +1976-06-01,68.2086 +1976-07-01,69.3122 +1976-08-01,71.5922 +1976-09-01,76.9073 +1976-10-01,107.9049 +1976-11-01,111.6584 +1976-12-01,113.9655 +1977-01-01,97.3515 +1977-02-01,90.0083 +1977-03-01,77.2871 +1977-04-01,76.0459 +1977-05-01,77.9316 +1977-06-01,78.3077 +1977-07-01,75.8701 +1977-08-01,78.1822 +1977-09-01,84.2727 +1977-10-01,109.2254 +1977-11-01,106.1656 +1977-12-01,113.0575 
+1978-01-01,90.1141 +1978-02-01,80.4678 +1978-03-01,76.4640 +1978-04-01,77.4211 +1978-05-01,76.7081 +1978-06-01,78.1769 +1978-07-01,72.4653 +1978-08-01,75.9054 +1978-09-01,82.7320 +1978-10-01,105.0435 +1978-11-01,111.6915 +1978-12-01,114.0821 +1979-01-01,98.6382 +1979-02-01,84.7727 +1979-03-01,81.0653 +1979-04-01,77.1607 +1979-05-01,78.3780 +1979-06-01,81.0958 +1979-07-01,74.7939 +1979-08-01,77.1113 +1979-09-01,80.8078 +1979-10-01,101.0970 +1979-11-01,106.7263 +1979-12-01,105.6220 +1980-01-01,86.9268 +1980-02-01,84.4365 +1980-03-01,74.4834 +1980-04-01,65.5610 +1980-05-01,74.3631 +1980-06-01,76.9925 +1980-07-01,71.0376 +1980-08-01,77.2616 +1980-09-01,77.9510 +1980-10-01,100.8283 +1980-11-01,106.7109 +1980-12-01,107.0469 +1981-01-01,96.3481 +1981-02-01,90.4918 +1981-03-01,78.0943 +1981-04-01,78.0284 +1981-05-01,83.3531 +1981-06-01,83.0404 +1981-07-01,79.2798 +1981-08-01,81.7679 +1981-09-01,83.2954 +1981-10-01,118.4981 +1981-11-01,116.9605 +1981-12-01,113.2558 +1982-01-01,95.9863 +1982-02-01,92.9899 +1982-03-01,83.0765 +1982-04-01,73.5603 +1982-05-01,76.4383 +1982-06-01,78.5492 +1982-07-01,76.3145 +1982-08-01,77.7653 +1982-09-01,81.3017 +1982-10-01,114.1349 +1982-11-01,114.9389 +1982-12-01,115.1824 +1983-01-01,95.1877 +1983-02-01,87.1973 +1983-03-01,77.9717 +1983-04-01,73.7339 +1983-05-01,75.5696 +1983-06-01,74.7701 +1983-07-01,76.3340 +1983-08-01,79.5580 +1983-09-01,82.8953 +1983-10-01,110.4480 +1983-11-01,106.5100 +1983-12-01,103.9983 +1984-01-01,93.8437 +1984-02-01,86.3220 +1984-03-01,78.9029 +1984-04-01,75.6699 +1984-05-01,77.8830 +1984-06-01,77.6690 +1984-07-01,76.9080 +1984-08-01,81.2320 +1984-09-01,85.8844 +1984-10-01,112.1683 +1984-11-01,115.5118 +1984-12-01,112.8158 +1985-01-01,97.6849 +1985-02-01,87.1184 +1985-03-01,79.1429 +1985-04-01,76.2069 +1985-05-01,77.3304 +1985-06-01,75.8357 +1985-07-01,75.1953 +1985-08-01,79.9166 +1985-09-01,89.5288 +1985-10-01,112.2728 +1985-11-01,113.6916 +1985-12-01,117.1114 +1986-01-01,97.3994 +1986-02-01,93.6471 +1986-03-01,78.8262 +1986-04-01,73.6548 +1986-05-01,76.5236 +1986-06-01,76.7767 +1986-07-01,73.4034 +1986-08-01,79.5478 +1986-09-01,88.4485 +1986-10-01,115.9014 +1986-11-01,119.4066 +1986-12-01,115.4294 +1987-01-01,97.1736 +1987-02-01,94.2793 +1987-03-01,83.6225 +1987-04-01,77.3408 +1987-05-01,78.0336 +1987-06-01,79.1708 +1987-07-01,76.1298 +1987-08-01,83.5260 +1987-09-01,90.7704 +1987-10-01,121.6259 +1987-11-01,124.8565 +1987-12-01,122.6595 +1988-01-01,95.8055 +1988-02-01,95.3010 +1988-03-01,89.8740 +1988-04-01,80.8266 +1988-05-01,82.4593 +1988-06-01,86.7724 +1988-07-01,90.7579 +1988-08-01,98.0626 +1988-09-01,102.5171 +1988-10-01,125.7369 +1988-11-01,123.4990 +1988-12-01,122.4540 +1989-01-01,102.9508 +1989-02-01,102.3499 +1989-03-01,93.4219 +1989-04-01,88.7382 +1989-05-01,87.9183 +1989-06-01,90.5658 +1989-07-01,89.7340 +1989-08-01,96.5697 +1989-09-01,101.0261 +1989-10-01,120.0367 +1989-11-01,123.3104 +1989-12-01,125.9960 +1990-01-01,99.9894 +1990-02-01,101.2116 +1990-03-01,94.8477 +1990-04-01,88.4239 +1990-05-01,88.6775 +1990-06-01,92.7610 +1990-07-01,96.9885 +1990-08-01,102.3169 +1990-09-01,108.6388 +1990-10-01,124.4571 +1990-11-01,133.2020 +1990-12-01,134.4426 +1991-01-01,107.4831 +1991-02-01,111.4080 +1991-03-01,104.8112 +1991-04-01,96.0485 +1991-05-01,94.9222 +1991-06-01,102.6901 +1991-07-01,100.1583 +1991-08-01,109.7879 +1991-09-01,111.1361 +1991-10-01,124.0982 +1991-11-01,129.3138 +1991-12-01,124.9696 +1992-01-01,104.3101 +1992-02-01,102.7870 +1992-03-01,94.9205 +1992-04-01,92.0467 +1992-05-01,89.7304 +1992-06-01,92.8576 
+1992-07-01,92.1938 +1992-08-01,96.2302 +1992-09-01,104.1677 +1992-10-01,118.9880 +1992-11-01,122.1755 +1992-12-01,121.4803 +1993-01-01,105.0701 +1993-02-01,102.2842 +1993-03-01,94.8146 +1993-04-01,89.6044 +1993-05-01,88.4397 +1993-06-01,94.8144 +1993-07-01,95.7128 +1993-08-01,103.4214 +1993-09-01,108.6304 +1993-10-01,124.8315 +1993-11-01,124.8048 +1993-12-01,122.7720 +1994-01-01,105.3330 +1994-02-01,100.7475 +1994-03-01,98.4825 +1994-04-01,89.3258 +1994-05-01,87.0124 +1994-06-01,93.7943 +1994-07-01,96.4548 +1994-08-01,102.9823 +1994-09-01,109.7563 +1994-10-01,123.0683 +1994-11-01,123.1853 +1994-12-01,124.9834 +1995-01-01,107.7064 +1995-02-01,99.2227 +1995-03-01,96.1946 +1995-04-01,94.2656 +1995-05-01,93.5966 +1995-06-01,98.2886 +1995-07-01,98.2274 +1995-08-01,102.2729 +1995-09-01,107.2035 +1995-10-01,120.4112 +1995-11-01,123.8626 +1995-12-01,128.9061 +1996-01-01,104.1852 +1996-02-01,105.5477 +1996-03-01,102.9177 +1996-04-01,94.8080 +1996-05-01,97.0557 +1996-06-01,100.4093 +1996-07-01,98.2829 +1996-08-01,108.0978 +1996-09-01,114.7798 +1996-10-01,126.2366 +1996-11-01,133.8463 +1996-12-01,136.1510 +1997-01-01,110.6349 +1997-02-01,109.6545 +1997-03-01,106.5499 +1997-04-01,98.1605 +1997-05-01,97.3783 +1997-06-01,101.5750 +1997-07-01,98.3122 +1997-08-01,109.9673 +1997-09-01,115.0362 +1997-10-01,129.5071 +1997-11-01,135.1607 +1997-12-01,136.0268 +1998-01-01,119.7766 +1998-02-01,117.1886 +1998-03-01,110.8164 +1998-04-01,106.1647 +1998-05-01,107.1149 +1998-06-01,110.8432 +1998-07-01,109.0117 +1998-08-01,117.6771 +1998-09-01,120.9282 +1998-10-01,132.6661 +1998-11-01,136.9855 +1998-12-01,135.9605 +1999-01-01,117.3789 +1999-02-01,114.6903 +1999-03-01,107.1010 +1999-04-01,106.2725 +1999-05-01,107.8371 +1999-06-01,108.3356 +1999-07-01,107.8132 +1999-08-01,112.5035 +1999-09-01,116.6453 +1999-10-01,131.6485 +1999-11-01,131.7630 +1999-12-01,134.5654 +2000-01-01,123.1325 +2000-02-01,119.7423 +2000-03-01,113.9508 +2000-04-01,115.9481 +2000-05-01,108.7202 +2000-06-01,114.2071 +2000-07-01,111.8737 +2000-08-01,117.9027 +2000-09-01,125.6499 +2000-10-01,136.8146 +2000-11-01,135.6331 +2000-12-01,138.7040 +2001-01-01,122.5767 +2001-02-01,121.8879 +2001-03-01,118.5969 +2001-04-01,114.6967 +2001-05-01,112.9349 +2001-06-01,115.3333 +2001-07-01,113.4896 +2001-08-01,119.6772 +2001-09-01,123.5141 +2001-10-01,125.5298 +2001-11-01,128.7324 +2001-12-01,129.6597 +2002-01-01,117.5658 +2002-02-01,114.5385 +2002-03-01,110.3068 +2002-04-01,104.6927 +2002-05-01,101.8499 +2002-06-01,112.2162 +2002-07-01,110.4021 +2002-08-01,117.5309 +2002-09-01,119.4877 +2002-10-01,124.0385 +2002-11-01,128.5738 +2002-12-01,124.1789 +2003-01-01,113.0303 +2003-02-01,111.1786 +2003-03-01,111.2168 +2003-04-01,105.1536 +2003-05-01,107.6101 +2003-06-01,111.6628 +2003-07-01,102.8517 +2003-08-01,113.1477 +2003-09-01,116.8135 +2003-10-01,125.1860 +2003-11-01,132.7907 +2003-12-01,128.8211 +2004-01-01,116.1890 +2004-02-01,117.6700 +2004-03-01,103.9096 +2004-04-01,102.2036 +2004-05-01,109.8036 +2004-06-01,106.2960 +2004-07-01,106.1117 +2004-08-01,116.2320 +2004-09-01,120.7290 +2004-10-01,132.3043 +2004-11-01,133.1452 +2004-12-01,129.9987 +2005-01-01,124.5687 +2005-02-01,123.9260 +2005-03-01,107.2575 +2005-04-01,106.8015 +2005-05-01,111.4551 +2005-06-01,107.1940 +2005-07-01,110.2132 +2005-08-01,114.8196 +2005-09-01,119.7252 +2005-10-01,137.1695 +2005-11-01,136.3902 +2005-12-01,139.9153 +2006-01-01,118.2816 +2006-02-01,117.8165 +2006-03-01,108.4194 +2006-04-01,107.5783 +2006-05-01,101.9894 +2006-06-01,101.9425 +2006-07-01,101.7114 +2006-08-01,112.0216 
+2006-09-01,118.6654 +2006-10-01,129.6397 +2006-11-01,130.3710 +2006-12-01,132.0261 +2007-01-01,121.7363 +2007-02-01,116.4986 +2007-03-01,112.6224 +2007-04-01,99.4400 +2007-05-01,98.0703 +2007-06-01,94.9320 +2007-07-01,91.3872 +2007-08-01,100.7496 +2007-09-01,110.1524 +2007-10-01,115.9774 +2007-11-01,118.4564 +2007-12-01,120.7117 +2008-01-01,108.7465 +2008-02-01,101.7820 +2008-03-01,97.2060 +2008-04-01,91.8637 +2008-05-01,88.9254 +2008-06-01,89.0084 +2008-07-01,85.1186 +2008-08-01,88.5622 +2008-09-01,103.2736 +2008-10-01,114.0601 +2008-11-01,115.8743 +2008-12-01,101.7672 +2009-01-01,89.9004 +2009-02-01,88.9836 +2009-03-01,85.5603 +2009-04-01,79.7102 +2009-05-01,80.2515 +2009-06-01,79.5651 +2009-07-01,82.3126 +2009-08-01,89.0494 +2009-09-01,101.1519 +2009-10-01,123.6728 +2009-11-01,117.0719 +2009-12-01,116.5435 +2010-01-01,100.3797 +2010-02-01,99.0155 +2010-03-01,91.9654 +2010-04-01,89.4914 +2010-05-01,89.9713 +2010-06-01,89.5047 +2010-07-01,96.4638 +2010-08-01,106.7689 +2010-09-01,115.8542 +2010-10-01,126.2773 +2010-11-01,117.7195 +2010-12-01,118.7519 +2011-01-01,103.0635 +2011-02-01,102.5548 +2011-03-01,98.9834 +2011-04-01,97.5274 +2011-05-01,91.3629 +2011-06-01,89.6899 +2011-07-01,89.6268 +2011-08-01,91.8899 +2011-09-01,93.9062 +2011-10-01,116.7634 +2011-11-01,116.8258 +2011-12-01,114.9563 +2012-01-01,99.9662 +2012-02-01,99.0417 +2012-03-01,94.1484 +2012-04-01,87.6950 +2012-05-01,85.3510 +2012-06-01,86.5815 +2012-07-01,89.5217 +2012-08-01,98.2967 +2012-09-01,112.2694 +2012-10-01,114.9091 +2012-11-01,116.0791 +2012-12-01,116.1401 +2013-01-01,107.0733 +2013-02-01,102.0263 +2013-03-01,102.6319 +2013-04-01,95.3206 +2013-05-01,91.7584 +2013-06-01,91.8125 +2013-07-01,92.4299 +2013-08-01,100.3593 +2013-09-01,105.5167 +2013-10-01,117.3458 +2013-11-01,121.6179 +2013-12-01,123.2412 +2014-01-01,104.5665 +2014-02-01,103.9509 +2014-03-01,101.0708 +2014-04-01,93.0044 +2014-05-01,88.4073 +2014-06-01,89.3661 +2014-07-01,88.0949 +2014-08-01,98.0799 +2014-09-01,106.8675 +2014-10-01,119.7665 +2014-11-01,129.0619 +2014-12-01,128.5528 +2015-01-01,109.9525 +2015-02-01,108.9073 +2015-03-01,106.5261 +2015-04-01,101.0631 +2015-05-01,96.7802 +2015-06-01,100.8339 +2015-07-01,102.8290 +2015-08-01,115.9030 +2015-09-01,115.8964 +2015-10-01,126.7440 +2015-11-01,124.5176 +2015-12-01,120.2374 +2016-01-01,108.5041 +2016-02-01,108.1308 +2016-03-01,107.9417 +2016-04-01,103.6179 +2016-05-01,102.0816 +2016-06-01,102.4044 +2016-07-01,102.9512 +2016-08-01,104.6977 +2016-09-01,109.3191 +2016-10-01,119.0502 +2016-11-01,116.8431 +2016-12-01,116.4535 +2017-01-01,109.4666 +2017-02-01,113.4661 +2017-03-01,105.2245 +2017-04-01,107.4288 +2017-05-01,101.9209 +2017-06-01,104.2022 +2017-07-01,102.5861 +2017-08-01,114.0613 diff --git a/tests/data/df_test_naive.csv b/tests/data/df_test_naive.csv new file mode 100644 index 0000000..d023ed4 --- /dev/null +++ b/tests/data/df_test_naive.csv @@ -0,0 +1,79 @@ +date,y +2017-01-29,0.4349205203965459 +2017-02-05,3.0939244374554384 +2017-02-12,3.570576295782257 +2017-02-19,3.563342711260862 +2017-02-26,6.385531690970693 +2017-03-05,7.1190596102394474 +2017-03-12,6.7996367386975365 +2017-03-19,6.835399190691371 +2017-03-26,7.108505735705831 +2017-04-02,7.057090295549315 +2017-04-09,6.7499156125551645 +2017-04-16,6.162630205418948 +2017-04-23,6.198688820948918 +2017-04-30,7.046710921413035 +2017-05-07,6.502776071560957 +2017-05-14,6.801603931195019 +2017-05-21,6.704787146284872 +2017-05-28,6.664921078362667 +2017-06-04,6.21757486453535 +2017-06-11,6.847429648511562 +2017-06-18,6.669050538105244 
+2017-06-25,6.684271203224789 +2017-07-02,6.893301693784536 +2017-07-09,6.735451167048266 +2017-07-16,6.859307417620351 +2017-07-23,6.834707822624351 +2017-07-30,6.793872366111498 +2017-08-06,6.822959736610834 +2017-08-13,6.74675938333047 +2017-08-20,6.517230286438933 +2017-08-27,6.812962761898247 +2017-09-03,6.557346777379345 +2017-09-10,7.301625554518906 +2017-09-17,7.071100098663302 +2017-09-24,6.8941645008092305 +2017-10-01,7.170365759931321 +2017-10-08,6.842752321704603 +2017-10-15,7.190685255413404 +2017-10-22,7.1968926085980005 +2017-10-29,6.832378325837326 +2017-11-05,7.186430516910802 +2017-11-12,7.189336007245643 +2017-11-19,7.274336435808509 +2017-11-26,7.395746721528261 +2017-12-03,7.392821166940019 +2017-12-10,7.573502955309098 +2017-12-17,7.563499486808907 +2017-12-24,7.15602503364969 +2017-12-31,4.950378683070151 +2018-01-07,6.4613873176891286 +2018-01-14,7.23745630660972 +2018-01-21,7.413771393172441 +2018-01-28,7.290036969811424 +2018-02-04,7.330240616312069 +2018-02-11,7.257741037653408 +2018-02-18,6.943758719510568 +2018-02-25,7.155971064420781 +2018-03-04,8.466230408426785 +2018-03-11,7.452759660447607 +2018-03-18,7.435772673953147 +2018-03-25,7.390361816944939 +2018-04-01,6.86589826083204 +2018-04-08,6.662158701811143 +2018-04-15,7.165209780232458 +2018-04-22,7.120109845677651 +2018-04-29,7.402460513091829 +2018-05-06,7.381854799086771 +2018-05-13,6.774638820831432 +2018-05-20,7.074164419607107 +2018-05-27,7.170134647803297 +2018-06-03,6.536887800141038 +2018-06-10,7.2318692224535 +2018-06-17,7.202142719034337 +2018-06-24,6.926316852627774 +2018-07-01,7.072639779280713 +2018-07-08,7.097870985414842 +2018-07-15,6.9114185617021375 +2018-07-22,6.951945268558375 diff --git a/tests/data/df_test_naive2.csv b/tests/data/df_test_naive2.csv new file mode 100644 index 0000000..f5769bd --- /dev/null +++ b/tests/data/df_test_naive2.csv @@ -0,0 +1,58 @@ +date,source,x,y +2017-06-25,P-03-In,0,0.009199999894927336 +2017-07-02,P-03-In,1,0.009199999986959702 +2017-07-09,P-03-In,2,0.009200000000498108 +2017-07-16,P-03-In,3,0.009199999888299563 +2017-07-23,P-03-In,4,0.009200000010290943 +2017-07-30,P-03-In,5,0.009200000022657678 +2017-08-06,P-03-In,6,0.009199999984631519 +2017-08-13,P-03-In,7,0.00919999996280566 +2017-08-20,P-03-In,8,0.009199999994843337 +2017-08-27,P-03-In,9,0.009199999971220093 +2017-09-03,P-03-In,10,0.009200000030683091 +2017-09-10,P-03-In,11,0.009200000002552809 +2017-09-17,P-03-In,12,0.00919999995405611 +2017-09-24,P-03-In,13,0.009199999976381365 +2017-10-01,P-03-In,14,0.0092000000324407 +2017-10-08,P-03-In,15,0.009199999945804973 +2017-10-15,P-03-In,16,0.00919999997597581 +2017-10-22,P-03-In,17,0.009199999979020355 +2017-10-29,P-03-In,18,0.0092000000185485 +2017-11-05,P-03-In,19,0.009200000007130841 +2017-11-12,P-03-In,20,0.009199999989690455 +2017-11-19,P-03-In,21,0.009200000020362446 +2017-11-26,P-03-In,22,0.009200000028438795 +2017-12-03,P-03-In,23,0.009200000038151055 +2017-12-10,P-03-In,24,0.009199999937747427 +2017-12-17,P-03-In,25,0.00919999998062174 +2017-12-24,P-03-In,26,0.009200000003846808 +2017-12-31,P-03-In,27,0.009200000053336922 +2018-01-07,P-03-In,28,0.00919999997026424 +2018-01-14,P-03-In,29,0.009199999966684136 +2018-01-21,P-03-In,30,0.009200000040870715 +2018-01-28,P-03-In,31,0.00920000000311432 +2018-02-04,P-03-In,32,0.009199999981294036 +2018-02-11,P-03-In,33,0.009200000000277988 +2018-02-18,P-03-In,34,0.009199999959599527 +2018-02-25,P-03-In,35,0.009199999980314294 +2018-03-04,P-03-In,36,0.009199999973300394 
+2018-03-11,P-03-In,37,0.009200000037236106 +2018-03-18,P-03-In,38,0.00920000001901414 +2018-03-25,P-03-In,39,0.00920000005450935 +2018-04-01,P-03-In,40,0.009200000019079686 +2018-04-08,P-03-In,41,0.009199999980682175 +2018-04-15,P-03-In,42,0.00919999991890617 +2018-04-22,P-03-In,43,0.009199999955175744 +2018-04-29,P-03-In,44,0.009199999971244061 +2018-05-06,P-03-In,45,0.05582210075845062 +2018-05-13,P-03-In,46,0.06648076233741707 +2018-05-20,P-03-In,47,0.06868591669154471 +2018-05-27,P-03-In,48,0.06702831154113441 +2018-06-03,P-03-In,49,0.06796620179936401 +2018-06-10,P-03-In,50,0.06790846507858844 +2018-06-17,P-03-In,51,0.06751835805547822 +2018-06-24,P-03-In,52,0.06994368168797657 +2018-07-01,P-03-In,53,0.06844987136927888 +2018-07-08,P-03-In,54,0.07056084702443666 +2018-07-15,P-03-In,55,0.06925717368373535 +2018-07-22,P-03-In,56,0.07054481867982176 diff --git a/tests/data/test_normalize.csv b/tests/data/test_normalize.csv new file mode 100644 index 0000000..fdeee3d --- /dev/null +++ b/tests/data/test_normalize.csv @@ -0,0 +1,45 @@ +date,y +2015-01-01,0 +2015-02-01,1 +2015-03-01,2 +2015-04-01,3 +2015-05-01,4 +2015-06-01,5 +2015-07-01,6 +2015-08-01,7 +2015-09-01,8 +2015-10-01,9 +2015-11-01,10 +2015-12-01,11 +2016-01-01,12 +2016-02-01,13 +2016-03-01,14 +2016-04-01,15 +2016-05-01,16 +2016-06-01,17 +2016-07-01,18 +2016-08-01,19 +2016-09-01,20 +2016-10-01,21 +2016-11-01,22 +2016-12-01,23 +2017-01-01,24 +2017-02-01,25 +2017-03-01,26 +2017-04-01,27 +2017-05-01,28 +2017-06-01,29 +2017-07-01,30 +2017-08-01,31 +2017-09-01,32 +2017-10-01,33 +2017-11-01,34 +2017-12-01,35 +2018-01-01,36 +2018-02-01,37 +2018-03-01,38 +2018-04-01,39 +2018-05-01,40 +2018-06-01,41 +2018-07-01,42 +2018-08-01,43 diff --git a/tests/test_forecast.py b/tests/test_forecast.py new file mode 100644 index 0000000..53333a7 --- /dev/null +++ b/tests/test_forecast.py @@ -0,0 +1,2513 @@ +""" + +Author: Pedro Capelastegui +Created on 04/12/2015 + +""" + +import platform +import os +import logging +import unittest +import pandas as pd, numpy as np + +from anticipy.model_utils import interpolate_df +from anticipy.utils_test import PandasTest +from anticipy.forecast import * + +# Dask dependencies - not currently used +# from dask import delayed +# from dask import compute +# from dask.distributed import Client +# from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler +# from dask.diagnostics import visualize + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def logger_info(msg, data): + logger.info(msg + '\n%s\n', data) + + +base_folder = os.path.join(os.path.dirname(__file__), 'data') + +pd.set_option('display.max_columns', 40) +pd.set_option('display.max_rows', 200) +pd.set_option('display.width', 1000) + + +def list_to_str(l): + if isinstance(l, list): + return str([str(i) for i in l]) + else: + return str(l) + +def array_ones_in_indices(n, l_indices): + return np.isin(np.arange(0, n), l_indices).astype(float) + +def array_zeros_in_indices(n, l_indices): + return (~np.isin(np.arange(0, n), l_indices)).astype(float) + +def print_forecast_driver_output(fcast_driver_output, log_first_line=None): + if fcast_driver_output.empty: + logger.info('Error: empty output') + else: + if log_first_line is not None: + log_first_line = '\r\n' + log_first_line + else: + log_first_line = '' + logger.info(log_first_line + '\r\nAIC_C:' + str(fcast_driver_output.dict_aic_c)) + # logger_info('AIC_C:',fcast_driver_output[0]) + +# usage: +# compute_prof(l_dict_result2_d, scheduler = 'processes', 
num_workers=4, title='Test figure') +def compute_prof(*args, **kwargs ): + with Profiler() as prof, ResourceProfiler(dt=0.25) as rprof: + out = compute(*args, **kwargs) + visualize([prof, rprof,# cprof + ], show=True) + return out + + +class TestForecast(PandasTest): + def setUp(self): + pass + + def test_normalize_df(self): + + def run_test(df, df_expected, **kwargs): + df_out = normalize_df(df, **kwargs) + logger_info('df_out:', df_out.tail(10)) + self.assert_frame_equal(df_out, df_expected) + + a_y = np.full(10, 0.0) + a_x = np.arange(0, 10).astype(np.int64) + a_x2 = np.tile(np.arange(0, 5), 2).astype(np.int64) + a_x2_out = np.repeat(np.arange(0, 5), 2).astype(np.int64) + a_source = ['s1'] * 5 + ['s2'] * 5 + a_weight = np.full(10, 1.0) + a_date = pd.date_range('2014-01-01', periods=10, freq='D') + a_date2 = np.tile(pd.date_range('2014-01-01', periods=5, freq='D'), 2) + a_date2_out = np.repeat(pd.date_range('2014-01-01', periods=5, freq='D'), 2) + + logger_info('DEBUG: ', a_date2) + + # Test 0: Empty input + + self.assertIsNone(normalize_df(pd.DataFrame)) + + # Test 1: Output with x,y columns + df_expected = pd.DataFrame({'y': a_y, 'x': a_x, })[['x', 'y']] + + l_input = [ + [pd.DataFrame({'y': a_y}), {}], + [pd.DataFrame({'y': a_y, 'x': a_x}), {}], + [pd.DataFrame({'y_test': a_y, 'x_test': a_x}), {'col_name_y': 'y_test', 'col_name_x': 'x_test'}] + ] + for df, kwargs in l_input: + run_test(df, df_expected, **kwargs) + + # Test 2: Output with x,y,weight columns + df_expected = pd.DataFrame({'y': a_y, 'x': a_x, 'weight': a_weight})[['x', 'y', 'weight']] + + l_input = [ + [pd.DataFrame({'y': a_y, 'weight': a_weight}), {}], + [pd.DataFrame({'y': a_y, 'x': a_x, 'weight': a_weight}), {}], + [pd.DataFrame({'y_test': a_y, 'x_test': a_x, 'weight_test': a_weight}), + {'col_name_y': 'y_test', 'col_name_x': 'x_test', 'col_name_weight': 'weight_test'}] + ] + for df, kwargs in l_input: + run_test(df, df_expected, **kwargs) + + # Test 3: Output with x,y,weight,date columns + df_expected = pd.DataFrame({'y': a_y, 'x': a_x, 'weight': a_weight, 'date': a_date})[ + ['date', 'x', 'y', 'weight']] + + l_input = [ + [pd.DataFrame({'y': a_y, 'weight': a_weight, 'date': a_date}), {}], + [pd.DataFrame({'y': a_y, 'weight': a_weight}, index=a_date), {}], + [pd.DataFrame({'y': a_y, 'x': a_x, 'weight': a_weight, 'date': a_date}), {}], + [pd.DataFrame({'y_test': a_y, 'x_test': a_x, 'weight_test': a_weight, 'date_test': a_date}), + {'col_name_y': 'y_test', 'col_name_x': 'x_test', 'col_name_weight': 'weight_test', + 'col_name_date': 'date_test'}] + ] + for df, kwargs in l_input: + run_test(df, df_expected, **kwargs) + + # Test 4: Input series + df_expected = pd.DataFrame({'y': a_y, 'x': a_x, })[['x', 'y']] + + l_input = [ + [pd.Series(a_y, name='y'), {}], + [pd.Series(a_y, name='y', index=a_x), {}], + [pd.Series(a_y, name='y_test'), {'col_name_y': 'y_test'}], + # [pd.DataFrame({'y_test': a_y, 'x_test': a_x}), {'col_name_y':'y_test','col_name_x':'x_test'}] + ] + for df, kwargs in l_input: + run_test(df, df_expected, **kwargs) + + # Test 5: Input series with datetimeindex + df_expected = pd.DataFrame({'y': a_y, 'x': a_x, 'date': a_date})[['date', 'x', 'y']] + + l_input = [ + [pd.Series(a_y, name='y', index=a_date), {}], + [pd.Series(a_y, name='y_test', index=a_date), {'col_name_y': 'y_test'}], + # [pd.DataFrame({'y_test': a_y, 'x_test': a_x}), {'col_name_y':'y_test','col_name_x':'x_test'}] + ] + for df, kwargs in l_input: + run_test(df, df_expected, **kwargs) + + # Test 6: Input df, output with x, y, weight, date, source 
columns + df_expected = ( + pd.DataFrame({'y': a_y, 'x':a_x2, 'source': a_source, 'weight':a_weight,'date':a_date2}) + [['date','source','x','y','weight']] + ) + + l_input = [ + [pd.DataFrame({'y': a_y, 'weight':a_weight, 'date':a_date2, 'source': a_source}),{}], + # Datetime index not supported with source - could be added back with multindex + #[pd.DataFrame({'y': a_y, 'weight': a_weight},index = a_date), {}], + [pd.DataFrame({'y': a_y, 'x': a_x2, 'weight': a_weight,'source':a_source,'date':a_date2}), {}], + [pd.DataFrame({'y_test': a_y, 'x_test': a_x2, 'weight_test':a_weight, 'date_test':a_date2, + 'source_test':a_source}), + {'col_name_y':'y_test','col_name_x':'x_test','col_name_weight':'weight_test', 'col_name_date':'date_test', + 'col_name_source':'source_test'}] + ] + for df, kwargs in l_input: + run_test(df, df_expected, **kwargs) + + # Test 7: Input df has multiple values per date per source + df_expected = ( + pd.DataFrame({'y': a_y, 'x':a_x2_out, 'weight':a_weight,'date':a_date2_out}) + [['date','x','y','weight']] + ) + + l_input = [ + [pd.DataFrame({'y': a_y, 'weight': a_weight, 'date': a_date2}), {}], + # Datetime index not supported with source - could be added back with multindex + [pd.DataFrame({'y': a_y, 'x': a_x2, 'weight': a_weight, 'date': a_date2}), {}], + [pd.DataFrame({'y_test': a_y, 'x_test': a_x2, 'weight_test': a_weight, 'date_test': a_date2, + }), + {'col_name_y': 'y_test', 'col_name_x': 'x_test', 'col_name_weight': 'weight_test', + 'col_name_date': 'date_test', + }] + ] + for df, kwargs in l_input: + run_test(df, df_expected, **kwargs) + + # Test 8: input df has date column in string form + a_date_str = a_date2.astype(str) + df_expected = ( + pd.DataFrame({'y': a_y, 'x':a_x2, 'source': a_source, 'weight':a_weight,'date':a_date2}) + [['date','source','x','y','weight']] + ) + + l_input = [ + [pd.DataFrame({'y': a_y, 'weight': a_weight, 'date': a_date_str, 'source': a_source}), {}], + [pd.DataFrame({'y': a_y, 'x': a_x2, 'weight': a_weight, 'source': a_source, 'date': a_date_str}), {}], + [pd.DataFrame({'y_test': a_y, 'x_test': a_x2, 'weight_test': a_weight, 'date_test': a_date_str, + 'source_test': a_source}), + {'col_name_y': 'y_test', 'col_name_x': 'x_test', 'col_name_weight': 'weight_test', + 'col_name_date': 'date_test', + 'col_name_source': 'source_test'}] + ] + for df, kwargs in l_input: + run_test(df, df_expected, **kwargs) + + # Test 9: unordered input df + + df_expected = pd.DataFrame({'y': a_y, 'x':a_x,})[['x','y']] + + l_input = [ + [pd.DataFrame({'y': a_y[::-1]}),{}], + [pd.DataFrame({'y': a_y[::-1], 'x': a_x[::-1]}), {}], + ] + for df, kwargs in l_input: + run_test(df, df_expected, **kwargs) + + # Test 10: candy production dataset + path_candy = os.path.join(base_folder, 'candy_production.csv') + df_candy_raw = pd.read_csv(path_candy) + df_candy = df_candy_raw.pipe(normalize_df, + col_name_y='IPG3113N', col_name_date='observation_date') + logger_info('df_candy:', df_candy.tail()) + + # Test 11: test_normalize.csv + + path_file = os.path.join(base_folder, 'test_normalize.csv') + df_test_raw = pd.read_csv(path_file) + df_test = df_test_raw.pipe(normalize_df,) + logger_info('df_test:', df_test.x.diff().loc[df_test.x.diff()>1.0]) + self.assertFalse((df_test.x.diff()>1.0).any()) + + # Test 11b: test_normalize.csv, with gaps + + path_file = os.path.join(base_folder, 'test_normalize.csv') + df_test_raw = pd.read_csv(path_file) + df_test_raw = pd.concat([df_test_raw.head(10), df_test_raw.tail(10)]) + df_test = df_test_raw.pipe(normalize_df,) + 
logger_info('df_test:',df_test) + logger_info('df_test:', df_test.x.diff().loc[df_test.x.diff()>1.0]) + self.assertTrue((df_test.x.max()==43)) + + def test_interpolate_df(self): + + # # Test 1: DF with date column, gap + # a_y = np.arange(0,10.) + # a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + # df_expected = pd.DataFrame({'y': a_y, 'date': a_date}).pipe(normalize_df) + # df = pd.concat([df_expected.head(5), df_expected.tail(-6)]).pipe(normalize_df) + # + # df_result = df.pipe(interpolate_df) + # logger_info('df_result:', df_result) + # self.assert_frame_equal(df_result, df_expected) + # + # df_result = df.pipe(interpolate_df, include_mask=True) + # + # # Test 1: DF with no date column, gap + # a_y = np.arange(0,10.) + # a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + # df_expected = pd.DataFrame({'y': a_y}).pipe(normalize_df) + # df = pd.concat([df_expected.head(5), df_expected.tail(-6)]).pipe(normalize_df) + # + # df_result = df.pipe(interpolate_df) + # logger_info('df_result:', df_result) + # self.assert_frame_equal(df_result, df_expected) + # + # df_result = df.pipe(interpolate_df, include_mask=True) + # logger_info('df_result:', df_result) + + + # Test 2: Sparse series with date gaps + df_test = pd.DataFrame({'date': pd.to_datetime(['2018-08-01', '2018-08-09']), 'y': [1., 2.]}) + df_result = df_test.pipe(interpolate_df, include_mask=True) + logger_info('df_result:', df_result) + self.assertEqual(df_result.index.size,9) + + + + + def test_forecast_input(self): + y_values1 = pd.DataFrame({'a': np.full(100, 0.0), + 'b': np.round(np.arange(-0.5, 0.5, 0.01), 2), }, + index=pd.date_range('2014-01-01', periods=100, freq='D')) + # Too few samples + n = 4 + y_values1b = pd.DataFrame({'a': np.full(n, 0.0)}, + index=pd.date_range('2014-01-01', periods=n, freq='D')) + + y_values2 = pd.DataFrame({'a': np.full(100, 0.0)}, + index=pd.date_range('2014-01-01', periods=100, freq='D')) + + # SolverConfig with trend + conf1 = ForecastInput( + source_id='source1', + l_model_trend=[forecast_models.model_constant, forecast_models.model_linear], + l_model_season=None, df_y=y_values1, + weights_y_values=1.0, date_start_actuals=None + ) + logger_info('Solver config:', conf1) + + def test_get_residuals(self): + # Linear model + model = forecast_models.model_linear + a_y = np.arange(10.0) + a_x = np.arange(10.0) + a_date = None + # Using parameter(0,0) + residuals = get_residuals([0, 0], model, a_x, a_y, a_date) + l_expected1 = np.arange(10.0) + logger_info('residuals:', residuals) + self.assert_array_equal(residuals, l_expected1) + + # Test - If input array is not 1-dimensional, throw Exception + model = forecast_models.model_linear + a_y = pd.DataFrame({'a': np.arange(10.0), 'b': -np.arange(10.0)}).values + a_x = np.arange(10.0) + with self.assertRaises(AssertionError): + residuals = get_residuals([0, 0], model, a_x, a_y, a_date) + + # Test - multiple values per sample + a_y = np.concatenate([np.arange(10.0), -np.arange(10.0)]) + a_x = np.tile(np.arange(10.0), 2) + + residuals = get_residuals([0, 0], model, a_x, a_y, a_date) + logger_info('residuals:', residuals) + l_expected2 = np.concatenate([np.arange(10.0), np.arange(10.0)]) + self.assert_array_equal(residuals, l_expected2) + + # As above, but applying weights to input time series [1.0, 0] + residuals = get_residuals([0, 0], model, a_x, a_y, a_date, + a_weights=np.repeat([1.0, 0], 10)) + l_expected2b = np.concatenate([np.arange(10.0), np.full(10, 0)]) + logger_info('residuals:', residuals) + 
self.assert_array_equal(residuals, l_expected2b) + + # TODO: MORE TESTS WITH WEIGHTS_Y_VALUES + + # New test, different parameters + residuals = get_residuals([0, 5], model, a_x, a_y, a_date) + logger_info('residuals:', residuals) + self.assert_array_equal(residuals, + [5., 4., 3., 2., 1., 0., 1., 2., 3., 4., 5., 6., 7., + 8., 9., 10., 11., 12., 13., 14.]) + + # Test - Use a_weights to weight residuals based on time + # Using parameter(0,0) + a_y = np.arange(10.0) + a_x = np.arange(10.0) + a_weights = np.linspace(1., 2., 10) + logger_info('a_y: ', a_y) + logger_info('a_weights: ', a_weights) + residuals = get_residuals([0, 0], model, a_x, a_y, a_date, a_weights=a_weights) + self.assert_array_equal(residuals, np.arange(10.0) * a_weights) + logger_info('residuals:', residuals) + + def test_optimize_least_squares(self): + # Setup + a_x = pd.np.arange(100.0) + a_y = np.arange(100.0) + + a_x_long = np.tile(a_x, 2) + a_y_long = np.concatenate([np.full(100, 0.0), + np.round(np.arange(-0.5, 0.5, 0.01), 2)]) + a_date = None + + l_model = [ + forecast_models.model_linear, + forecast_models.model_constant + ] + + def print_result(result): + logger.info('result cost: %s, shape: %s, x: %s, message: %s', + result.cost, result.fun.shape, result.x, result.message) + + for model in l_model: + logger.info('#### Model function: %s', model.name) + + df_result = optimize_least_squares(model, a_x, a_y, a_date) + logger_info('result:', df_result) + self.assertTrue(df_result.success.any()) + # logger_info('result.x:',res_trend.x) + + df_result = optimize_least_squares(model, a_x_long, a_y_long, a_date) + logger_info('result:', df_result) + self.assertTrue(df_result.success.any()) + + def test_fit_model(self): + # Input dataframes must have an y column, and may have columns x,date, weight + + # Setup + # TODO: Use pre-normalized input dfs, rather than callling normalize_df() + dict_df_y = { + # Single ts + 'df_1ts_nodate': pd.DataFrame({'y': np.full(100, 0.0), + 'weight_test': np.full(100, 1.0)}), + # 2 ts + 'df_2ts_nodate': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), + np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + 'x': np.tile(np.arange(0, 100), 2), + }), + # 1 ts with datetime index + 'df_1ts_w': pd.DataFrame({'y': np.full(100, 0.0), + 'weight_test': np.full(100, 1.0) + }, + index=pd.date_range('2014-01-01', periods=100, freq='W')), + # 2 ts with datetime index + 'df_2ts_w': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + }, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq='W'), 2)), + # Single ts, freq=D + 'df_1ts_d': pd.DataFrame({'y': np.full(100, 0.0), + 'weight_test': np.full(100, 1.0)}, + index=pd.date_range('2014-01-01', periods=100, freq='D')), + # 2 ts with datetime index, freq=D + 'df_2ts_d': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)])}, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq='D'), 2)) + } + l_source1 = ['df_1ts_nodate', 'df_2ts_nodate', 'df_1ts_w', 'df_1ts_w', 'df_2ts_d', 'df_2ts_d'] + l_source2 = ['df_1ts_d', 'df_2ts_d'] + + # Naive trend models - cannot add seasonality + l_model1a = [ + forecast_models.model_naive, # model_naive never actually goes to fit_model + # TODO: add assert check on fit model re: validity of input model + + ] + + 
l_model1b = [ + forecast_models.model_snaive_wday + # TODO: add assert check on fit model re: validity of input model + + ] + + l_model1c = [ + forecast_models.model_linear, + forecast_models.model_constant + ] + # All trend models + l_model1 = l_model1a+l_model1b+l_model1c + + l_model2 = [ + forecast_models.model_season_wday, + forecast_models.model_season_wday_2, + forecast_models.model_season_month + ] + l_model3 = get_list_model(l_model1c, l_model2) + + l_results = [] + l_optimize_info = [] + + l_add_weight = [False, True] + + def run_test_logic(source, model, add_weight): + df_y = dict_df_y[source].copy() + if add_weight: # Enable weight column + df_y['weight'] = df_y['weight_test'] + df_y = df_y.pipe(normalize_df) + logger.info('Fitting src: %s , mod: %s, add_weight: %s', source, model, add_weight) + dict_fit_model = fit_model(model, df_y, source=source, df_actuals = df_y) + return dict_fit_model + # logger_info('Result: ',result) + + # Test - single solver type, return best fit + for (source, model, add_weight) in itertools.product( + l_source1, l_model1a+l_model1c, l_add_weight): + dict_fit_model = run_test_logic(source, model, add_weight) + result_tmp = dict_fit_model['metadata'] + info_tmp = dict_fit_model['optimize_info'] + l_results += [result_tmp] + l_optimize_info += [info_tmp] + + # Now for models that require datetimeindex + for (source, model, add_weight) in itertools.product( + l_source2, l_model1b+l_model2, l_add_weight): + dict_fit_model = run_test_logic(source, model, add_weight) + result_tmp = dict_fit_model['metadata'] + info_tmp = dict_fit_model['optimize_info'] + l_results += [result_tmp] + l_optimize_info += [info_tmp] + + # Finally, we use trend+seasonality with all models + for (source, model, add_weight) in itertools.product( + l_source2, l_model3, l_add_weight): + dict_fit_model = run_test_logic(source, model, add_weight) + result_tmp = dict_fit_model['metadata'] + info_tmp = dict_fit_model['optimize_info'] + l_results += [result_tmp] + l_optimize_info += [info_tmp] + + df_result = pd.concat(l_results, sort=False, ignore_index=True) + df_optimize_info = pd.concat(l_optimize_info, sort=False, ignore_index=True) + + self.assertFalse(df_result.cost.pipe(pd.isnull).any()) + + logger_info('Result summary:', df_result) + logger_info('Optimize info summary:', df_optimize_info) + + @unittest.skip('Dask not supported yet') + def test_fit_model_dask(self): + # Input dataframes must have an y column, and may have columns x,date, weight + + # Setup + # TODO: Use pre-normalized input dfs, rather than callling normalize_df() + dict_df_y = { + # Single ts + 'df_1ts_nodate': pd.DataFrame({'y': np.full(100, 0.0), + 'weight_test': np.full(100, 1.0)}), + # 2 ts + 'df_2ts_nodate': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), + np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + 'x': np.tile(np.arange(0, 100), 2), + }), + # 1 ts with datetime index + 'df_1ts_w': pd.DataFrame({'y': np.full(100, 0.0), + 'weight_test': np.full(100, 1.0) + }, + index=pd.date_range('2014-01-01', periods=100, freq='W')), + # 2 ts with datetime index + 'df_2ts_w': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + }, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq='W'), 2)), + # Single ts, freq=D + 'df_1ts_d': pd.DataFrame({'y': np.full(100, 0.0), + 'weight_test': np.full(100, 1.0)}, + 
index=pd.date_range('2014-01-01', periods=100, freq='D')), + # 2 ts with datetime index, freq=D + 'df_2ts_d': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)])}, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq='D'), 2)) + } + l_source1 = ['df_1ts_nodate', 'df_2ts_nodate', 'df_1ts_w', 'df_1ts_w', 'df_2ts_d', 'df_2ts_d'] + l_source2 = ['df_1ts_d', 'df_2ts_d'] + + l_model1 = [ + forecast_models.model_naive, # model_naive never actually goes to fit_model + # TODO: add assert check on fit model re: validity of input model + forecast_models.model_linear, + forecast_models.model_constant + ] + l_model2 = [ + forecast_models.model_season_wday, + forecast_models.model_season_wday_2, + forecast_models.model_season_month + ] + l_model3 = get_list_model(l_model1, l_model2) + + l_add_weight = [False, True] + + def run_test_logic(df_y, source, model, add_weight): + #df_y = dict_df_y[source].copy() + #if add_weight: # Enable weight column + # df_y['weight']=df_y['weight_test'] + col_name_weight = 'weight' if add_weight==True else 'no-weight' + df_y = df_y.pipe(normalize_df, col_name_weight=col_name_weight) + #logger.info('Fitting src: %s , mod: %s, add_weight: %s', source, model, add_weight) + #dict_fit_model = delayed(fit_model)(model, df_y, source=source, df_actuals = df_y) + dict_fit_model = fit_model(model, df_y, source=source, df_actuals=df_y) + return dict_fit_model + # logger_info('Result: ',result) + + def aggregate_dict_fit_model(l_dict_fit_model): + l_results = [] + l_optimize_info = [] + for dict_fit_model in l_dict_fit_model: + result_tmp = dict_fit_model['metadata'] + info_tmp = dict_fit_model['optimize_info'] + l_results += [result_tmp] + l_optimize_info += [info_tmp] + df_metadata = pd.concat(l_results, sort=False, ignore_index=True) + df_optimize_info = pd.concat(l_optimize_info, sort=False, ignore_index=True) + return df_metadata, df_optimize_info + + l_dict_fit_model_d = [] + + # Test - single solver type, return best fit + for (source, model, add_weight) in itertools.product( + l_source1, l_model1, l_add_weight): + l_dict_fit_model_d += [delayed(run_test_logic)(dict_df_y[source].copy(), source, model, add_weight)] + + + # Now for models that require datetimeindex + for (source, model, add_weight) in itertools.product( + l_source2, l_model2, l_add_weight): + l_dict_fit_model_d += [delayed(run_test_logic)(dict_df_y[source].copy(), source, model, add_weight)] + + # Finally, we use trend+seasonality with all models + for (source, model, add_weight) in itertools.product( + l_source2, l_model3, l_add_weight): + l_dict_fit_model_d += [delayed(run_test_logic)(dict_df_y[source].copy(), source, model, add_weight)] + + logger.info('generated delayed') + + #client = Client() + #logger_info('client:',client) + #l_dict_fit_model, = compute(l_dict_fit_model_d) + l_dict_fit_model, = compute_prof(l_dict_fit_model_d, scheduler='processes', num_workers=4) + #l_dict_fit_model, = compute(l_dict_fit_model_d, scheduler='processes', num_workers=4) + #l_dict_fit_model, = compute(l_dict_fit_model_d, scheduler='distributed', num_workers=4) + #l_dict_fit_model, = compute(l_dict_fit_model_d, scheduler='threads', num_workers=4) + #l_dict_fit_model = l_dict_fit_model_d + + df_metadata, df_optimize_info = aggregate_dict_fit_model(l_dict_fit_model) + #result_d = delayed(aggregate_dict_fit_model)(l_dict_fit_model_d) + #result_d = delayed(aggregate_dict_fit_model)(l_dict_fit_model_d) + 
#(df_metadata, df_optimize_info), = compute(result_d) + # result, = compute(result_d) + logger_info('Result summary:', df_metadata) + logger_info('Optimize info summary:', df_optimize_info) + #client.close() + + @unittest.skip('Dask not supported yet') + def test_fit_model_dask2(self): + # Input dataframes must have an y column, and may have columns x,date, weight + + # Setup + # TODO: Use pre-normalized input dfs, rather than callling normalize_df() + dict_df_y = { + # Single ts + 'df_1ts_nodate': pd.DataFrame({'y': np.full(100, 0.0), + 'weight_test': np.full(100, 1.0)}), + # 2 ts + 'df_2ts_nodate': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), + np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + 'x': np.tile(np.arange(0, 100), 2), + }), + # 1 ts with datetime index + 'df_1ts_w': pd.DataFrame({'y': np.full(100, 0.0), + 'weight_test': np.full(100, 1.0) + }, + index=pd.date_range('2014-01-01', periods=100, freq='W')), + # 2 ts with datetime index + 'df_2ts_w': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + }, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq='W'), 2)), + # Single ts, freq=D + 'df_1ts_d': pd.DataFrame({'y': np.full(100, 0.0), + 'weight_test': np.full(100, 1.0)}, + index=pd.date_range('2014-01-01', periods=100, freq='D')), + # 2 ts with datetime index, freq=D + 'df_2ts_d': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)])}, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq='D'), 2)) + } + l_source1 = ['df_1ts_nodate', 'df_2ts_nodate', 'df_1ts_w', 'df_1ts_w', 'df_2ts_d', 'df_2ts_d'] + l_source2 = ['df_1ts_d', 'df_2ts_d'] + + l_model1 = [ + forecast_models.model_naive, # model_naive never actually goes to fit_model + # TODO: add assert check on fit model re: validity of input model + forecast_models.model_linear, + forecast_models.model_constant + ] + l_model2 = [ + forecast_models.model_season_wday, + forecast_models.model_season_wday_2, + forecast_models.model_season_month + ] + l_model3 = get_list_model(l_model1, l_model2) + + l_weight = ['no-weight', 'weight_test'] + + def run_test_logic(df_y, source, model, add_weight): + #df_y = dict_df_y[source].copy() + if add_weight: # Enable weight column + df_y['weight']=df_y['weight_test'] + df_y = df_y.pipe(normalize_df) + #logger.info('Fitting src: %s , mod: %s, add_weight: %s', source, model, add_weight) + #dict_fit_model = delayed(fit_model)(model, df_y, source=source, df_actuals = df_y) + dict_fit_model = fit_model(model, df_y, source=source, df_actuals=df_y) + return dict_fit_model + # logger_info('Result: ',result) + + def aggregate_dict_fit_model(l_dict_fit_model): + l_results = [] + l_optimize_info = [] + for dict_fit_model in l_dict_fit_model: + result_tmp = dict_fit_model['metadata'] + info_tmp = dict_fit_model['optimize_info'] + l_results += [result_tmp] + l_optimize_info += [info_tmp] + df_metadata = delayed(pd.concat)(l_results, sort=False, ignore_index=False) + df_optimize_info = delayed(pd.concat)(l_optimize_info, sort=False, ignore_index=False) + return df_metadata, df_optimize_info + + l_dict_fit_model_d = [] + + # Test - single solver type, return best fit + + + l_dict_fit_model_d += [ + delayed(fit_model)(model, + dict_df_y[source].pipe(delayed(normalize_df), 
col_name_weight=weight), + source=source, df_actuals=dict_df_y[source].pipe(delayed(normalize_df), col_name_weight=weight)) + for (source, model, weight) in itertools.product(l_source1, l_model1, l_weight)] + + # Now for models that require datetimeindex + l_dict_fit_model_d += [ + delayed(fit_model)(model, + dict_df_y[source].pipe(delayed(normalize_df), col_name_weight=weight), + source=source, df_actuals=dict_df_y[source].pipe(delayed(normalize_df), col_name_weight=weight)) + for (source, model, weight) in itertools.product(l_source2, l_model2, l_weight)] + + # Finally, we use trend+seasonality with all models + l_dict_fit_model_d += [ + delayed(fit_model)(model, + dict_df_y[source].pipe(delayed(normalize_df), col_name_weight=weight), + source=source, df_actuals=dict_df_y[source].pipe(delayed(normalize_df), col_name_weight=weight)) + for (source, model, weight) in itertools.product( l_source2, l_model3, l_weight)] + + # # Finally, we use trend+seasonality with all models + # for (source, model, weight) in itertools.product( + # l_source2, l_model3, l_weight): + # df_y = dict_df_y[source].pipe(normalize_df, col_name_weight=weight) + # l_dict_fit_model_d += [delayed(fit_model)(model, df_y, source=source, df_actuals=df_y)] + + logger.info('generated delayed') + + #l_dict_fit_model, = compute(l_dict_fit_model_d) + l_dict_fit_model = l_dict_fit_model_d + + #df_metadata, df_optimize_info = aggregate_dict_fit_model(l_dict_fit_model) + result_d = delayed(aggregate_dict_fit_model)(l_dict_fit_model_d) + (df_metadata, df_optimize_info), = compute(result_d) + # result, = compute(result_d) + logger_info('Result summary:', df_metadata) + logger_info('Optimize info summary:', df_optimize_info) + + @unittest.skip('Dask not supported yet') + def test_dask(self): + def aggregate_result(l_dict_result): + l_metadata = [] + l_opt = [] + for dict_result in l_dict_result: + l_metadata += [dict_result['metadata']] + l_opt += [dict_result['optimize_info']] + return pd.concat(l_metadata, sort=False, ignore_index=False), pd.concat(l_opt, sort=False, + ignore_index=False) + + model = forecast_models.model_linear + df_y = pd.DataFrame({'y': np.full(100, 0.0), 'weight_test': np.full(100, 1.0)}).pipe(normalize_df) + + l_dict_result2_d = [delayed(fit_model)(model, df_y, source=i, df_actuals=df_y) for i in np.arange(0, 20)] + result_d = delayed(aggregate_result)(l_dict_result2_d) + #result = compute(result_d,scheduler='processes',num_workers=2) + result = compute(result_d) + logger_info('result',result) + + + def test_fit_model_date_gaps(self): + # Setup + # 2 ts with datetime index, freq=D + df_2ts_d = pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)])}, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq='D'), 2)) + + df_y = pd.concat([df_2ts_d.head(), df_2ts_d.tail()]) + + model = forecast_models.model_linear + + l_col_name_weight = [None, 'weight'] + + l_results = [] + l_optimize_info = [] + + def run_test_logic(col_name_weight): + logger.info('Fitting col_w: %s', col_name_weight) + df_y_tmp = df_y.pipe(normalize_df,col_name_weight=col_name_weight) + dict_fit_model = fit_model(model, df_y_tmp, source='test') + return dict_fit_model + # logger_info('Result: ',result) + + # Test - single solver type, return best fit + for col_name_weight in l_col_name_weight: + dict_fit_model = run_test_logic(col_name_weight) + result_tmp = dict_fit_model['metadata'] + info_tmp = dict_fit_model['optimize_info'] + 
l_results += [result_tmp] + l_optimize_info += [info_tmp] + + df_result = pd.concat(l_results) + df_optimize_info = pd.concat(l_optimize_info) + logger_info('Result summary:', df_result) + logger_info('Optimize info summary:', df_optimize_info) + + def test_get_list_model(self): + l1 = [ + forecast_models.model_linear, + forecast_models.model_constant + ] + l2 = [ + forecast_models.model_season_wday_2, + forecast_models.model_null + ] + l_result_add = get_list_model(l1, l2, 'add') + l_result_mult = get_list_model(l1, l2, 'mult') + l_result_both = get_list_model(l1, l2, 'both') + + l_expected_add = [ + l1[0] + l2[0], + l1[0] + l2[1], + l1[1] + l2[0], + l1[1] + l2[1], + ] + + l_expected_mult = [ + l1[0] * l2[0], + l1[0] * l2[1], + l1[1] * l2[0], + l1[1] * l2[1], + ] + l_expected_both = [ + l1[0] + l2[0], + l1[0] + l2[1], + l1[1] + l2[0], + l1[1] + l2[1], + l1[0] * l2[0], + # l1[0] * l2[1], # This is a duplicate: linear*null = linear+null = linear + l1[1] * l2[0], + # l1[1] * l2[1], # This is a duplicate: constant*null = constant+null = constant + ] + logger_info('Result add:', l_result_add) + logger_info('Expected add:', l_expected_add) + self.assertListEqual(l_result_add, l_expected_add) + + logger_info('Result mult:', l_result_mult) + logger_info('Expected mult:', l_expected_mult) + self.assertListEqual(l_result_mult, l_expected_mult) + + logger_info('Result both:', l_result_both) + logger_info('Expected both:', l_expected_both) + self.assertListEqual(l_result_both, l_expected_both) + + def test_fit_model_trend_season_wday_mult(self): + # Test Specific model combination that doesn't fit + + # Setup + n_iterations = 10 + + # Setup + dict_df_y = { + # Single ts + 'df_1ts_nodate': pd.DataFrame({'y': np.full(100, 0.0), + 'weight': np.full(100, 1.0)}), + # 2 ts + 'df_2ts_nodate': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), + np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + 'x': np.tile(np.arange(0, 100), 2), + }), + # 1 ts with datetime index + 'df_1ts_w': pd.DataFrame({'y': np.full(100, 0.0), + 'weight': np.full(100, 1.0) + }, + index=pd.date_range('2014-01-01', periods=100, freq='W')), + + # 2 ts with datetime index + 'df_2ts_w': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + }, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq='W'), 2)), + # Single ts, freq=D + 'df_1ts_d': pd.DataFrame({'y': np.full(100, 0.0), + 'weight': np.full(100, 1.0)}, + index=pd.date_range('2014-01-01', periods=100, freq='D')), + + # Single ts, freq=D , index named 'date + 'df_1ts_d2': pd.DataFrame({'y': np.full(100, 0.0), + 'weight': np.full(100, 1.0)}, + index=pd.date_range('2014-01-01', periods=100, freq='D', name='date')) + .reset_index() + , + + # 2 ts with datetime index, freq=D + 'df_2ts_d': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)])}, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq='D'), 2)) + } + + l_source_d = ['df_1ts_d', 'df_2ts_d','df_1ts_d2'] + l_source_w = ['df_1ts_2', 'df_2ts_2'] + + l_model_trend = [ + forecast_models.model_linear, + ] + l_model_season = [ + # forecast_models.model_season_wday, + # forecast_models.model_season_wday, + forecast_models.model_season_wday_2, + forecast_models.model_null + ] + + l_col_name_weight = [ # None, + 'weight'] + + l_results = [] 
+ l_optimize_info = [] + + # Fit , run n iterations, freq='D' + for (source, col_name_weight, model) in itertools.product( + l_source_d, l_col_name_weight, get_list_model(l_model_trend, l_model_season, 'both')): + df_y = dict_df_y[source].copy().pipe(normalize_df,col_name_weight=col_name_weight) + logger.info('Fitting src: %s , mod: %s, col_w: %s', source, model, col_name_weight) + for i in np.arange(0, n_iterations): + dict_fit_model = fit_model(model, df_y, source=source, freq='D') + l_results += [dict_fit_model['metadata']] + l_optimize_info += [dict_fit_model['optimize_info']] + + # Fit , run n iterations, freq='D' - test function composition in different order + for (source, col_name_weight, model) in itertools.product( + l_source_d, l_col_name_weight, get_list_model(l_model_season, l_model_trend, 'both')): + df_y = dict_df_y[source].copy().pipe(normalize_df, col_name_weight=col_name_weight) + logger.info('Fitting src: %s , mod: %s, col_w: %s', source, model, col_name_weight) + for i in np.arange(0, n_iterations): + dict_fit_model = fit_model(model, df_y, source=source, freq='D') + l_results += [dict_fit_model['metadata']] + l_optimize_info += [dict_fit_model['optimize_info']] + + df_result = pd.concat(l_results) + df_optimize_info = pd.concat(l_optimize_info) + logger_info('Result summary:', df_result) + logger_info('Optimize info summary:', df_optimize_info) + + def test_extrapolate_model(self): + # with freq=None, defaults to W + df_y_forecast = extrapolate_model(forecast_models.model_constant, [1.0], + '2017-01-01', '2017-01-01', freq=None, extrapolate_years=1.0) + logger_info('df_y_forecast', df_y_forecast.tail(1)) + logger_info('Result length:', df_y_forecast.index.size) + self.assertEquals(df_y_forecast.index.size, 53) + + df_y_forecast = extrapolate_model(forecast_models.model_constant, [1.0], + '2017-01-01', '2017-12-31', freq='D', extrapolate_years=1.0) + logger_info('df_y_forecast', df_y_forecast.tail(1)) + logger_info('Result length:', df_y_forecast.index.size) + self.assertEquals(df_y_forecast.index.size, 365 * 2) + + df_y_forecast = extrapolate_model(forecast_models.model_constant, [1.0], + '2017-01-01', '2017-12-31', freq='MS', extrapolate_years=1.0) + logger_info('df_y_forecast', df_y_forecast.tail(1)) + logger_info('Result length:', df_y_forecast.index.size) + self.assertEquals(df_y_forecast.index.size, 12 * 2) + + df_y_forecast = extrapolate_model(forecast_models.model_constant, [1.0], + '2000-01-01', '2009-01-01', freq='YS', extrapolate_years=10.0) + logger_info('df_y_forecast', df_y_forecast.tail(20)) + logger_info('Result length:', df_y_forecast.index.size) + self.assertEquals(df_y_forecast.index.size, 20) + + # TODO: Test other time frequencies, e.g. Q, H, Y. 
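+
+    # Illustrative sketch only (not part of the original test suite): the TODO above could be
+    # covered along these lines, assuming extrapolate_model accepts an hourly frequency string
+    # in the same way as the 'D' case exercised above. Prefixed with '_' so unittest does not
+    # collect it automatically; the loose assertion only checks that a forecast period is added.
+    def _sketch_extrapolate_model_hourly(self):
+        df_y_forecast = extrapolate_model(forecast_models.model_constant, [1.0],
+                                          '2017-01-01 00:00', '2017-01-02 00:00',
+                                          freq='H', extrapolate_years=1.0)
+        logger_info('Result length:', df_y_forecast.index.size)
+        # 25 hourly actuals plus a non-empty extrapolation period
+        self.assertTrue(df_y_forecast.index.size > 25)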
+ + def test_get_df_actuals_clean(self): + dict_df_y = { + # Single ts + 'df_1ts_nodate': pd.DataFrame({'y': np.full(100, 0.0), + 'weight': np.full(100, 1.0)}), + # 2 ts + 'df_2ts_nodate': pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), + np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + 'x': np.tile(np.arange(0, 100), 2), + }), + # 1 ts with datetime index + 'df_1ts_w': pd.DataFrame({'y': np.full(100, 0.0), + 'weight': np.full(100, 1.0) + }, + index=pd.date_range('2014-01-01', periods=100, freq='W')), + # 1 ts with datetime index named 'date + 'df_1ts_w-2': pd.DataFrame({'y': np.full(100, 0.0), + 'weight': np.full(100, 1.0) + }, + index=pd.date_range('2014-01-01', periods=100, freq='W', name='date')), + # 1 ts with datetime column + 'df_1ts_w-3': pd.DataFrame({'y': np.full(100, 0.0), + 'weight': np.full(100, 1.0), + 'date': pd.date_range('2014-01-01', periods=100, freq='W') + }) + } + # Simple test - check for crashes + + for k in dict_df_y.keys(): + logger.info('Input: %s', k) + df_in = dict_df_y.get(k).pipe(normalize_df) + logger_info('DF_IN',df_in.tail(3)) + df_result = get_df_actuals_clean(df_in,'test','test') + logger_info('Result:', df_result.tail(3)) + unique_models = df_result.model.drop_duplicates().reset_index(drop=True) + self.assert_series_equal(unique_models, pd.Series(['actuals'])) + logger_info('Models:', df_result.model.drop_duplicates()) + + def _test_run_forecast_basic_tests_new_api(self, n_sources=1, **kwargs): + # Both additive and multiplicative + dict_result = run_forecast(simplify_output=False, **kwargs) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + l_sources = df_metadata.source_long.unique() + include_all_fits = kwargs.get('include_all_fits') + if not include_all_fits: # In this case, there should be only one model fitted per data source + self.assertTrue(df_metadata.is_best_fit.all()) + self.assertTrue((df_data.is_best_fit | df_data.is_actuals).all()) + # The following may not be true if a model doesn't converge + self.assertEquals(df_metadata.index.size, n_sources) + self.assertEquals(df_data.loc[~df_data.is_actuals].drop_duplicates('source_long').index.size, + n_sources) + + # Check that actuals are included + self.assertTrue((df_data.is_actuals.any())) + + # Check that dtype is not corrupted + self.assertTrue(np.issubdtype(df_data.y.astype(float), np.float64)) + + def _test_run_forecast_check_length_new_api(self, **kwargs): + freq = kwargs.get('freq', 'D') + + freq = detect_freq(kwargs.get('df_y').pipe(normalize_df)) + + freq_short = freq[0:1] if freq is not None else None # Changes e.g. 
W-MON to W + freq_units_per_year = 52.0 if freq_short == 'W' else 365.0 # Todo: change to dict to support more frequencies + + extrapolate_years = kwargs.get('extrapolate_years', 1.0) + + # Both additive and multiplicative + dict_result = run_forecast(simplify_output=False, extrapolate_years=extrapolate_years, **kwargs) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + df_data_size = df_data.groupby(['source', 'model', 'is_actuals']).size().rename('group_size').reset_index() + df_data_size_unique = ( + df_data.drop_duplicates(['source', 'model', 'is_actuals', 'date']) + .groupby(['source', 'model', 'is_actuals']).size().rename('group_size').reset_index() + ) + logger_info('df_data_size:', df_data_size) + logger_info('df_data_size_unique:', df_data_size_unique) + + df_y = kwargs.get('df_y') + assert df_y is not None + + # Normalize df_y + df_y = normalize_df(df_y, + kwargs.get('col_name_y', 'y'), + kwargs.get('col_name_weight', 'weight'), + kwargs.get('col_name_x', 'x'), + kwargs.get('col_name_date', 'date'), + kwargs.get('col_name_source', 'source')) + if 'source' not in df_y.columns: + df_y['source'] = kwargs.get('source_id', 'source') + + l_sources = df_y.source.drop_duplicates() + + for source in l_sources: + df_y_tmp = df_y.loc[df_y.source == source] + + size_actuals_unique_tmp = df_y_tmp.drop_duplicates('x').index.size + size_actuals_tmp = df_y_tmp.index.size + + df_data_size_tmp = df_data_size.loc[df_data_size.source == source] + df_data_size_actuals = df_data_size_tmp.loc[df_data_size_tmp.is_actuals] + df_data_size_fcast = df_data_size_tmp.loc[~df_data_size_tmp.is_actuals] + + # logger.info('DEBUG: group size: %s',100 + extrapolate_years*freq_units_per_year) + # This assert doesn't work for all years - some have 365 days, some 366. Currently running with 365-day year + + logger.info('DEBUG: df_data_size_fcast.group_size %s , size_actuals_tmp %s, total %s', + df_data_size_fcast.group_size.values, size_actuals_tmp, + size_actuals_tmp + extrapolate_years * freq_units_per_year) + + self.assertTrue((df_data_size_actuals.group_size == size_actuals_tmp).all()) + + self.assert_array_equal(df_data_size_fcast.group_size, + size_actuals_unique_tmp + extrapolate_years * freq_units_per_year) + self.assertFalse(df_data_size_fcast.empty) + + def _test_run_forecast(self, freq='D'): + # freq_short = freq[0:1] # Changes e.g. 
W-MON to W + # freq_units_per_year = 52.0 if freq_short == 'W' else 365.0 # Todo: change to dict to support more frequencies + + # Input dataframe without date column + df_y0 = pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + }, + ) + + df_y1 = pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + }, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq=freq), 2)) + + # Too few samples + n = 4 + df_y1b = pd.DataFrame({'y': np.full(n, 0.0)}, + index=pd.date_range('2017-01-01', periods=n, freq=freq)) + + df_y2 = pd.DataFrame({'y': np.full(100, 0.0)}, + index=pd.date_range('2017-01-01', periods=100, freq=freq)) + + # Df with source column + df_y3 = pd.DataFrame({'y': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + 'source': ['src1'] * 100 + ['src2'] * 100 + }, + index=np.tile(pd.date_range('2014-01-01', periods=100, freq=freq), 2)) + # As above, with renamed columns + df_y3b = pd.DataFrame({'y_test': np.concatenate([np.full(100, 0.0), np.round(np.arange(-0.5, 0.5, 0.01), 2)]), + 'weight_test': np.concatenate([np.full(100, 0.1), np.full(100, 1.0)]), + 'source_test': ['src1'] * 100 + ['src2'] * 100, + 'date_test': np.tile(pd.date_range('2014-01-01', periods=100, freq=freq), 2) + }) + + # # Model lists + l_model_trend1 = [forecast_models.model_linear] + l_model_trend1b = [forecast_models.model_linear, forecast_models.model_season_wday_2] + l_model_trend2 = [forecast_models.model_linear, forecast_models.model_exp] + + l_model_season1 = [forecast_models.model_season_wday_2] + l_model_season2 = [forecast_models.model_season_wday_2, forecast_models.model_null] + # + # # # Test input with source column, multiple sources + # self._test_run_forecast_basic_tests_new_api(df_y=df_y3, include_all_fits=True, + # l_model_trend=l_model_trend2, l_model_season=l_model_season2) + # self._test_run_forecast_basic_tests_new_api(df_y=df_y3b, include_all_fits=True, + # l_model_trend=l_model_trend2, l_model_season=l_model_season2, + # col_name_y='y_test', col_name_date='date_test', + # col_name_source='source_test', col_name_weight='weight_test') + + ## New test - forecast length + logger.info('Testing Output Length') + self._test_run_forecast_check_length_new_api(df_y=df_y1, include_all_fits=False, + l_model_trend=l_model_trend1b, source_id='source1') + self._test_run_forecast_check_length_new_api(df_y=df_y2, include_all_fits=False, + l_model_trend=l_model_trend2, l_model_season=l_model_season2, + source_id='source2') + + def test_runforecast(self): + for freq in ['D', + 'W']: + self._test_run_forecast(freq=freq) + + def test_run_forecast_simple_linear_model(self): + df1 = pd.DataFrame({'y': np.arange(0, 10.)}, + index=pd.date_range('2014-01-01', periods=10, freq='D')) + dict_result = run_forecast(simplify_output=False, df_y=df1, l_model_trend=[forecast_models.model_linear]) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(30)) + + df2 = pd.DataFrame({'y': np.arange(0, 10.), 'source': ['src1'] * 5 + ['src2'] * 5}, + 
index=pd.date_range('2014-01-01', periods=10, freq='D')) + dict_result = run_forecast(simplify_output=False, df_y=df2, l_model_trend=[forecast_models.model_linear]) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60)) + + def test_run_forecast_naive(self): + # # Test 1 - linear series, 1 source + # df1 = pd.DataFrame({'y': np.arange(0,10.)}, + # index=pd.date_range('2014-01-01', periods=10, freq='D')) + # dict_result = run_forecast(simplify_output=False, df_y=df1, l_model_trend = [forecast_models.model_naive], + # extrapolate_years=10./365) + # + # df_data = dict_result['data'] + # df_metadata = dict_result['metadata'] + # df_optimize_info = dict_result['optimize_info'] + # + # logger_info('df_metadata:', df_metadata) + # logger_info('df_optimize_info:', df_optimize_info) + # logger_info('df_data:', df_data.groupby(['source', 'model']).tail(40)) + # + # # Test 2 - 2 sources + # df2 = pd.DataFrame({'y': np.arange(0,10.),'source' : ['src1']*5 + ['src2']*5}, + # index=pd.date_range('2014-01-01', periods=10, freq='D')) + # dict_result = run_forecast(simplify_output=False, df_y=df2, l_model_trend = [forecast_models.model_naive], + # extrapolate_years=10./365) + # + # df_data = dict_result['data'] + # df_metadata = dict_result['metadata'] + # df_optimize_info = dict_result['optimize_info'] + # + # logger_info('df_metadata:', df_metadata) + # logger_info('df_optimize_info:', df_optimize_info) + # logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60)) + # + # # test 3: weight column + # df1 = pd.DataFrame({'y': np.arange(0, 10.), 'weight': array_zeros_in_indices(10,[5,6])}, + # index=pd.date_range('2014-01-01', periods=10, freq='D')) + # dict_result = run_forecast(simplify_output=False, df_y=df1, l_model_trend=[forecast_models.model_naive], + # extrapolate_years=10. / 365) + # + # df_data = dict_result['data'] + # df_metadata = dict_result['metadata'] + # df_optimize_info = dict_result['optimize_info'] + # + # logger_info('df_metadata:', df_metadata) + # logger_info('df_optimize_info:', df_optimize_info) + # logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60)) + # + # a_y_result = df_data.loc[df_data.model=='naive'].y.values + # logger_info('a_y_result:', a_y_result) + # self.assert_array_equal(a_y_result, + # np.concatenate([ + # np.array([0., 0., 1., 2., 3., 4., 4.,4.,7.,8., 9.,]), + # np.full(9, 9.) + # ] + # )) + # + # df_forecast = dict_result['forecast'] + # logger_info('df_forecast',df_forecast) + # + # # Test 3b: weight column, season_add_mult = 'both' + # + # df1 = pd.DataFrame({'y': np.arange(0, 10.), 'weight': array_zeros_in_indices(10, [5, 6])}, + # index=pd.date_range('2014-01-01', periods=10, freq='D')) + # dict_result = run_forecast(simplify_output=False, df_y=df1, l_model_trend=[forecast_models.model_naive], + # extrapolate_years=10. 
/ 365, + # season_add_mult='both') + # + # df_data = dict_result['data'] + # df_metadata = dict_result['metadata'] + # df_optimize_info = dict_result['optimize_info'] + # + # logger_info('df_metadata:', df_metadata) + # logger_info('df_optimize_info:', df_optimize_info) + # logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60)) + # + # a_y_result = df_data.loc[df_data.model == 'naive'].y.values + # logger_info('a_y_result:', a_y_result) + # self.assert_array_equal(a_y_result, + # np.concatenate([ + # np.array([0., 0., 1., 2., 3., 4., 4., 4., 7., 8., 9., ]), + # np.full(9, 9.) + # ] + # )) + # + # df_forecast = dict_result['forecast'] + # logger_info('df_forecast', df_forecast) + # + # # Test 4: find_outliers + # + # df1 = pd.DataFrame({'y': np.arange(0, 10.)+10*array_ones_in_indices(10,[5,6])}, + # index=pd.date_range('2014-01-01', periods=10, freq='D')) + # dict_result = run_forecast(simplify_output=False, df_y=df1, l_model_trend=[forecast_models.model_naive], + # extrapolate_years=10. / 365, find_outliers=True) + # + # df_data = dict_result['data'] + # df_metadata = dict_result['metadata'] + # df_optimize_info = dict_result['optimize_info'] + # + # logger_info('df_metadata:', df_metadata) + # logger_info('df_optimize_info:', df_optimize_info) + # logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60)) + # + # a_y_result = df_data.loc[df_data.model=='naive'].y.values + # logger_info('a_y_result:', a_y_result) + # self.assert_array_equal(a_y_result, + # np.concatenate([ + # np.array([0., 0., 1., 2., 3., 4., 4.,4.,7.,8., 9.,]), + # np.full(9, 9.) + # ] + # )) + # + # df_forecast = dict_result['forecast'] + # logger_info('df_forecast',df_forecast) + # + # # Test 4b: find_outliers, season_add_mult = 'both' + # + # df1 = pd.DataFrame({'y': np.arange(0, 10.)+10*array_ones_in_indices(10,[5,6])}, + # index=pd.date_range('2014-01-01', periods=10, freq='D')) + # dict_result = run_forecast(simplify_output=False, df_y=df1, l_model_trend=[forecast_models.model_naive], + # extrapolate_years=10. / 365, find_outliers=True, season_add_mult='both') + # + # df_data = dict_result['data'] + # df_metadata = dict_result['metadata'] + # df_optimize_info = dict_result['optimize_info'] + # + # logger_info('df_metadata:', df_metadata) + # logger_info('df_optimize_info:', df_optimize_info) + # logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60)) + # + # a_y_result = df_data.loc[df_data.model=='naive'].y.values + # logger_info('a_y_result:', a_y_result) + # self.assert_array_equal(a_y_result, + # np.concatenate([ + # np.array([0., 0., 1., 2., 3., 4., 4.,4.,7.,8., 9.,]), + # np.full(9, 9.) + # ] + # )) + # + # df_forecast = dict_result['forecast'] + # logger_info('df_forecast',df_forecast) + + # Test 5: Series with gap + + # df1 = ( + # pd.DataFrame({'y': np.arange(0, 10.), + # #'weight': array_zeros_in_indices(10, [5, 6]), + # 'date': pd.date_range('2014-01-01', periods=10, freq='D')}, + # ) + # + # ) + # + # df1 = pd.concat([df1.head(5), df1.tail(3)], sort=False, ignore_index=False).pipe(normalize_df) + # + # dict_result = run_forecast(simplify_output=False, df_y=df1, + # l_model_trend=[], + # l_model_naive=[forecast_models.model_naive, forecast_models.model_snaive_wday], + # extrapolate_years=10. 
/ 365, + # season_add_mult='both') + # + # + # df_data = dict_result['data'] + # df_metadata = dict_result['metadata'] + # df_optimize_info = dict_result['optimize_info'] + # + # logger_info('df_metadata:', df_metadata) + # logger_info('df_optimize_info:', df_optimize_info) + # logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60)) + # + # a_y_result = df_data.loc[df_data.model == 'naive'].y.values + # logger_info('a_y_result:', a_y_result) + # self.assert_array_equal(a_y_result, + # np.concatenate([ + # np.array([0., 0., 1., 2., 3., 4., 4., 4., 7., 8., 9., ]), + # np.full(9, 9.) + # ] + # )) + # + # df_forecast = dict_result['forecast'] + # logger_info('df_forecast', df_forecast) + + # Test 6: Series with spike, find_outliers=True, use model_snaive_wday + + df1 = ( + pd.DataFrame({'y': np.arange(0, 21.) + 10*array_ones_in_indices(21, 7), + #'weight': array_zeros_in_indices(10, [5, 6]), + 'date': pd.date_range('2014-01-01', periods=21, freq='D')}, + ) + + ) + + #array_ones_in_indices(n, l_indices) + + dict_result = run_forecast(simplify_output=False, df_y=df1, + l_model_trend=[], + l_model_season=[], + l_model_naive=[forecast_models.model_snaive_wday], + extrapolate_years=20. / 365, + season_add_mult='both', find_outliers=True) + + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + df_data['wday']=df_data.date.dt.weekday + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60)) + + a_y_result = df_data.loc[df_data.model == 'snaive_wday'].y.values + logger_info('a_y_result:', a_y_result) + self.assert_array_equal(a_y_result, + np.array([0., 1., 2., 3., 4., 5., 6., 17., 8., 9., 10., 11., 12., + 13., 14., 15., 16., 17., 18., 19., 20., 14., 15., 16., 17., 18., + 19., 20., 14., 15., 16., 17., 18., 19.]) + ) + + df_forecast = dict_result['forecast'] + logger_info('df_forecast', df_forecast) + + def test_run_forecast_naive2(self): + # Test 1: run forecast with naive model, find_outliers, season_add_mult = 'add', weekly samples + path_df_naive = os.path.join(base_folder, 'df_test_naive.csv') + df_test_naive = pd.read_csv(path_df_naive) + + l_season_yearly = [ + forecast_models.model_season_month, + # model_season_fourier_yearly, + forecast_models.model_null] + + l_season_weekly = [ # forecast_models.model_season_wday_2, + forecast_models.model_season_wday, forecast_models.model_null] + + dict_result = run_forecast(simplify_output=False, df_y=df_test_naive, + #l_model_trend=[forecast_models.model_naive], + l_model_naive=[forecast_models.model_naive], + l_season_yearly=l_season_yearly, + l_season_weekly=l_season_weekly, + extrapolate_years=75. 
/ 365, find_outliers=True, season_add_mult='add') + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.loc[(df_data.date>'2017-12-01') & (df_data.date<'2018-02-01')]) + + a_y_result = df_data.loc[df_data.model == 'naive'].y.values + #logger_info('a_y_result:', a_y_result) + + df_forecast = dict_result['forecast'] + logger_info('df_forecast',df_forecast.loc[(df_forecast.date>'2017-12-01')& (df_forecast.date<'2018-02-01')]) + + # After first spike, naive forecast and actuals start matching, only if season_add_mult='both' + self.assertNotEqual(df_data.loc[(df_data.date == '2018-01-07') & (df_data.model=='naive')].y.iloc[0], + df_data.loc[(df_data.date == '2018-01-07') & (df_data.model == 'actuals')].y.iloc[0]) + + + # Test 2: run forecast with naive model, find_outliers, season_add_mult = 'both', weekly samples + #path_df_naive = os.path.join(base_folder, 'df_test_naive.csv') + #df_test_naive = pd.read_csv(path_df_naive) + + l_season_yearly = [ + forecast_models.model_season_month, + # model_season_fourier_yearly, + forecast_models.model_null] + + l_season_weekly = [ # forecast_models.model_season_wday_2, + forecast_models.model_season_wday, forecast_models.model_null] + + dict_result = run_forecast(simplify_output=False, df_y=df_test_naive, + #l_model_trend=[forecast_models.model_naive], + l_model_naive=[forecast_models.model_naive], + l_season_yearly=l_season_yearly, + l_season_weekly=l_season_weekly, + extrapolate_years=75. / 365, find_outliers=True, season_add_mult='both') + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.loc[(df_data.date>'2017-12-01') & (df_data.date<'2018-02-01')]) + + a_y_result = df_data.loc[df_data.model == 'naive'].y.values + #logger_info('a_y_result:', a_y_result) + + df_forecast = dict_result['forecast'] + logger_info('df_forecast',df_forecast.loc[(df_forecast.date>'2017-12-01')& (df_forecast.date<'2018-02-01')]) + + # After first spike, naive forecast and actuals start matching, only if season_add_mult='both' + self.assertNotEqual(df_data.loc[(df_data.date == '2018-01-07') & (df_data.model=='naive')].y.iloc[0], + df_data.loc[(df_data.date == '2018-01-07') & (df_data.model == 'actuals')].y.iloc[0]) + + + # Test 3 - multiple model_naive runs + path_df_naive = os.path.join(base_folder, 'df_test_naive.csv') + df_test_naive = pd.read_csv(path_df_naive) + + model_naive2 = forecast_models.ForecastModel('naive2', 0, forecast_models._f_model_naive) + + l_model_naive = [forecast_models.model_naive,model_naive2] + + dict_result = run_forecast(simplify_output=False, df_y=df_test_naive, + l_model_trend=[], + l_season_yearly=l_season_yearly, + l_season_weekly=l_season_weekly, + l_model_naive= l_model_naive, + extrapolate_years=75. 
/ 365, find_outliers=True, season_add_mult='add', ) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.loc[(df_data.date>'2017-12-01') & (df_data.date<'2018-02-01')]) + + a_y_result = df_data.loc[df_data.model == 'naive'].y.values + #logger_info('a_y_result:', a_y_result) + + df_forecast = dict_result['forecast'] + logger_info('df_forecast',df_forecast.loc[(df_forecast.date>'2017-12-01')& (df_forecast.date<'2018-02-01')]) + + # After first spike, naive forecast and actuals start matching, only if season_add_mult='both' + self.assertNotEqual(df_data.loc[(df_data.date == '2018-01-07') & (df_data.model=='naive')].y.iloc[0], + df_data.loc[(df_data.date == '2018-01-07') & (df_data.model == 'actuals')].y.iloc[0]) + + def test_run_forecast_sparse_with_gaps(self): + df_test = pd.DataFrame({'date': pd.to_datetime(['2018-08-01', '2018-08-09']), 'y': [1., 2.]}) + df_out = run_forecast(df_test, extrapolate_years=1.0) + logger_info('df_out', df_out) + def test_run_forecast_output_options(self): + freq = 'D' + freq_short = freq[0:1] # Changes e.g. W-MON to W + freq_units_per_year = 52.0 if freq_short == 'W' else 365.0 # Todo: change to dict to support more frequencies + + df_y = pd.DataFrame({'y': np.full(100, 0.0)}, + index=pd.date_range('2014-01-01', periods=100, freq=freq)) + + # SolverConfig with trend + conf1 = ForecastInput( + source_id='source1', + l_model_trend=[forecast_models.model_linear, forecast_models.model_constant], + l_model_season=None, df_y=df_y, date_start_actuals=None + ) + + logger.info('Testing run forecast - default settings') + + dict_result = run_l_forecast([conf1]) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + for include_all_fits in [False, True]: + logger.info('Testing run forecast - include_all_fits=%s', + include_all_fits) + + dict_result = run_l_forecast([conf1], + include_all_fits=include_all_fits) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + # TODO: ADD ASSERTS + + def test_run_forecast_step(self): + # Setup + freq = 'D' + df_y1 = pd.DataFrame({'y': 5 * [10.0] + 5 * [20.0]}, + index=pd.date_range('2014-01-01', periods=10, freq=freq)) + + # SolverConfig with trend + conf1 = ForecastInput( + source_id='source1', + l_model_trend=[forecast_models.model_constant, + forecast_models.model_constant + forecast_models.model_step], + l_model_season=None, df_y=df_y1, weights_y_values=1.0, date_start_actuals=None + ) + + dict_result = run_l_forecast([conf1], include_all_fits=True) + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + # Test 2 : 2 steps + + # Setup + freq = 'D' + df_y1 = pd.DataFrame({'y': [1., 1., 1., 1., 1., 1., 5., 5., 6., 6.]}, + 
index=pd.date_range('2014-01-01', periods=10, freq=freq)) + + # SolverConfig with trend + conf1 = ForecastInput( + source_id='source1', + l_model_trend=[forecast_models.model_constant + forecast_models.model_two_steps], + l_model_season=None, df_y=df_y1, weights_y_values=1.0, date_start_actuals=None + ) + + dict_result = run_l_forecast([conf1], include_all_fits=True) + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + def test_run_forecast_sigmoid_step(self): + # Setup + freq = 'D' + df_y1 = pd.DataFrame({'y': [10., 10.1, 10.2, 10.3, 10.4, 20.0, 20.1, 20.2, 20.3, 20.4, 20.5, 20.6]}, + index=pd.date_range('2014-01-01', periods=12, freq=freq)) + + # SolverConfig with trend + conf1 = ForecastInput( + source_id='source1', + l_model_trend=[forecast_models.model_constant, + forecast_models.model_sigmoid_step, + forecast_models.model_constant + forecast_models.model_sigmoid_step, + forecast_models.model_linear + forecast_models.model_sigmoid_step, + forecast_models.model_linear * forecast_models.model_sigmoid_step], + l_model_season=None, df_y=df_y1, weights_y_values=1.0, date_start_actuals=None + ) + + dict_result = run_l_forecast([conf1], include_all_fits=True) + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + # Same with negative step + df_y1 = pd.DataFrame({'y': [20.0, 20.1, 20.2, 20.3, 20.4, 20.5, 20.6, 10., 10.1, 10.2, 10.3, 10.4]}, + index=pd.date_range('2014-01-01', periods=12, freq=freq)) + + conf1 = ForecastInput( + source_id='source1', + l_model_trend=[forecast_models.model_constant, + forecast_models.model_sigmoid_step, + forecast_models.model_constant + forecast_models.model_sigmoid_step, + forecast_models.model_linear + forecast_models.model_sigmoid_step, + forecast_models.model_linear * forecast_models.model_sigmoid_step], + l_model_season=None, df_y=df_y1, weights_y_values=1.0, date_start_actuals=None + ) + + dict_result = run_l_forecast([conf1], include_all_fits=True) + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + def test_run_forecast_fourier_yearly(self): + # Yearly sinusoidal function + + # With daily samples + length = 2 * 365 + # size will be +-10 +- uniform error + a_date = pd.date_range(start='2018-01-01', freq='D', periods=length) + a_y = (10 + np.random.uniform(low=0, high=1, size=length) + + 10 * (np.sin(np.linspace(-4 * np.pi, 4 * np.pi, length)))) + df_y = pd.DataFrame({'y': a_y}, index=a_date) + + conf = ForecastInput( + source_id='source', + l_model_trend=[ + forecast_models.model_constant, + forecast_models.model_season_fourier_yearly, + forecast_models.model_constant + + forecast_models.model_season_fourier_yearly], + l_model_season=[forecast_models.model_null], df_y=df_y, weights_y_values=1.0, date_start_actuals=None + ) + dict_result = run_l_forecast([conf], include_all_fits=True) + df_data = dict_result['data'] + df_metadata = 
dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + df = df_data.loc[(df_data.model == 'a') | df_data.is_best_fit, + ['y', 'date', 'model']] + + df = df.pivot(values='y', columns='model', index='date') + if platform.system() != 'Darwin': # matplotlib tests don't work on mac # matplotlib tests don't work on mac + df.plot() + + length = 1 * 365 + # size will be +-10 +- uniform error + a_date = pd.date_range(start='2018-01-01', freq='D', periods=length) + a_y = (10 + np.random.uniform(low=0, high=1, size=length) + + 10 * (np.sin(np.linspace(-4 * np.pi, 4 * np.pi, length))) + + 5 * (np.cos(np.linspace(-6 * np.pi, 6 * np.pi, length)))) + df_y = pd.DataFrame({'y': a_y}, index=a_date) + + conf = ForecastInput( + source_id='source', + l_model_trend=[ + forecast_models.model_constant, + forecast_models.model_season_fourier_yearly, + forecast_models.model_constant + + forecast_models.model_season_fourier_yearly], + l_model_season=[forecast_models.model_null], df_y=df_y, weights_y_values=1.0, date_start_actuals=None + ) + dict_result = run_l_forecast([conf], include_all_fits=True) + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + df = df_data.loc[(df_data.model == 'a') | df_data.is_best_fit, + ['y', 'date', 'model']] + + df = df.pivot(values='y', columns='model', index='date') + if platform.system() != 'Darwin': # matplotlib tests don't work on mac # matplotlib tests don't work on mac + df.plot() + # TODO find a better assertion test + pass + + def test_run_forecast_sigmoid(self): + # Input parameters + b_in = 100. + c_in = 40. + d_in = 1. 
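        # b_in, c_in and d_in parameterise the test sigmoid defined below,
        # y = a + (b - a) / (1 + np.exp(-d * (x - c))): b is the upper asymptote,
        # c the midpoint and d the steepness; the lower level a (a_in) is chosen
        # per branch depending on is_mult.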
+ # linear params + a_lin = 0.01 + b_lin = 0.05 + + is_mult_l = [False, True] + + def sigmoid(x, a, b, c, d): + y = a + (b - a) / (1 + np.exp(- d * (x - c))) + return y + + a_x = np.arange(1, 100) + # linear to find + + for is_mult in is_mult_l: + + if is_mult: + a_in = 1 + model = forecast_models.model_linear * forecast_models.model_sigmoid + y_lin = a_lin * a_x + b_lin + y_in = sigmoid(a_x, a_in, b_in, c_in, d_in) * y_lin + input_params = [a_lin, b_lin] + y_rand = np.random.uniform(low=0.001, high=0.1 * b_in, size=len(a_x)) * y_lin + else: + a_in = 30 # the constant + model = forecast_models.model_constant + forecast_models.model_sigmoid + y_in = sigmoid(a_x, a_in, b_in, c_in, d_in) + input_params = [a_in] + y_rand = np.random.uniform(low=0.001, high=0.1 * b_in, size=len(a_x)) + + input_params = input_params + [b_in - a_in, c_in, d_in] + + y_in = y_rand + y_in + df_y = pd.DataFrame({'y': y_in}, index=a_x) + # SolverConfig with trend + conf1 = ForecastInput( + source_id='source1', + l_model_trend=[ + forecast_models.model_constant, + # forecast_models.model_sigmoid, + model, + # forecast_models.model_linear + forecast_models.model_sigmoid, + # forecast_models.model_linear * forecast_models.model_sigmoid + ], + l_model_season=None, df_y=df_y, weights_y_values=1.0, date_start_actuals=None + ) + + dict_result = run_l_forecast([conf1], + include_all_fits=True) + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + # df_optimize_info = dict_result['optimize_info'] + + df = df_data.loc[:, ['y', 'date', 'model']] + + df = df.pivot(values='y', columns='model', index='date') + if platform.system() != 'Darwin': # matplotlib tests don't work on mac # matplotlib tests don't work on mac + df.plot() + output_params = df_metadata.loc[df_metadata.is_best_fit, 'params_str'] + logger.info('Input parameters: %s, Output parameters: %s', + input_params, output_params.iloc[0]) + pass # to see the plot + + def test_auto_find_sigmoid_step(self): + # Setup + + # First do it manually + freq = 'D' + a_y = [19.8, 19.9, 20.0, 20.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 10., 10.1, 10.2, 10.3, 10.4, + 10.5, 10.6, 10.7, 10.8, 10.9] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df_y = pd.DataFrame({'y': a_y}, index=a_date) + a_x = np.arange(0, len(a_y)) + + steps, spikes = forecast_models.find_steps_and_spikes(a_x, a_y, a_date) + assert len(steps) == 1 + assert len(spikes) == 0 + step_model = steps[0] + trend_models = [forecast_models.model_linear + step_model, + forecast_models.model_linear + forecast_models.model_sigmoid_step, + forecast_models.model_linear] + + # SolverConfig with trend + conf1 = ForecastInput( + source_id='source1', + l_model_trend=trend_models, + l_model_season=None, + df_y=df_y, + weights_y_values=1.0, + date_start_actuals=None + ) + + dict_result = run_l_forecast([conf1], + include_all_fits=True) + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + # Then do it automatically + trend_models = [forecast_models.model_linear] + + # SolverConfig with trend + conf2 = ForecastInput( + source_id='source1', + l_model_trend=trend_models, + l_model_season=None, + df_y=df_y, + weights_y_values=1.0, + date_start_actuals=None + ) + + dict_result = run_l_forecast([conf2], + include_all_fits=True, do_find_steps_and_spikes=True) + 
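        # Only model_linear is supplied here; with do_find_steps_and_spikes=True the
        # solver is expected to detect the step itself (mirroring the manual
        # find_steps_and_spikes call above) rather than relying on a hand-built model list.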
df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + # Two changes + a_y = np.concatenate((np.arange(-1, 31), [50], np.arange(51, 70), [0], np.arange(1, 30)), + axis=0) + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df_y = pd.DataFrame({'y': a_y}, index=a_date) + trend_models = [forecast_models.model_linear] + + # SolverConfig with trend + conf3 = ForecastInput( + source_id='source1', + l_model_trend=trend_models, + l_model_season=None, + df_y=df_y, + weights_y_values=1.0, + date_start_actuals=None + ) + + dict_result = run_l_forecast([conf3], + include_all_fits=True, do_find_steps_and_spikes=True) + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + def test_auto_find_sigmoid_spike(self): + # Setup + + # First do it manually + freq = 'D' + a_y = np.concatenate((np.arange(-1, 30), [50, 51], np.arange(31, 50)), + axis=0) + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df_y = pd.DataFrame({'y': a_y}, index=a_date).pipe(normalize_df) + a_x = np.arange(0, len(a_y)) + + steps, spikes = forecast_models.find_steps_and_spikes(a_x, a_y, a_date) + assert len(steps) == 0 + assert len(spikes) == 1 + spike_model = spikes[0] + trend_models = [forecast_models.model_linear * spike_model, + forecast_models.model_linear] + + # SolverConfig with trend + conf1 = ForecastInput( + source_id='source1', + l_model_trend=trend_models, + l_model_season=None, + df_y=df_y, + weights_y_values=1.0, + date_start_actuals=None + ) + + dict_result = run_l_forecast([conf1], + include_all_fits=True, do_find_steps_and_spikes=False) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + # Same automatically + trend_models = [forecast_models.model_linear] + # SolverConfig with trend + conf3 = ForecastInput( + source_id='source1', + l_model_trend=trend_models, + l_model_season=None, + df_y=df_y, + weights_y_values=1.0, + date_start_actuals=None + ) + + dict_result = run_l_forecast([conf3], + include_all_fits=True, + do_find_steps_and_spikes=True) + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(1)) + + def test_run_forecast_get_outliers(self): + + # Test 1 - no outliers + a_y = [20.0, 20.1, 20.2, 20.3, 20.4, 20.5] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = pd.DataFrame({'y': a_y}) + + dict_result = run_forecast(df, find_outliers=True, simplify_output=False, include_all_fits=True, + season_add_mult='add') + logger_info('Metadata', dict_result['metadata']) + logger_info('data', dict_result['data'].tail(3)) + + # Check that dtype of y is not corrupted by None values from weight mask - this happens 
when no spikes found + self.assertTrue(np.issubdtype(dict_result['data'].y, np.float64)) + + # Test 2 - Single step + a_y = [19.8, 19.9, 20.0, 20.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 10., 10.1, 10.2, 10.3, 10.4, + 10.5, 10.6, 10.7, 10.8, 10.9] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = pd.DataFrame({'y': a_y}) + + dict_result = run_forecast(df, find_outliers=True, simplify_output=False, include_all_fits=True, + season_add_mult='add') + logger_info('Metadata', dict_result['metadata']) + logger_info('data', dict_result['data'].tail(3)) + + # Check that dtype of y is not corrupted by None values from weight mask - this happens when no spikes found + self.assertTrue(np.issubdtype(dict_result['data'].y, np.float64)) + + # Test 3 - Single spike + + a_y = [19.8, 19.9, 20.0, 20.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 10., 20.7, 20.8, 20.9, 21.0, + 21.1, 21.2, 21.3, 21.4, 21.5] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df_spike = pd.DataFrame({'y': a_y}) + + dict_result = run_forecast(df_spike, find_outliers=True, + simplify_output=False, include_all_fits=True, + season_add_mult='add') + df_data = dict_result['data'] + mask = df_data.loc[df_data.model == 'actuals'].weight + self.assert_array_equal(mask, [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, ]) + + # Test 5 - 2 spikes and 1 step + a_y = [19.8, 19.9, 30.0, 30.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 10., 10.1, 10.2, 10.3, 10.4, + 10.5, 10.6, 30.7, 10.8, 10.9] + + df = pd.DataFrame({'y': a_y}).pipe(normalize_df) + + dict_result = run_forecast(df, find_outliers=True, simplify_output=False, include_all_fits=True, + season_add_mult='add') + logger_info('Metadata', dict_result['metadata']) + df_result = dict_result['data'] + logger_info('data', df_result.tail(3)) + mask = df_result.loc[df_result.model=='actuals'].weight + self.assert_array_equal(mask, [1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1]) + + def test_run_forecast_auto_season(self): + # Yearly sinusoidal function + + # With daily samples + length = 2 * 365 + # size will be +-10 +- uniform error + a_date = pd.date_range(start='2018-01-01', freq='D', periods=length) + a_y = (10 + np.random.uniform(low=0, high=1, size=length) + + 10 * (np.sin(np.linspace(-4 * np.pi, 4 * np.pi, length)))) + df_y = pd.DataFrame({'y': a_y}, index=a_date) + + dict_result = run_forecast(df_y, season_add_mult='add', simplify_output=False, include_all_fits=True, + l_model_trend=[forecast_models.model_linear]) + df_metadata = dict_result['metadata'] + + l_model_expected = ['linear', '(linear+(season_wday+season_fourier_yearly))', + '(linear+season_wday)', '(linear+season_fourier_yearly)'] + + self.assert_array_equal(df_metadata.model, l_model_expected) + logger_info('df_metadata:', df_metadata) + + # As above, with additive and multiplicative seasonality + + dict_result = run_forecast(df_y, season_add_mult='both', simplify_output=False, include_all_fits=True, + l_model_trend=[forecast_models.model_linear]) + df_metadata = dict_result['metadata'] + + l_model_expected = [ + '(linear*(season_wday*season_fourier_yearly))', + '(linear*season_fourier_yearly)', + '(linear*season_wday)', + '(linear+(season_wday+season_fourier_yearly))', + '(linear+season_fourier_yearly)', + '(linear+season_wday)', + 'linear' ] + + self.assert_array_equal(df_metadata.model.values, l_model_expected) + logger_info('df_metadata:', df_metadata) + + def test_run_forecast_with_weight(self): + df1 = pd.DataFrame({'y': np.arange(0, 10.), + 'date': 
pd.date_range('2014-01-01', periods=10, freq='D'), + 'weight': 1.}) + dict_result = run_forecast(simplify_output=False, df_y=df1, l_model_trend = [forecast_models.model_linear], + extrapolate_years=10./365) + + df_forecast = dict_result['forecast'] + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_forecast:', df_forecast.groupby(['source', 'model']).tail(30)) + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(30)) + + df_forecast_filtered = df_forecast.loc[~df_forecast.is_actuals & (df_forecast.date>'2014-01-10')] + self.assert_series_equal(df_forecast_filtered.y, df_forecast_filtered.q5) + + df1b = df1.copy() + df1b.loc[0,'weight']=0. + + + + dict_result = run_forecast(simplify_output=False, df_y=df1b, l_model_trend = [forecast_models.model_linear], + extrapolate_years=10./365) + + df_forecast = dict_result['forecast'] + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_forecast:', df_forecast.groupby(['source', 'model']).tail(30)) + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(30)) + + len_forecast=df_data.loc[~df_data.is_actuals].index.size + self.assertEquals(len_forecast,19) # First sample shouldn't be included due to weight=0 + + # Since fit is perfect, prediction interval should be equal to point forecast + df_forecast_filtered = df_forecast.loc[~df_forecast.is_actuals & (df_forecast.date>'2014-01-10')] + self.assert_series_equal(df_forecast_filtered.y, df_forecast_filtered.q5) + + # Test with model_ramp + # Param A of model_ramp needs to be within the 15-85 percentile of valid x values + # Before a bugfix, we would get initial guesses of A=2, with boundaries (5.6, 8.4) + # Note: somehow validate bounds doesn't catch this! + + df1c = df1.copy() + df1c.loc[0:4, 'weight'] = 0. + + dict_result = run_forecast(simplify_output=False, df_y=df1c, l_model_trend=[forecast_models.model_ramp], + extrapolate_years=10. / 365) + + df_forecast = dict_result['forecast'] + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_forecast:', df_forecast.groupby(['source', 'model']).tail(30)) + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(30)) + + len_forecast=df_data.loc[~df_data.is_actuals].index.size + self.assertEquals(len_forecast,15) # First 5 samples shouldn't be included due to weight=0 + + # # Since fit is perfect, prediction interval should be equal to point forecast + # df_forecast_filtered = df_forecast.loc[~df_forecast.is_actuals & (df_forecast.date>'2014-01-10')] + # self.assert_series_equal(df_forecast_filtered.y, df_forecast_filtered.q5) + + + def test_detect_freq(self): + + # Initial test - what happens with single sample input? 
+ a_date = pd.a_date = pd.date_range('2014-01-01', periods=1, freq='H') + result = detect_freq(a_date) + #self.assertEquals(result, 'H') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=24*7, freq='H') + result = detect_freq(a_date) + self.assertEquals(result, 'H') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 365, freq='D') + result = detect_freq(a_date) + self.assertEquals(result, 'D') + + l_freq_wday = ['W-MON', 'W-TUE', 'W-WED', 'W-THU', 'W-FRI', 'W-SAT', 'W-SUN'] + for freq_wday in l_freq_wday: + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 52, freq=freq_wday) + result = detect_freq(a_date) + self.assertEquals(result, freq_wday) + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 12, freq='M') + result = detect_freq(a_date) + self.assertEquals(result, 'M') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 12, freq='MS') + result = detect_freq(a_date) + self.assertEquals(result, 'MS') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 12, freq='Q') + result = detect_freq(a_date) + self.assertEquals(result, 'Q') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 12, freq='Y') + result = detect_freq(a_date) + self.assertEquals(result, 'Y') + + # Test with input dataframe + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=24 * 7, freq='H') + df_y = pd.DataFrame({'date': a_date}) + result = detect_freq(df_y) + self.assertEquals(result, 'H') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 365, freq='D') + df_y = pd.DataFrame({'date': a_date}) + result = detect_freq(df_y) + self.assertEquals(result, 'D') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 12, freq='M') + df_y = pd.DataFrame({'date': a_date}) + result = detect_freq(df_y) + self.assertEquals(result, 'M') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 12, freq='Q') + df_y = pd.DataFrame({'date': a_date}) + result = detect_freq(df_y) + self.assertEquals(result, 'Q') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 12, freq='Y') + df_y = pd.DataFrame({'date': a_date}) + result = detect_freq(df_y) + self.assertEquals(result, 'Y') + + a_date = pd.a_date = pd.date_range('2014-01-01', periods=4 * 12, freq='YS') + df_y = pd.DataFrame({'date': a_date}) + result = detect_freq(df_y) + self.assertEquals(result, 'YS') + + # Test with sparse input series + a_date = pd.to_datetime(['2018-08-01', '2018-08-09']) + df_y = pd.DataFrame({'date':a_date}) + result = detect_freq(df_y) + self.assertEquals(result, 'D') + + # TODO: ADD TEST WITH NULL VALUES, E.G. MODEL_NAIVE_WDAY + def test_get_pi(self): + + def check_result(df_result): + self.assertTrue('q5' in df_result.columns) + df_result_actuals = df_result.loc[df_result.is_actuals] + if 'is_weight' in df_result_actuals.columns: + df_result_actuals = df_result_actuals.loc[~df_result_actuals.is_weight] + date_max_actuals = df_result_actuals.date.max() + logger_info('debug: date max actuals', date_max_actuals) + + df_result_forecast = df_result.loc[~df_result.is_actuals & (df_result.date > date_max_actuals)] + self.assertFalse(df_result_forecast.q5.isnull().any()) + + # First test with single source + # then test applied function on df grouped by source + + a_date_actuals = pd.date_range('2014-01-01', periods=10, freq='W') + a_y_actuals = np.arange(0, 10.) 
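        # Build 10 weekly actuals plus a 20-point best-fit forecast starting on the
        # same date; get_pi should then add prediction-interval columns (e.g. q5) and,
        # per check_result above, leave no null quantiles beyond the last actuals date.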
+ df_actuals = ( + pd.DataFrame({'date': a_date_actuals, 'y': a_y_actuals, + 'source': 's1', 'is_actuals': True, 'is_best_fit': False, 'model': 'actuals'}) + ) + + a_date = pd.date_range('2014-01-01', periods=20, freq='W') + a_y = np.arange(0, 20.) + (np.tile([-1, 1], (10)) * np.arange(2, 0., -0.1)) + + df_fcast = ( + pd.DataFrame({'date': a_date, 'y': a_y, + 'source': 's1', 'is_actuals': False, 'is_best_fit': True, 'model': 'linear'}) + ) + + df1 = pd.concat([df_actuals, df_fcast], ignore_index=True, sort=False) + + df_result = get_pi(df1, n=100) + df_result0 = df_result + # logger_info('df_result1:', df_result1) + logger_info('df_result1:', df_result.groupby(['source', 'model']).head(1)) + logger_info('df_result1:', df_result.groupby(['source', 'model']).tail(1)) + # TODO: Add checks + check_result(df_result) + + # Test 1b - input dataframe without is_best_fit column, source column + df1c = df1[['date', 'is_actuals', 'model', 'y']] + df_result = get_pi(df1c, n=100) + # logger_info('df_result1:', df_result1) + logger_info('df_result1:', df_result.groupby(['model']).head(1)) + logger_info('df_result1:', df_result.groupby(['model']).tail(1)) + + check_result(df_result) + + # Test 2 - 2 sources + + df1b = df1.copy() + df1b.source = 's2' + df2 = pd.concat([df1, df1b], sort=False) + + # df_result2 = df2.groupby('source').apply(get_pi, n=100).reset_index(drop=True) + df_result = get_pi(df2, n=100) + # logger_info('df_result2:', df_result2) + logger_info('df_result2:', df_result.groupby(['source', 'model']).head(1)) + logger_info('df_result2:', df_result.groupby(['source', 'model']).tail(1)) + # TODO: Add checks + + check_result(df_result) + + # Test 3 - Input has actuals but no forecast - can happen if fit not possible + + df3 = df_actuals + df_result = get_pi(df3, n=100) + self.assertIsNotNone(df3) + self.assertFalse('q5' in df_result.columns) + # logger_info('df_result1:', df_result1) + logger_info('df_result3:', df_result.groupby(['source', 'model']).head(1)) + logger_info('df_result3:', df_result.groupby(['source', 'model']).tail(1)) + # + # Test 4 - Input has null values at the end + a_date_actuals = pd.date_range('2014-01-01', periods=10, freq='W') + a_y_actuals = np.arange(0, 10.) + df_actuals = ( + pd.DataFrame({'date': a_date_actuals, 'y': a_y_actuals, + 'source': 's1', 'is_actuals': True, 'is_best_fit': False, 'model': 'actuals'}) + ) + + a_date = pd.date_range('2014-01-01', periods=20, freq='W') + a_y = np.arange(0, 20.) 
+ (np.tile([-1, 1], (10)) * np.arange(2, 0., -0.1)) + + df_fcast = ( + pd.DataFrame({'date': a_date, 'y': a_y, + 'source': 's1', 'is_actuals': False, 'is_best_fit': True, 'model': 'linear'}) + ) + + df1 = pd.concat([df_actuals, df_fcast], ignore_index=True, sort=False) + df_result = get_pi(df1, n=100) + + a_date_actuals_withnull = pd.date_range('2014-01-01', periods=20, freq='W') + a_y_actuals_withnull = np.concatenate([np.arange(0, 10.), np.full(10, np.NaN)]) + df_actuals_withnull = ( + pd.DataFrame({'date': a_date_actuals, 'y': a_y_actuals, + 'source': 's1', 'is_actuals': True, 'is_best_fit': False, 'model': 'actuals'}) + ) + + a_date_withnull = pd.date_range('2014-01-01', periods=20, freq='W') + + df1_withnull = pd.concat([df_actuals_withnull, df_fcast], ignore_index=True, sort=False) + df_result_withnull = get_pi(df1_withnull, n=100) + + logger_info('df_result:', df_result.groupby(['source', 'model']).tail(3)) + logger_info('df_result with null:', df_result_withnull.groupby(['source', 'model']).tail(3)) + # Prediction intervals are random, so we need to exclude them from comparison + self.assert_frame_equal(df_result[['date', 'source', 'is_actuals', 'model', 'y']], + df_result_withnull[['date', 'source', 'is_actuals', 'model', 'y']]) + + # Test 4b - Input with null values at the end, weight column + df_weight = ( + pd.DataFrame({'date': a_date, 'y': 1, + 'source': 's1', 'is_actuals': False, 'is_best_fit': True, 'model': 'linear', + 'is_weight': True}) + ) + df_weight_withnull = ( + pd.DataFrame({'date': a_date_withnull, 'y': 1, + 'source': 's1', 'is_actuals': False, 'is_best_fit': True, 'model': 'linear', + 'is_weight': True}) + ) + + df1['is_weight'] = False + df1_withnull['is_weight'] = False + + df1b = pd.concat([df1, df_weight], ignore_index=True, sort=False) + df1b_withnull = pd.concat([df1_withnull, df_weight_withnull], ignore_index=True, sort=False) + + df_result_b = get_pi(df1b, n=100) + df_result_b_withnull = get_pi(df1b_withnull, n=100) + + logger_info('df_result b :', df_result_b.groupby(['source', 'model']).tail(3)) + logger_info('df_result b with null:', df_result_b_withnull.groupby(['source', 'model']).tail(3)) + # Prediction intervals are random, so we need to exclude them from comparison + self.assert_frame_equal(df_result_b[['date', 'source', 'is_actuals', 'model', 'y']], + df_result_b_withnull[['date', 'source', 'is_actuals', 'model', 'y']]) + + check_result(df_result_b) + check_result(df_result_b_withnull) + + # Test 4C - Input has null values at the start of actuals series + a_date_actuals = pd.date_range('2014-01-01', periods=10, freq='W') + a_y_actuals = np.arange(0, 10.) + df_actuals = ( + pd.DataFrame({'date': a_date_actuals, 'y': a_y_actuals, + 'source': 's1', 'is_actuals': True, 'is_best_fit': False, 'model': 'actuals'}) + ) + + a_date = pd.date_range('2014-01-01', periods=20, freq='W') + a_y = np.arange(0, 20.) 
+ (np.tile([-1, 1], (10)) * np.arange(2, 0., -0.1)) + + df_fcast = ( + pd.DataFrame({'date': a_date, 'y': a_y, + 'source': 's1', 'is_actuals': False, 'is_best_fit': True, 'model': 'linear'}) + ) + + df1 = pd.concat([df_actuals, df_fcast], ignore_index=True, sort=False) + df_result = get_pi(df1, n=100) + + a_date_actuals_withnull = pd.date_range('2014-01-01', periods=10, freq='W') + a_y_actuals_withnull = np.concatenate([np.full(5, np.NaN),np.arange(0, 5.)]) + df_actuals_withnull = ( + pd.DataFrame({'date': a_date_actuals_withnull, 'y': a_y_actuals_withnull, + 'source': 's1', 'is_actuals': True, 'is_best_fit': False, 'model': 'actuals'}) + ) + + a_date_withnull = pd.date_range('2014-01-01', periods=10, freq='W') + + df1_withnull = pd.concat([df_actuals_withnull, df_fcast], ignore_index=True, sort=False) + df_result_withnull = get_pi(df1_withnull, n=100) + + logger_info('df_actuals_withnull:', df_actuals_withnull.groupby(['source', 'model']).head(20)) + logger_info('df_result:', df_result.groupby(['source', 'model']).tail(3)) + logger_info('df_result with null:', df_result_withnull.groupby(['source', 'model']).tail(100)) + # todo - add proper expected value, uncomment assert + # self.assert_frame_equal(df_result[['date', 'source', 'is_actuals', 'model', 'y']], + # df_result_withnull[['date', 'source', 'is_actuals', 'model', 'y']]) + + + # Test 4D - Input has null values at the start of actuals series + a_date_actuals = pd.date_range('2014-01-01', periods=10, freq='W') + a_y_actuals = np.arange(0, 10.) + df_actuals = ( + pd.DataFrame({'date': a_date_actuals, 'y': a_y_actuals, + 'source': 's1', 'is_actuals': True, 'is_best_fit': False, 'model': 'actuals'}) + ) + + a_date = pd.date_range('2014-01-01', periods=20, freq='W') + a_y = np.arange(0, 20.) + (np.tile([-1, 1], (10)) * np.arange(2, 0., -0.1)) + a_y_withnull = np.concatenate([np.full(5,np.NaN),np.arange(0,15.),]) + + df_fcast = ( + pd.DataFrame({'date': a_date, 'y': a_y, + 'source': 's1', 'is_actuals': False, 'is_best_fit': True, 'model': 'linear'}) + ) + + df_fcast_withnull = ( + pd.DataFrame({'date': a_date, 'y': a_y_withnull, + 'source': 's1', 'is_actuals': False, 'is_best_fit': True, 'model': 'linear'}) + ) + + df1 = pd.concat([df_actuals, df_fcast], ignore_index=True, sort=False) + df_result = get_pi(df1, n=100) + + + df1_withnull = pd.concat([df_actuals, df_fcast_withnull], ignore_index=True, sort=False) + df_result_withnull = get_pi(df1_withnull, n=100) + + logger_info('df_fcast_withnull:', df_fcast_withnull.groupby(['source', 'model']).head(20)) + logger_info('df_result:', df_result.groupby(['source', 'model']).tail(100)) + logger_info('df_result with null:', df_result_withnull.groupby(['source', 'model']).tail(100)) + # Prediction intervals are random, so we need to exclude them from comparison + # self.assert_frame_equal(df_result[['date', 'source', 'is_actuals', 'model', 'y']], + # df_result_withnull[['date', 'source', 'is_actuals', 'model', 'y']]) + # TODO: ADD VALID CHECK - + + + def test_get_pi_gap(self): + def check_result(df_result): + self.assertTrue('q5' in df_result.columns) + + # Test 1 - Input has gaps + + a_date_actuals = pd.date_range('2014-01-01', periods=10, freq='W') + a_y_actuals = np.arange(0, 10.) + df_actuals = ( + pd.DataFrame({'date': a_date_actuals, 'y': a_y_actuals, + 'source': 's1', 'is_actuals': True, 'is_best_fit': False, 'model': 'actuals'}) + ) + + a_date = pd.date_range('2014-01-01', periods=20, freq='W') + a_y = np.arange(0, 20.) 
+ (np.tile([-1, 1], (10)) * np.arange(2, 0., -0.1)) + + df_fcast = ( + pd.DataFrame({'date': a_date, 'y': a_y, + 'source': 's1', 'is_actuals': False, 'is_best_fit': True, 'model': 'linear'}) + ) + + df_actuals_gap = pd.concat([df_actuals.head(3), df_actuals.tail(3)]) + + df = pd.concat([df_actuals_gap, df_fcast], ignore_index=True, sort=False) + + df_result = get_pi(df, n=100) + # logger_info('df_result1:', df_result1) + logger_info('df_result1:', df_result.groupby(['source', 'model']).head(2)) + logger_info('df_result1:', df_result.groupby(['source', 'model']).tail(2)) + + check_result(df_result) + + # Test 2 - Input has nulls + + df_actuals_null = df_actuals.copy() + df_actuals_null.loc[5, 'y'] = np.NaN + + logger_info('df_actuals_null:', df_actuals_null) + + df = pd.concat([df_actuals_null, df_fcast], ignore_index=True, sort=False) + + df_result = get_pi(df, n=100) + # logger_info('df_result1:', df_result1) + logger_info('df_result2:', df_result.groupby(['source', 'model']).head(20)) + logger_info('df_result2:', df_result.groupby(['source', 'model']).tail(20)) + + self.assertFalse(df_result.loc[df_result.date > df_actuals.date.max()].q5.isnull().any()) + + check_result(df_result) + + def test_forecast_pi_missing(self): + path_candy = os.path.join(base_folder, 'candy_production.csv') + df_monthly_candy = pd.read_csv(path_candy) + dict_result = run_forecast(df_monthly_candy, + col_name_y='IPG3113N', + col_name_date='observation_date', extrapolate_years=2, + simplify_output=False) + + df_fcast = dict_result.get('forecast') + logger_info('df_fcast: ', df_fcast.tail()) + + self.assertIn('q5', df_fcast.columns) + + def test_run_forecast_yearly_model(self): + df1 = pd.DataFrame({'y': np.arange(0, 10.), 'date': pd.date_range('2000-01-01', periods=10, freq='YS')}) + dict_result = run_forecast(simplify_output=False, df_y=df1, l_model_trend=[forecast_models.model_linear], + extrapolate_years=10.) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(30)) + + # Repeat test - 2 sources + + df1a = df1.copy() + df1b = df1.copy() + df1a['source'] = 'src1' + df1b['source'] = 'src2' + df2 = pd.concat([df1a, df1b], sort=False, ignore_index=True) + + logger_info('df input:', df2) + + dict_result = run_forecast(simplify_output=False, df_y=df2, l_model_trend=[forecast_models.model_linear], + extrapolate_years=10.) + + df_data = dict_result['data'] + df_metadata = dict_result['metadata'] + df_optimize_info = dict_result['optimize_info'] + + logger_info('df_metadata:', df_metadata) + logger_info('df_optimize_info:', df_optimize_info) + logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60)) + + # Same, with simplify_output=True + + df_result = run_forecast(simplify_output=True, df_y=df2, l_model_trend=[forecast_models.model_linear], + extrapolate_years=10.) 
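        # With simplify_output=True, run_forecast appears to return a single combined
        # dataframe rather than the dict unpacked in the earlier tests, so the result
        # is logged directly below.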
+ logger_info('df_result:', df_result) diff --git a/tests/test_forecast_model.py b/tests/test_forecast_model.py new file mode 100644 index 0000000..f5dbe06 --- /dev/null +++ b/tests/test_forecast_model.py @@ -0,0 +1,900 @@ +""" + +Author: Pedro Capelastegui +Created on 04/12/2015 +""" + +import logging +import unittest +import itertools +import pandas as pd, numpy as np +from argparse import Namespace + +import numpy as np +import pandas as pd +from unittest import TestCase +from anticipy.utils_test import PandasTest +from anticipy.forecast_models import * +from anticipy.forecast import normalize_df +from anticipy.model_utils import interpolate_df + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def logger_info(msg, data): + logger.info(msg + '\n%s', data) + + +pd.set_option('display.max_columns', 40) +pd.set_option('display.max_rows', 200) +pd.set_option('display.width', 1000) + + +def get_initial_guess(f_model, t_values): + return f_model(t_values, None, None, get_aic_k=False) + +def array_ones_in_indices(n, l_indices): + return np.isin(np.arange(0,n),l_indices).astype(float) + +def array_zeros_in_indices(n, l_indices): + return (~np.isin(np.arange(0,n),l_indices)).astype(float) + +class TestForecastModel(PandasTest): + def setUp(self): + pass + + def test_model_naive(self): + a_x = np.arange(0, 10) + a_date = pd.date_range('2014-01-01', periods=10, freq='D') + a_y = 10*a_x + df_actuals = pd.DataFrame({'date':a_date,'x':a_x,'y':a_y}).head() + + a_y_result = model_naive(a_x, a_date, None, df_actuals=df_actuals) + logger_info('a_y result: ', a_y_result) + a_y_expected = np.array([0., 0., 10, 20., 30., 40, 40., 40., 40., 40.,]) + self.assert_array_equal(a_y_result, a_y_expected) + + # TODO: model composition disabled, check that exception is thrown + # # Model composition + # a_params = np.array([1.,0.,]) + # a_y_result = (model_naive + model_linear) (a_x, a_date, a_params, df_actuals=df_actuals) + # logger_info('a_y result: ', a_y_result) + + def test_model_snaive_wday(self): + a_x = np.arange(0, 21) + a_date = pd.date_range('2014-01-01', periods=21, freq='D') + a_y = 10.*a_x + df_actuals = pd.DataFrame({'date':a_date,'x':a_x,'y':a_y}).head(7) + + a_y_result = model_snaive_wday(a_x, a_date, None, df_actuals=df_actuals) + logger_info('a_y result: ', a_y_result) + a_y_expected = np.array([np.NaN]*7+np.arange(0,70., 10.).tolist()*2) + self.assert_array_equal(a_y_result, a_y_expected) + + # TODO: model composition disabled, check that exception is thrown + # # Model composition + # a_params = np.array([1.,0.,]) + # a_y_result = (model_naive + model_linear) (a_x, a_date, a_params, df_actuals=df_actuals) + # logger_info('a_y result: ', a_y_result) + + def test_forecast_model_simple_models(self): + # TODO: test all models with is_mult True and False + + a_x = np.arange(0, 10) + a_date = pd.date_range('2014-01-01', periods=10, freq='D') + + def test_model(name, model, params, a_expected, l_is_mult=None, a_date=a_date, a_x = a_x): + if l_is_mult is None: + l_is_mult = [True, False] + for is_mult in l_is_mult: + params = np.array(params) + a = model(a_x, a_date, params, is_mult) + logger_info('a {}, is_mult={} :'.format(name, is_mult), a) + self.assert_array_equal(a, a_expected) + # Test init params + params = model.f_init_params(None, None, None) + self.assertIsInstance(params, np.ndarray) + bounds = model.f_bounds(None, None, None) + logger.info('params: %s', params) + self.assertTrue(validate_initial_guess(params, bounds)) + params = model.f_init_params(a_x, 
None, a_x) + self.assertIsInstance(params, np.ndarray) + bounds = model.f_bounds(a_x, None, a_x) + logger.info('a_x: %s', a_x) + logger.info('params: %s', params) + logger.info('bounds: %s', bounds) + self.assertTrue(validate_initial_guess(params, bounds)) + params = model.f_init_params(None, a_x, a_date) + self.assertIsInstance(params, np.ndarray) + bounds = model.f_bounds(None, a_x, a_x) + logger.info('a_x: %s', a_x) + logger.info('params: %s', params) + logger.info('bounds: %s', bounds) + self.assertTrue(validate_initial_guess(params, bounds)) + params = model.f_init_params(a_x, a_x, a_date) + self.assertIsInstance(params, np.ndarray) + bounds = model.f_bounds(a_x, a_x, a_x) + logger.info('params: %s', params) + self.assertTrue(validate_initial_guess(params, bounds)) + + test_model('constant', model_constant, [42], + np.full(10, 42.)) + + test_model('linear', model_linear, [-1., 10], + np.arange(10., 0, -1)) + + test_model('ramp', model_ramp, [5., 1.], + np.concatenate([np.full(5, 0.), np.arange(0, 5.)]), [False]) + + test_model('ramp', model_ramp, [5., 1.], + np.concatenate([np.full(5, 1.), np.arange(1, 6.)]), [True]) + + test_model('exp', model_exp, [10., 2], + np.array([10., 20., 40., 80., 160., 320., 640., 1280., 2560., 5120.])) + + test_model('step', model_step, [5., 100.], + np.array(5 * [0.] + 5 * [100.]), [False]) + + test_model('step', model_step, [5., 100.], + np.array(5 * [1.] + 5 * [100.]), [True]) + + test_model('step_date', get_model_step_date('2014-01-06'), [100.], + np.array(5 * [0.] + 5 * [100.]), [False]) + + test_model('step_date', get_model_step_date('2014-01-06'), [100.], + np.array(5 * [1.] + 5 * [100.]), [True]) + + test_model('spike', model_spike, [10., 4., 6.], + np.array(4 * [0.] + 2 * [10.] + 4 * [0.]), [False]) + + test_model('spike', model_spike, [10., 4., 6.], + np.array(4 * [1.] + 2 * [10.] + 4 * [1.]), [True]) + + test_model('spike_date', get_model_spike_date('2014-01-05', '2014-01-07'), + [10.], + np.array(4 * [0.] + 2 * [10.] + 4 * [0.]), [False]) + + test_model('spike_date', get_model_spike_date('2014-01-05', '2014-01-07'), + [10.], + np.array(4 * [1.] + 2 * [10.] + 4 * [1.]), [True]) + + test_model('2 steps', model_two_steps, [5., 100., 7, 200.], + np.array(5 * [0.] + 2 * [100.] + 3 * [300.]), [False]) + + test_model('2 steps', model_two_steps, [5., 100., 7, 3.], + np.array(5 * [1.] + 2 * [100.] 
+ 3 * [300.]), [True]) + + test_model('season_wday', model_season_wday, 10 * np.arange(1., 7.), + np.array([20., 30., 40., 50., 60., 0, 10., 20., 30., 40.]), [False]) + + test_model('season_wday', model_season_wday, 10 * np.arange(1., 7.), + np.array([20., 30., 40., 50., 60., 1, 10., 20., 30., 40.]), [True]) + + + a_x2 = np.arange(0, 12) + a_date2 = pd.date_range('2014-01-01', periods=12, freq='D') + + + test_model('season_month', model_season_month, 10*np.arange(2.,13.), + np.array([60., 70., 80., 90., 100, 110., 120., 0., 20., 30.,40.,50., ]), [False], + a_date= pd.date_range('2014-06-01', periods=12, freq='M'), a_x=a_x2) + + test_model('season_month', model_season_month, 10*np.arange(2.,13.), + np.array([60., 70., 80., 90., 100, 110., 120., 1., 20., 30., 40.,50.,]), [True], + a_date= pd.date_range('2014-06-01', periods=12, freq='M'), a_x=a_x2) + + test_model('season_fourier_yearly', model_season_month, 10*np.arange(2.,13.), + np.array([60., 70., 80., 90., 100, 110., 120., 1., 20., 30.,40.,50., ]), [True], + a_date= pd.date_range('2014-06-01', periods=12, freq='M'), a_x=a_x2) + + # test fourier model + from anticipy.forecast_models import _f_init_params_fourier + + for is_mult in [False, True]: + a_x = 10 * np.arange(2., 13.) + a_date = pd.date_range('2014-06-01', periods=10, freq='M') + params = _f_init_params_fourier() + a = model_season_fourier_yearly(a_x, a_date, params, is_mult) + logger_info('a {}, is_mult={} :'.format('model_season_fourier_yearly', is_mult), a) + + for is_mult in [False, True]: + a_x = 10 * np.arange(2., 13.) + a_date = pd.date_range('2014-06-01', periods=10, freq='M') + params = np.full(20, 1.) + a = model_season_fourier_yearly(a_x, a_date, params, is_mult) + logger_info('a {}, is_mult={} :'.format('model_season_fourier_yearly', is_mult), a) + + def test_forecast_model_composite(self): + a_x = np.arange(1, 11.) + a_y = np.arange(1, 11.) + a_date = pd.date_range('2014-01-01', periods=10, freq='D') + a_date_month = pd.date_range('2014-01-01', periods=10, freq='M') + + dict_model = { + 'constant': model_constant, + 'linear': model_linear, + 'ramp': model_ramp, + 'exp': model_exp, + 'season_wday': model_season_wday, + # TODO: ADD season_wday_2 + 'season_month': model_season_month, + 'step': model_step, + 'two_steps': model_two_steps, + } + dict_params = { + 'constant': np.array([1.]), + 'linear': np.array([1., 0.]), + 'ramp': np.array([6., 1.]), + 'exp': np.array([1., 2.]), + 'season_wday': np.arange(1., 7.), + 'season_month': np.arange(2., 13.), + 'step': np.array([6., 100.]), + 'two_steps': np.array([6., 100., 8, 200.]), + } + dict_expected_add = { + 'constant': np.full(10, 1.), + 'linear': np.arange(1., 11.), + 'ramp': np.concatenate([np.full(5, 0.), np.arange(0, 5.)]), + 'exp': 2 ** np.arange(1., 11.), + 'season_wday': np.arange(2., 12., ) % 7, + 'season_month': np.full(10, 0.), + 'step': np.array(5 * [0.] + 5 * [100.]), + 'two_steps': np.array(5 * [0.] + 2 * [100.] + 3 * [300.]), + } + dict_expected_mult = { + 'constant': np.full(10, 1.), + 'linear': np.arange(1., 11.), + 'ramp': np.concatenate([np.full(5, 1.), np.arange(1, 6.)]), + 'exp': 2 ** np.arange(1., 11.), + 'season_wday': np.array([2., 3., 4., 5., 6., 1., 1., 2., 3., 4., ]), + 'season_month': np.full(10, 1.), + 'step': np.array(5 * [1.] + 5 * [100.]), + 'two_steps': np.array(5 * [1.] + 2 * [100.] 
+ 3 * [20000.]), + } + + def test_model_1(key): + model = dict_model[key] + initial_guess = model.f_init_params(a_x, a_y) + logger.info('Testing model %s - name: %s', key, model.name) + self.assert_array_equal(model(a_x, a_date, dict_params[key]), dict_expected_add[key]) + logger.info('Initial guess: %s', model.f_init_params(a_x, a_y)) + self.assertEquals(len(initial_guess), model.n_params) + + for key in dict_model.keys(): + test_model_1(key) + + def test_model_2_add(key1, key2): + model = dict_model[key1] + dict_model[key2] + initial_guess = model.f_init_params(a_x, a_y) + logger.info('Testing model %s, %s - name: %s', key1, key2, model.name) + logger.info('Parameters: %s , %s', dict_params[key1], dict_params[key2]) + logger.info('Initial guess: %s', initial_guess) + self.assertEquals(len(initial_guess), model.n_params) + model_output = model(a_x, a_date, + np.concatenate([dict_params[key1], dict_params[key2]])) + logger.info('Model output: %s', model_output) + self.assert_array_equal(model_output, + dict_expected_add[key1] + dict_expected_add[key2]) + + for key1, key2 in itertools.product(dict_model.keys(), dict_model.keys()): + logger.info('Keys: %s , %s', key1, key2) + test_model_2_add(key1, key2) + + def test_model_2_mult(key1, key2): + model = dict_model[key1] * dict_model[key2] + initial_guess = model.f_init_params(a_x, a_y) + logger.info('Testing model %s, %s - name: %s', key1, key2, model.name) + logger.info('Parameters: %s , %s', dict_params[key1], dict_params[key2]) + logger.info('Initial guess: %s', initial_guess) + self.assertEquals(len(initial_guess), model.n_params) + model_output = model(a_x, a_date, + np.concatenate([dict_params[key1], dict_params[key2]])) + logger.info('Model output: %s', model_output) + self.assert_array_equal(model_output, + dict_expected_mult[key1] * dict_expected_mult[key2]) + + for key1, key2 in itertools.product(dict_model.keys(), dict_model.keys()): + logger.info('Keys: %s , %s', key1, key2) + test_model_2_mult(key1, key2) + + def test_forecast_model_composite_null(self): + a_x = np.arange(0, 10.) + a_y = np.arange(0, 10.) + a_date = pd.date_range('2014-01-01', periods=10, freq='D') + a_date_month = pd.date_range('2014-01-01', periods=10, freq='M') + + dict_model = { + 'constant': model_constant, + 'linear': model_linear, + 'exp': model_exp, + 'season_wday': model_season_wday, + 'season_month': model_season_month, + } + + dict_params = { + 'constant': np.array([1.]), + 'linear': np.array([1., 0.]), + 'exp': np.array([1., 2.]), + 'season_wday': np.arange(1., 7.), + 'season_month': np.arange(1., 13.) 
+ } + dict_expected = { + 'constant': np.full(10, 1.), + 'linear': np.arange(0., 10.), + 'exp': 2 ** np.arange(0., 10.), + 'season_wday': np.arange(2., 12., ) % 7, + 'season_month': np.full(10, 0.), + } + + def test_model_2_add_null(key1): + model = dict_model[key1] + model_null + initial_guess = model.f_init_params(a_x, a_y) + logger.info('Testing model %s, - name: %s', key1, model.name) + logger.info('Parameters: %s', dict_params[key1]) + logger.info('Initial guess: %s', initial_guess) + self.assertEquals(len(initial_guess), model.n_params) + self.assert_array_equal(model(a_x, a_date, + dict_params[key1]), + dict_expected[key1]) + + for key in dict_model.keys(): + test_model_2_add_null(key) + + def test_model_2_mult_null(key1): + model_original = dict_model[key1] + model = model_original * model_null + initial_guess = model.f_init_params(a_x, a_y) + logger.info('Testing model %s, - name: %s', key1, model.name) + logger.info('Parameters: %s', dict_params[key1]) + logger.info('Initial guess: %s', initial_guess) + self.assertEquals(model, model_original) + + for key in dict_model.keys(): + test_model_2_mult_null(key) + + def test_forecast_model_composite_3(self): + # Test composition of 3+ models + a_x = np.arange(0, 10.) + a_y = np.arange(0, 10.) + a_date = pd.date_range('2014-01-01', periods=10, freq='D') + a_date_month = pd.date_range('2014-01-01', periods=10, freq='M') + + dict_model = { + 'constant': model_constant, + 'linear': model_linear, + 'ramp': model_ramp, + 'exp': model_exp, + 'season_wday': model_season_wday, + 'season_month': model_season_month, + } + + dict_params = { + 'constant': np.array([1.]), + 'linear': np.array([1., 0.]), + 'ramp': np.array([6., 1.]), + 'exp': np.array([1., 2.]), + 'season_wday': np.arange(1., 7.), + 'season_month': np.arange(1., 13.) 
+ } + dict_expected = { + 'constant': np.full(10, 1.), + 'linear': np.arange(0., 10.), + 'ramp': np.concatenate([np.full(5, 0.), np.arange(0, 5.)]), + 'exp': 2 ** np.arange(0., 10.), + 'season_wday': + # np.arange(2., 12., ) % 7, + np.array([2., 3., 4., 5., 6., 1., 1., 2., 3., 4.]), + 'season_month': np.full(10, 1.), + } + + def test_model_3(model, params, expected): + initial_guess = model.f_init_params(a_x, a_y) + logger.info('Testing model: %s', model.name) + logger.info('Parameters: %s', params) + logger.info('Initial guess: %s', initial_guess) + self.assertEquals(len(initial_guess), model.n_params) + self.assert_array_equal(model(a_x, a_date, params), + expected) + + test_model_3( + (model_linear * model_linear) + model_constant, + np.concatenate([dict_params['linear'], dict_params['linear'], dict_params['constant']]), + (dict_expected['linear'] * dict_expected['linear']) + dict_expected['constant'] + ) + + test_model_3( + model_linear * (model_linear + model_constant), + np.concatenate([dict_params['linear'], dict_params['linear'], dict_params['constant']]), + dict_expected['linear'] * (dict_expected['linear'] + dict_expected['constant']) + ) + + test_model_3( + (model_linear * model_season_wday) + model_constant, + np.concatenate([dict_params['linear'], dict_params['season_wday'], dict_params['constant']]), + (dict_expected['linear'] * dict_expected['season_wday']) + dict_expected['constant'] + ) + + def test_forecast_model_bounds(self): + + dict_model = { + 'constant': model_constant, + 'linear': model_linear, + 'exp': model_exp, + 'season_wday': model_season_wday, + 'season_month': model_season_month, + 'step': model_step, + 'two_steps': model_two_steps, + 'sigmoid_step': model_sigmoid_step, + 'ramp': model_ramp + } + dict_expected = dict() + for model_name, model_obj in dict_model.items(): + n_params = model_obj.n_params + exp = n_params * [-np.inf], n_params * [np.inf] + dict_expected[model_name] = exp + + # Manually set the boundaries here + dict_expected['sigmoid_step'] = ([-np.inf, -np.inf, 0.0], [np.inf, np.inf, np.inf]) + + def test_model_bounds(key, model, expected): + bounds = model.f_bounds() + params = model.n_params + logger.info('Testing model: %s', model.name) + logger.info('Bounds: %s', bounds) + logger.info('Expected: %s', expected) + self.assertEquals(params, len(bounds[0])) + self.assertTupleEqual(bounds, expected) + + for model_name, model_obj in dict_model.items(): + test_model_bounds(model_name, model_obj, dict_expected[model_name]) + + def test_get_model_outliers(self): + # TODO: change input dfs to normalized form, rather than call normalize_df + + # Test 1 - no outliers + a_y = [20.0, 20.1, 20.2, 20.3, 20.4, 20.5] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = pd.DataFrame({'y': a_y}).pipe(normalize_df) + + mask_step, mask_spike = get_model_outliers(df) + logger_info('Model 1:', mask_step) + self.assertIsNone(mask_step) + + # 1b - with datetime index + df = pd.DataFrame({'y': a_y}, index=a_date).pipe(normalize_df) + + mask_step, mask_spike = get_model_outliers(df) + logger_info('Model 1b:', mask_step) + self.assertIsNone(mask_step) + + # Test 2 - Single step + a_y = np.array([19.8, 19.9, 20.0, 20.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 10., 10.1, 10.2, 10.3, 10.4, + 10.5, 10.6, 10.7, 10.8, 10.9]) + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = pd.DataFrame({'y': a_y}).pipe(normalize_df) + + + mask_step,mask_spike = get_model_outliers(df) + logger_info('Model 2:', mask_step) + 
self.assert_array_equal(mask_step, + array_ones_in_indices(a_y.size, 9)) + + # 2b - with date column + df = pd.DataFrame({'y': a_y}, index=a_date).pipe(normalize_df) + + mask_step, mask_spike = get_model_outliers(df) + logger_info('Model 2b:', mask_step) + self.assert_array_equal(mask_step, + array_ones_in_indices(a_y.size, 9)) + + # Test 3 - Two step changes + a_y = np.array([-1, 0, 1, 2, 3, 5, 6, 8, 10, 15, 16, 18, + 20.1, 20.2, 20.3, 20.4, 20.5, 20.6, + 10., 10.1, 10.2, 10.3, 10.4]) + + df = pd.DataFrame({'y': a_y}).pipe(normalize_df) + + + mask_step,mask_spike = get_model_outliers(df) + logger_info('Model 3:', mask_step) + self.assert_array_equal(mask_step, + array_ones_in_indices(a_y.size, [9,18])) + + # Test 4 - Consecutive changes + a_y = np.array([-1, 0, 1, 2, 3, 5, 6, 8, 15, 16, 21, 20.1, + 20.2, 20.3, 20.4, 20.5, 20.6, 20.7, 20.8, + 10., 10.1, 10.2, 10.3, 10.4]) + df = pd.DataFrame({'y': a_y}).pipe(normalize_df) + + mask_step,mask_spike = get_model_outliers(df) + logger_info('Model 4:', mask_step) + self.assert_array_equal(mask_step, + array_ones_in_indices(a_y.size, [8,19])) + + + ## spikes + + # Test 5 - 2 spikes and 1 step + a_y = np.array([19.8, 19.9, 30.0, 30.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 10., 10.1, 10.2, 10.3, 10.4, + 10.5, 10.6, 30.7, 10.8, 10.9]) + + df = pd.DataFrame({'y': a_y}).pipe(normalize_df) + + mask_step,mask_spike = get_model_outliers(df) + logger_info('Model 5:', mask_step) + logger_info('mask 5:',mask_spike) + self.assert_array_equal(mask_step, + array_ones_in_indices(a_y.size, [9])) + self.assert_array_equal(mask_spike, + array_zeros_in_indices(a_y.size, [2,3,16])) + + # 5b - with datetime index + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = pd.DataFrame({'y': a_y}, index=a_date).pipe(normalize_df) + + mask_step,mask_spike = get_model_outliers(df) + logger_info('Model 5:', mask_step) + logger_info('mask 5:',mask_spike) + self.assert_array_equal(mask_step, + array_ones_in_indices(a_y.size, [9])) + self.assert_array_equal(mask_spike, + array_zeros_in_indices(a_y.size, [2,3,16])) + + # Test 6 - single spike + a_y = np.array([19.8, 19.9, 20.0, 20.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 10., 20.8, 20.9, 21.0, + 21.1, 21.2, 21.3, 21.4, 21.5, 21.6]) + + df = pd.DataFrame({'y': a_y}).pipe(normalize_df) + + mask_step,mask_spike = get_model_outliers(df) + logger_info('Model 6:', mask_step) + logger_info('mask 6:',mask_spike) + self.assertEquals(str(mask_step), + 'None') + self.assert_array_equal(mask_spike, + array_zeros_in_indices(a_y.size, [9])) + + + # Test 6b - single spike co-located with step + a_y = np.array([19.8, 19.9, 20.0, 20.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 10., 30.7, 30.8, 30.9, 31.0, + 31.1, 31.2, 31.3, 31.4, 31.5]) + + df = pd.DataFrame({'y': a_y}).pipe(normalize_df) + + mask_step,mask_spike = get_model_outliers(df) + logger_info('Model 6:', mask_step) + logger_info('mask 6:',mask_spike) + self.assert_array_equal(mask_step, + array_ones_in_indices(a_y.size, [9])) + self.assert_array_equal(mask_spike, + array_zeros_in_indices(a_y.size, [9])) + + + # TODO: Work in progress + def test_get_model_outliers_withgap(self): + + # # Test 1 - short series with null value - nulls cause no outliers + a_y = [0., 1., np.NaN, 3.,4.,5.,6.,7.,] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = pd.DataFrame({'y': a_y, 'date': a_date}).pipe(normalize_df) + + mask_step, mask_spike = get_model_outliers(df) + logger_info('Model 1:', mask_step) + self.assertIsNone(mask_step) + self.assertIsNone(mask_spike) + + # 
Test 1b - series with multiple values per x -- raises ValueError + a_y = np.arange(0,10.) + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = pd.DataFrame({'y': a_y, 'date': a_date}) + df = pd.concat([df.head(5), df.head(6).tail(2)]).pipe(normalize_df) + + with self.assertRaises(ValueError): + mask_step, mask_spike = get_model_outliers(df) + + # Test 2 - short series with gap value - no real outliers + a_y = np.arange(0,10.) + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = pd.DataFrame({'y': a_y, 'date': a_date}) + df = pd.concat([df.head(5), df.tail(-6)]).pipe(normalize_df) + + mask_step, mask_spike = get_model_outliers(df) + logger_info('Model 1:', mask_step) + self.assertIsNotNone(mask_step) # Incorrectly finds a step + self.assertIsNone(mask_spike) # No spikes + + # Test 2b - after interpolating, can get outliers - finds none + + df_nogap = df.pipe(interpolate_df, include_mask=True) + mask_step, mask_spike = get_model_outliers(df_nogap) + logger_info('df 1 - no gap:', df_nogap) + self.assertIsNone(mask_step) # No steps + self.assertIsNone(mask_spike) # No spikes + + + # # Test 3 - short series with gap value - with outliers + a_y = np.arange(0,10.) + a_y2 = np.arange(1, 11.) + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = pd.DataFrame({'y': a_y, 'date': a_date}) + df2 = pd.DataFrame({'y': a_y2, 'date': a_date}) + df = pd.concat([df.head(5), df2.tail(-6)]).pipe(normalize_df) + + mask_step, mask_spike = get_model_outliers(df) + logger_info('Model 1:', mask_step) + self.assertIsNotNone(mask_step) # Incorrectly finds a step + self.assertIsNone(mask_spike) # No spikes + + # Test 3b - after interpolating with interpolate_df() - TODO: REMOVE THIS + + df_nogap = df.pipe(interpolate_df, include_mask=True) + mask_step, mask_spike = get_model_outliers(df_nogap) + + df_nogap ['mask_step']=mask_step + df_nogap['step_in_filled_gap'] = df_nogap.mask_step*df_nogap.is_gap_filled + + df_nogap['mask_step_patch'] = df_nogap.step_in_filled_gap.shift(-1).fillna(0) + df_nogap = df_nogap.loc[~df_nogap.is_gap_filled] + df_nogap['mask_step_patch'] = df_nogap.mask_step_patch.shift(1).fillna(0) + df_nogap['mask_step'] = df_nogap.mask_step+df_nogap.mask_step_patch + df_nogap = df_nogap[['date','x','y','mask_step']] + logger_info('df 1 - no gap:', df_nogap) + + self.assert_array_equal(df_nogap.mask_step, + array_ones_in_indices(df_nogap.index.size, [5])) + + self.assertIsNone(mask_spike) # No spikes + + # TODO: we need to + # - filter out filled gaps + # - get list of steps + # - if a step is in a filled gap, move to next sample + + # Test 3c - same, with function + + mask_step, mask_spike = get_model_outliers_withgap(df) + logger_info('Model 3c:', mask_step) + self.assert_array_equal(mask_step, + array_ones_in_indices(df_nogap.index.size, [5])) + logger_info('mask_spike:', mask_spike) + logger_info('mask_step:', mask_step) + + self.assertIsNone(mask_spike) # No spikes + + + # TODO: Work in progress + def test_get_model_outliers_adj_season(self): + + # # Test 1 - short series - no outlier + # a_y = np.arange(4) + # a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + # df = pd.DataFrame({'y': a_y, 'date': a_date}).pipe(normalize_df) + # + # model, mask = get_model_outliers(df) + # logger_info('Model 1:', model) + # self.assertIsNone(model) + + # Test 1 - short series - has outlier + a_y = np.array([0., 1., 1000., 3.,4.,5.]) + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + df = 
pd.DataFrame({'y': a_y, 'date': a_date}).pipe(normalize_df) + + mask_step, mask_spike = get_model_outliers(df) + logger_info('Model 1:', mask_step) + self.assertIsNone(mask_step) + self.assert_array_equal(mask_spike, + array_zeros_in_indices(a_y.size, [2])) + + + + + + def test_find_steps(self): + # No changes + a_y = [20.0, 20.1, 20.2, 20.3, 20.4, 20.5] + steps, spikes = find_steps_and_spikes(a_x=None, a_y=a_y, a_date=None) + assert len(steps) == 0 + assert len(spikes) == 0 + + # Single step + a_y = [19.8, 19.9, 20.0, 20.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 10., 10.1, 10.2, 10.3, 10.4, + 10.5, 10.6, 10.7, 10.8, 10.9] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + steps, spikes = find_steps_and_spikes(a_x=None, a_y=a_y, a_date=a_date) + assert steps + assert len(spikes) == 0 + logger.info('Steps found = %s', len(steps)) + assert len(steps) == 1 + + # Single spike + a_y = [19.8, 19.9, 20.0, 20.1, 20.2, 20.3, 20.4, 20.5, + 20.6, 30., 30.1, 20.8, 20.9, 21.0, 21.1, 21.2, + 21.3, 21.4, 21.5] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + steps, spikes = find_steps_and_spikes(a_x=None, a_y=a_y, a_date=a_date) + assert len(steps) == 0 + assert spikes + logger.info('Spikes found = %s', len(spikes)) + assert len(spikes) == 1 + + # Two changes + a_y = [-1, 0, 1, 2, 3, 5, 6, 8, 10, 15, 16, 18, + 20.1, 20.2, 20.3, 20.4, 20.5, 20.6, + 10., 10.1, 10.2, 10.3, 10.4] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + steps, spikes = find_steps_and_spikes(a_x=None, a_y=a_y, a_date=a_date, window=3) + assert steps + assert len(spikes) == 0 + logger.info('Steps found = %s', len(steps)) + assert len(steps) == 2 + + # Consecutive changes + a_y = [-1, 0, 1, 2, 3, 5, 6, 8, 15, 16, 21, 20.1, + 20.2, 20.3, 20.4, 20.5, 20.6, 20.7, 20.8, + 10., 10.1, 10.2, 10.3, 10.4] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + steps, spikes = find_steps_and_spikes(a_x=None, a_y=a_y, window=3, a_date=a_date) + assert steps + assert len(spikes) == 0 + logger.info('Steps found = %s', len(steps)) + assert len(steps) == 2 + + # Select number of changes + a_y = [-1, 0, 1, 2, 3, 5, 6, 8, 15, 16, 21, 20.1, + 20.2, 20.3, 20.4, 20.5, 20.6, 20.7, 20.8, + 10., 10.1, 10.2, 10.3, 10.4] + a_date = pd.date_range(start='2018-01-01', periods=len(a_y), freq='D') + steps, spikes = find_steps_and_spikes(a_x=None, a_y=a_y, a_date=a_date, window=3, max_changes=1) + assert steps + assert len(spikes) == 0 + logger.info('Steps found = %s', len(steps)) + assert len(steps) == 1 + # d = changes[0] + # assert d['change_type'] == 'step' + # assert d['duration'] == 3 + # Difference would be 7 (increase from 8 -> 15) + 5 (increase from 16 -> 21) = 12 + # self.assertAlmostEqual(first=d['diff'], second=12.0) + + def test_fixed_model_creation(self): + a_x = np.arange(0, 10) + a_date = pd.date_range('2014-01-01', periods=10, freq='D') + + a1 = model_constant(a_x, a_date, np.array([42])) + model_constant_fixed = get_fixed_model(model_constant, np.array([42])) + print(model_constant_fixed) + a2 = model_constant_fixed(a_x, a_date, None) + self.assert_array_equal(a1, a2) + + def test_fix_params_fmodel(self): + a_x = np.arange(0, 10) + a_date = pd.date_range('2014-01-01', periods=10, freq='D') + + a1 = model_linear(a_x, a_date, np.array([10., -1.])) + model_linear_fixed = fix_params_fmodel(model_linear, [10., np.NaN]) + logger_info('model_linear_fixed:', model_linear_fixed) + self.assertEquals(model_linear_fixed.n_params, 1) + a2 = model_linear_fixed(a_x, a_date, 
params=[-1.]) + self.assert_array_equal(a1, a2) + + # TODO: Implement test + def test_validate_model_bounds(self): + pass + + def test_get_l_model_auto_season(self): + + # 0. Test for series with single sample + a_date = pd.a_date = pd.date_range('2014-01-01', periods=1, freq='D') + l_expected = [model_null] + l_result = get_l_model_auto_season(a_date) + self.assert_array_equal(l_result, l_expected) + + # 1. Tests for series with daily samples + + # Test 1.1 - not enough samples for weekly seasonality + a_date = pd.a_date = pd.date_range('2014-01-01', periods=10, freq='D') + l_expected = [model_null] + l_result = get_l_model_auto_season(a_date) + self.assert_array_equal(l_result, l_expected) + + # Test 1.2 - enough samples for weekly seasonality + a_date = pd.a_date = pd.date_range('2014-01-01', periods=12, freq='D') + l_expected = [model_null, model_season_wday] + l_result = get_l_model_auto_season(a_date, min_periods=1.5) + self.assert_array_equal(l_result, l_expected) + + # Test 1.3 - Weekly and yearly seasonality + a_date = pd.a_date = pd.date_range('2014-01-01', periods=549, freq='D') + l_expected = [model_null, model_season_wday * model_season_fourier_yearly, + model_season_wday, model_season_fourier_yearly] + l_result = get_l_model_auto_season(a_date, min_periods=1.5, season_add_mult='mult') + self.assert_array_equal(l_result, l_expected) + + l_expected = [model_null, model_season_wday + model_season_fourier_yearly, model_season_wday, + model_season_fourier_yearly] + l_result = get_l_model_auto_season(a_date, min_periods=1.5, season_add_mult='add') + self.assert_array_equal(l_result, l_expected) + + + # 2. Tests for series with weekly samples + + # Test 2.2 - not enough samples for yearly seasonality + a_date = pd.a_date = pd.date_range('2014-01-01', periods=12, freq='W') + l_expected = [model_null] + l_result = get_l_model_auto_season(a_date, min_periods=1.5) + self.assert_array_equal(l_result, l_expected) + + # Test 2.3 - Weekly and yearly seasonality + a_date = pd.a_date = pd.date_range('2014-01-01', periods=80, freq='W') + l_expected = [model_null, model_season_fourier_yearly] + l_result = get_l_model_auto_season(a_date, min_periods=1.5) + self.assert_array_equal(l_result, l_expected) + + # 3. Tests for series with monthly samples + + # Test 3.2 - not enough samples for yearly seasonality + a_date = pd.a_date = pd.date_range('2014-01-01', periods=12, freq='M') + l_expected = [model_null] + l_result = get_l_model_auto_season(a_date, min_periods=1.5) + self.assert_array_equal(l_result, l_expected) + + # Test 3.3 - Weekly and yearly seasonality + a_date = pd.a_date = pd.date_range('2014-01-01', periods=20, freq='M') + l_expected = [model_null, model_season_fourier_yearly] + l_result = get_l_model_auto_season(a_date, min_periods=1.5) + self.assert_array_equal(l_result, l_expected) + + # 4. 
Tests for series with quarterly samples + + # Test 4.2 - not enough samples for yearly seasonality + a_date = pd.a_date = pd.date_range('2014-01-01', periods=5, freq='Q') + l_expected = [model_null] + l_result = get_l_model_auto_season(a_date, min_periods=1.5) + self.assert_array_equal(l_result, l_expected) + + # Test 4.3 - Weekly and yearly seasonality + a_date = pd.a_date = pd.date_range('2014-01-01', periods=7, freq='Q') + l_expected = [model_null, model_season_fourier_yearly] + l_result = get_l_model_auto_season(a_date, min_periods=1.5) + self.assert_array_equal(l_result, l_expected) + + def test_simplify_model(self): + # Test 1: normal bounds + model_dummy = Namespace() + model_dummy.f_bounds = lambda a_x, a_y, a_date: (np.array([3.]), np.array([7.])) + model_dummy.n_params = 1 + model_dummy.name = 'dummy' + + model_result = simplify_model(model_dummy) + logger_info('model_dummy', model_dummy) + logger_info('result:', model_result) + self.assertEquals(model_dummy, model_result) + + # Test 2: min and max bounds match - model transformed into fixed model + model_dummy = Namespace() + model_dummy.f_bounds = lambda a_x, a_y, a_date: (np.array([5.]), np.array([5.])) + model_dummy.n_params = 1 + model_dummy.name = 'dummy' + + model_result = simplify_model(model_dummy) + logger_info('model_dummy', model_dummy) + logger_info('result:', model_result) + self.assertEquals(model_result.n_params, 0) + + def test_validate_initial_guess(self): + result = validate_initial_guess(np.array([5., 5.]), + (np.array([0., 0.]), np.array([10., 10.]))) + self.assertTrue(result) + + result = validate_initial_guess(np.array([0., 10.]), + (np.array([0., 0.]), np.array([10., 10.]))) + self.assertTrue(result) + + result = validate_initial_guess(np.array([-1., 11.]), + (np.array([0., 0.]), np.array([10., 10.]))) + self.assertFalse(result) diff --git a/tests/test_forecast_plot.py b/tests/test_forecast_plot.py new file mode 100644 index 0000000..63ea7b9 --- /dev/null +++ b/tests/test_forecast_plot.py @@ -0,0 +1,157 @@ +# -- Public Imports + +import logging +import unittest +from itertools import chain, repeat +import matplotlib.pyplot as plt +import numpy as np +import os +import itertools +import pandas as pd +from unittest import TestCase + +# -- Private Imports +from anticipy.utils_test import PandasTest +from anticipy import forecast_plot + +# -- Globals + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def logger_info(msg, data): + logger.info(msg + '\n%s', data) + + +base_folder = os.path.join(os.path.dirname(__file__), 'test_plots') + +def get_path_test_plot(name, is_matplotlib=False): + if is_matplotlib: + name = name+'_mpl' + file_name = '{}.png'.format(name) + return os.path.join(base_folder, file_name) + + +df_forecast = ( + pd.concat([ + pd.DataFrame({'date': pd.date_range('2018-01-01', periods=6, freq='D'), + 'model': 'actuals', + 'y': 1000 * np.arange(0., 6.), + 'is_actuals': True + }), + pd.DataFrame({'date': pd.date_range('2018-01-01', periods=10, freq='D'), + 'model': 'forecast', + 'y': 1000 * np.full(10, 5.), + 'is_actuals': False + }), + + ], sort=False, ignore_index=True) +) + +df_forecast_pi = ( + pd.concat([ + pd.DataFrame({'date': pd.date_range('2018-01-01', periods=6, freq='D'), + 'model': 'actuals', + 'y': 1000 * np.arange(0., 6.), + 'is_actuals': True + }), + pd.DataFrame({'date': pd.date_range('2018-01-01', periods=6, freq='D'), + 'model': 'forecast', + 'y': 1000 * np.full(6, 5.), + 'is_actuals': False + }), + pd.DataFrame({'date': 
pd.date_range('2018-01-07', periods=4, freq='D'), + 'model': 'forecast', + 'y': 1000 * np.full(4, 5.), + 'is_actuals': False, + 'q5': 1000 * np.full(4, 4.), + 'q20': 1000 * np.full(4, 4.5), + 'q80': 1000 * np.full(4, 5.5), + 'q95': 1000 * np.full(4, 6.), + }), + + ], sort=False, ignore_index=True) +) + +# Dataframe with different data sources, to plot with faceting +df_forecast_p1 = df_forecast.copy() +df_forecast_p2 = df_forecast.copy() +df_forecast_p1['source'] = 'ts1' +df_forecast_p2['source'] = 'ts2' +df_forecast_facet = pd.concat([df_forecast_p1, df_forecast_p2], sort=False, ignore_index=True) + +df_forecast_p3 = df_forecast.copy() +df_forecast_p4 = df_forecast.copy() +df_forecast_p5 = df_forecast.copy() +df_forecast_p3['source'] = 'ts3' +df_forecast_p4['source'] = 'ts4' +df_forecast_p5['source'] = 'ts5' +df_forecast_facet_5 = pd.concat([df_forecast_p1, + df_forecast_p2, + df_forecast_p3, + df_forecast_p4, + df_forecast_p5], sort=False, ignore_index=True) + + +# As above, with prediction interval +# Dataframe with different data sources, to plot with faceting +df_forecast_p1_pi = df_forecast_pi.copy() +df_forecast_p2_pi = df_forecast_pi.copy() +df_forecast_p1_pi['source'] = 'ts1' +df_forecast_p2_pi['source'] = 'ts2' +df_forecast_facet_pi = pd.concat([df_forecast_p1_pi, df_forecast_p2_pi], sort=False, ignore_index=True) + + + +class TestForecastPlot(PandasTest): + + def test_ggplot_fcast_save(self): + is_matplotlib=True + path = get_path_test_plot('test',is_matplotlib) + forecast_plot.plot_forecast_save(df_forecast, path, 400, 300, 'Test Plot') + logger_info('plot saved to :', path) + + path = get_path_test_plot('test_k',is_matplotlib) + forecast_plot.plot_forecast_save(df_forecast, path, 400, 300, 'Test Plot', scale='k') + logger_info('plot saved to :', path) + + path = get_path_test_plot('test_m',is_matplotlib) + forecast_plot.plot_forecast_save(df_forecast, path, 400, 300, 'Test Plot', scale='M') + logger_info('plot saved to :', path) + + # Todo: add checks about file creation, cleanup after running + + logger_info('debug - df_forecast_facet', df_forecast_facet) + + path = get_path_test_plot('test_facet',is_matplotlib) + forecast_plot.plot_forecast_save(df_forecast_facet, path, 400, 300, 'Test Plot') + logger_info('plot saved to :', path) + + ## Repeat test with prediction intervals + # TODO: ADD _PI TO PATH NAME + + path = get_path_test_plot('test',is_matplotlib) + forecast_plot.plot_forecast_save(df_forecast, path, 400, 300, 'Test Plot') + logger_info('plot saved to :', path) + + path = get_path_test_plot('test_k',is_matplotlib) + forecast_plot.plot_forecast_save(df_forecast, path, 400, 300, 'Test Plot', scale='k') + logger_info('plot saved to :', path) + + path = get_path_test_plot('test_m',is_matplotlib) + forecast_plot.plot_forecast_save(df_forecast, path, 400, 300, 'Test Plot', scale='M') + logger_info('plot saved to :', path) + + # Todo: add checks about file creation, cleanup after running + + logger_info('debug - df_forecast_facet', df_forecast_facet) + + path = get_path_test_plot('test_facet') + forecast_plot.plot_forecast_save(df_forecast_facet, path, 400, 300, 'Test Plot') + logger_info('plot saved to :', path) + + def test_plot_forecast(self): + i = forecast_plot.plot_forecast(df_forecast, 400, 300, 'Test Plot') + logger_info('plot output:', repr(i)) + # Todo: add checks to validate Ipython.Image instance diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py new file mode 100644 index 0000000..f5be388 --- /dev/null +++ b/tests/test_model_utils.py @@ -0,0 
+1,192 @@ +""" + +Author: Pedro Capelastegui +Created on 04/12/2015 +""" + +import logging +import unittest +from itertools import chain, repeat + +import numpy as np +import os +import itertools +import pandas as pd +from unittest import TestCase +# This line fixes import errors +from anticipy.utils_test import PandasTest +from anticipy.model_utils import * +from anticipy import forecast_models + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def logger_info(msg, data): + logger.info(msg + '\n%s', data) + + +# class TestModelUtils(TestCase): +class TestModelUtils(PandasTest): + def setUp(self): + pass + + def test_array_transpose(self): + a = np.arange(10) + self.assertEqual(a.shape, (10,)) + at = array_transpose(a) + self.assertEqual(at.shape, (10, 1)) + + def test_apply_a_x_scaling(self): + a_x = np.arange(10) + l_models = [ + # No model - default config + None, + # Model requires omega n + forecast_models.model_linear, + # Model requires scaling + forecast_models.model_sigmoid, + # aperiodic_models.get_model_logistic_4_param # Todo: Implement and test model function with a_x scaling + ] + for model in l_models: + # No asserts - just check that the function runs without crashing, and manually check results in logs + a_x = apply_a_x_scaling(a_x, model) + logger.info('f_model: %s', model) + logger_info('a_x', a_x) + + def test_get_a_x_date_extrapolate(self): + # TODO: TEST Output size, scenarios with different frequencies + l_df_y = [ + # Single ts + pd.Series(index=pd.date_range('2016-01-01', periods=10, freq='W'), + data=np.arange(10)), + # Multiple ts + pd.DataFrame(index=pd.date_range('2016-01-01', periods=10, freq='W'), + data={'a': np.arange(10), 'b': np.arange(10)}) + ] + l_models = [ + # No model - default config + None, + # Model requires omega n + forecast_models.model_linear, + # Model requires scaling + forecast_models.model_sigmoid, + # aperiodic_models.get_model_logistic_4_param # Todo: Implement and test model function with a_x scaling + ] + l_time_resolutions = [ + # Default config + 'W-SUN', + 'W', + 'W-MON', + 'D', + 'MS', + 'YS' + ] + # logger_info('list_ts',l_df_y) + for (df_type, model, time_resolution) in itertools.product(['single', 'multi'], l_models, l_time_resolutions): + dict_df = { + # Single ts + 'single': pd.Series(index=pd.date_range('2016-01-01', periods=10, freq=time_resolution), + data=np.arange(10)), + # Multiple ts + 'multi': pd.DataFrame(index=pd.date_range('2016-01-01', periods=10, freq=time_resolution), + data={'a': np.arange(10), 'b': np.arange(10)}) + } + ts = dict_df.get(df_type) + + # No asserts - just check that the function runs without crashing, and manually check results in logs + s_x = get_s_x_extrapolate(ts.index.min(), ts.index.max(), model=model, freq=time_resolution, + extrapolate_years=1.0) + logger.info('type of df: %s, f_model: %s , time_resolution: %s', df_type, model, time_resolution) + logger_info('s_x', s_x.tail(3)) + logger_info('a_dates', s_x.tail(3).index) + self.assertIsInstance(s_x.index, pd.DatetimeIndex) + self.assertLessEqual(s_x.index.max(), ts.index.max() + 1.1 * pd.Timedelta(1, 'Y')) + self.assertGreaterEqual(s_x.index.max(), ts.index.max() + 0.9 * pd.Timedelta(1, 'Y')) + + # Check that all actuals values are in extrapolated series + self.assertEquals(np.setdiff1d(ts.index, s_x.index).size, 0) + + ts = l_df_y[0] + model = l_models[0] + time_resolution = l_time_resolutions[0] + s_x = get_s_x_extrapolate(ts.index.min(), ts.index.max(), model=model, freq=time_resolution, + 
extrapolate_years=3.0) + logger.info('# of ts: %s, f_model: %s , time_resolution: %s', ts.shape, model, time_resolution) + logger_info('a_x', s_x.head(3)) + logger_info('a_x index', s_x.head(3).index) + self.assertIsInstance(s_x.index, pd.DatetimeIndex) + logger_info('t_values len', len(s_x)) + self.assertEquals(len(s_x), 10 + 3.0 * 52) + + # Test with freq='D' + l_df_y = [ + # Single ts + pd.Series(index=pd.date_range('2016-01-01', periods=10, freq='D'), + data=np.arange(10)), + # Multiple ts + pd.DataFrame(index=pd.date_range('2016-01-01', periods=10, freq='D'), + data={'a': np.arange(10), 'b': np.arange(10)}) + ] + l_models = [ + # No model - default config + None, + # Model requires omega n + forecast_models.model_linear, + # Model requires scaling + # aperiodic_models.get_model_logistic_4_param # Todo: Implement and test model function with a_x scaling + ] + l_time_resolutions = [ + # Default config + 'D' + ] + logger_info('list_ts', l_df_y) + for (ts, model, time_resolution) in itertools.product(l_df_y, l_models, l_time_resolutions): + # No asserts - just check that the function runs without crashing, and manually check results in logs + s_x = get_s_x_extrapolate(ts.index.min(), ts.index.max(), model=model, freq=time_resolution) + logger.info('# of ts: %s, f_model: %s , time_resolution: %s', ts.shape, model, time_resolution) + logger_info('s_x', s_x.tail(3)) + logger_info('a_dates', s_x.tail(3).index) + self.assertIsInstance(s_x.index, pd.DatetimeIndex) + + ts = l_df_y[0] + model = l_models[0] + time_resolution = l_time_resolutions[0] + s_x = get_s_x_extrapolate(ts.index.min(), ts.index.max(), model=model, freq=time_resolution, + extrapolate_years=3.0) + logger.info('# of ts: %s, f_model: %s , time_resolution: %s', ts.shape, model, time_resolution) + logger_info('t_values', s_x.tail(3)) + logger_info('t_values_index', s_x.index) + self.assertIsInstance(s_x.index, pd.DatetimeIndex) + logger_info('t_values len', len(s_x)) + self.assertEquals(len(s_x), 10 + 3.0 * 365) + + def test_get_aic_c(self): + + # Known error scenario: 0 error, 1 parameters - should return -inf + aic_c1 = get_aic_c(0, 10, 1) + logger_info('AIC_C:', aic_c1) + self.assertTrue(np.isneginf(aic_c1)) + + def print_aic_c(fit_error, n, n_params): + aic_c1 = get_aic_c(fit_error, n, n_params) + logger.info('AIC_C (%s, %s, %s): %s', fit_error, n, n_params, aic_c1) + + print_aic_c(0.1, 10, 1) + print_aic_c(0.1, 10, 2) + print_aic_c(0.1, 10, 3) + print_aic_c(0.001, 10, 1) + print_aic_c(0.001, 10, 2) + print_aic_c(0.001, 10, 3) + print_aic_c(0.1, 100, 1) + print_aic_c(0.1, 100, 2) + print_aic_c(0.1, 100, 3) + print_aic_c(0, 10, 1) + print_aic_c(0, 10, 2) + print_aic_c(0, 10, 3) + + def test_get_s_aic_c_best_result_key(self): + s_tmp = pd.DataFrame({'c1': [1], 'c2': [2], 'c3': [-np.inf]}).set_index(['c1', 'c2'])['c3'] + result1 = get_s_aic_c_best_result_key(s_tmp) + logger_info('DEBUG: ', result1) + self.assertTupleEqual(get_s_aic_c_best_result_key(s_tmp), (1, 2))
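For context on the checks in test_get_aic_c: the expected -inf for a zero fit error is consistent with the standard small-sample corrected Akaike Information Criterion. A minimal sketch follows, assuming get_aic_c(fit_error, n, n_params) treats fit_error as a residual sum of squares; that signature and interpretation are inferred only from the test calls, not from the anticipy implementation.

import numpy as np

def aic_c_sketch(fit_error, n, n_params):
    # Hypothetical illustration, not the anticipy implementation:
    # AIC = 2*k + n*ln(RSS/n), and AICc adds the small-sample correction
    # 2*k*(k + 1) / (n - k - 1). With fit_error == 0 the log term is -inf,
    # matching the -inf asserted in test_get_aic_c.
    k = float(n_params)
    with np.errstate(divide='ignore'):
        aic = 2. * k + n * np.log(float(fit_error) / n)
    return aic + (2. * k * (k + 1.)) / (n - k - 1.)

# e.g. aic_c_sketch(0, 10, 1) -> -inf, while aic_c_sketch(0.1, 10, 2) is finite.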