Source code for backtrader.analyzers.sharpe_ratio_stats

"""Sharpe Ratio Statistics Module - Advanced Sharpe ratio calculations.

This module provides functions for calculating Sharpe ratio statistics
including estimated, probabilistic, and deflated Sharpe ratios, along
with their confidence intervals and significance tests.

Functions:
    estimated_sharpe_ratio: Calculate basic Sharpe ratio.
    ann_estimated_sharpe_ratio: Calculate annualized Sharpe ratio.
    estimated_sharpe_ratio_stdev: Standard deviation of Sharpe estimation.
    probabilistic_sharpe_ratio: PSR calculation.
    min_track_record_length: Minimum track record for significance.
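    num_independent_trials: Estimate the number of independent trials.
    expected_maximum_sr: Expected maximum Sharpe ratio across trials.
    deflated_sharpe_ratio: Deflated Sharpe ratio (DSR) calculation.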
"""

import numpy as np
import pandas as pd
from scipy import stats as scipy_stats


def _is_integer_like(value):
    try:
        return (
            not isinstance(value, (bool, np.bool_))
            and np.isscalar(value)
            and np.isfinite(value)
            and float(value).is_integer()
        )
    except (TypeError, ValueError):
        return False


def _is_finite_value(value):
    try:
        return bool(np.all(np.isfinite(np.asarray(value))))
    except (TypeError, ValueError):
        return False


def _average_upper_triangle_correlation(trials_returns):
    """Compute the mean pairwise correlation across trial return columns."""
    corr_matrix = trials_returns.corr()
    if corr_matrix.empty:
        return 0.0

    upper = corr_matrix.values[np.triu_indices_from(corr_matrix.values, 1)]
    if upper.size == 0:
        return 0.0

    avg_corr = np.nanmean(upper)
    if not np.isfinite(avg_corr):
        return 0.0

    return float(avg_corr)


def estimated_sharpe_ratio(returns):
    """
    Estimate the (non-annualized) Sharpe ratio assuming a zero risk-free rate.

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        Return samples. The object must provide ``mean()`` and
        ``std(ddof=...)`` methods.

    Returns
    -------
    float, pd.Series
        Mean divided by the sample standard deviation (``ddof=1``).

    Raises
    ------
    ValueError
        If `returns` is None or holds fewer than two samples.
    """
    if returns is None:
        raise ValueError("estimated_sharpe_ratio requires returns")
    if len(returns) < 2:
        raise ValueError("estimated_sharpe_ratio requires at least 2 return samples")

    mean_return = returns.mean()
    volatility = returns.std(ddof=1)
    return mean_return / volatility
def ann_estimated_sharpe_ratio(returns=None, periods=261, *, sr=None):
    """
    Annualize an estimated Sharpe ratio (risk_free=0).

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        Return samples; only used when `sr` is not given.
    periods: int
        Number of `returns` items that make up one year
        (daily: 261, weekly: 52, monthly: 12, ...).
    sr: float, `np.array`, pd.Series, pd.DataFrame
        Sharpe ratio to annualize; its frequency must be coherent with
        `periods`. When given, it takes precedence over `returns`.

    Returns
    -------
    float, pd.Series
        The per-period Sharpe ratio multiplied by ``sqrt(periods)``.

    Raises
    ------
    ValueError
        If neither `returns` nor `sr` is given, `periods` is not a positive
        integer, `sr` is not finite, or `returns` has fewer than 2 samples.
    """
    if returns is None and sr is None:
        raise ValueError("ann_estimated_sharpe_ratio requires returns or sr")
    if not _is_integer_like(periods):
        raise ValueError("ann_estimated_sharpe_ratio requires integer periods")

    periods_per_year = int(periods)
    if periods_per_year <= 0:
        raise ValueError("ann_estimated_sharpe_ratio requires periods > 0")

    if sr is None:
        if len(returns) <= 1:
            raise ValueError(
                "ann_estimated_sharpe_ratio requires at least 2 return samples when sr is None"
            )
        per_period_sr = estimated_sharpe_ratio(returns)
    elif _is_finite_value(sr):
        per_period_sr = sr
    else:
        raise ValueError("ann_estimated_sharpe_ratio requires finite sr")

    return per_period_sr * np.sqrt(periods_per_year)
def _validate_srstdev_params(_returns, n, skew, kurtosis, sr):
    """Check the inputs of estimated_sharpe_ratio_stdev and return ``n`` as int.

    ``n`` defaults to ``len(_returns)`` when it is not supplied and returns
    are available. It must be integer-like and greater than 1. Each of
    ``skew``, ``kurtosis`` and ``sr`` must be finite when provided.

    Raises
    ------
    ValueError
        If any of the checks above fails.
    """
    if n is None and _returns is not None:
        n = len(_returns)

    if not _is_integer_like(n):
        raise ValueError("estimated_sharpe_ratio_stdev requires integer n")
    sample_count = int(n)
    if sample_count <= 1:
        raise ValueError("estimated_sharpe_ratio_stdev requires n > 1")

    # Checked in this order so the first offending parameter is the one reported.
    for label, moment in (("skew", skew), ("kurtosis", kurtosis), ("sr", sr)):
        if moment is not None and not _is_finite_value(moment):
            raise ValueError(f"estimated_sharpe_ratio_stdev requires finite {label}")

    return sample_count
def estimated_sharpe_ratio_stdev(returns=None, *, n=None, skew=None, kurtosis=None, sr=None):
    """
    Calculate the standard deviation of the sharpe ratio estimation.

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        If no `returns` are passed it is mandatory to pass the other four
        parameters. The object must provide a ``copy()`` method.
    n: int
        Number of returns samples used for calculating `skew`, `kurtosis`
        and `sr`. Defaults to ``len(returns)``.
    skew: float, `np.array`, pd.Series, pd.DataFrame
        The third moment expressed in the same frequency as the other
        parameters. `Skew`=0 for normal returns.
    kurtosis: float, `np.array`, pd.Series, pd.DataFrame
        The fourth moment expressed in the same frequency as the other
        parameters. `Kurtosis`=3 for normal returns.
    sr: float, `np.array`, pd.Series, pd.DataFrame
        Sharpe ratio expressed in the same frequency as the other parameters.

    Returns
    -------
    float, `np.array`, pd.Series
        A Series indexed by column when `returns` is a DataFrame. Otherwise
        a scalar when the result holds a single value, or the vector result
        unchanged when array-like moments were supplied.

    Raises
    ------
    ValueError
        If `returns` is None and any of `n`, `skew`, `kurtosis`, `sr` is
        missing, or if parameter validation fails.

    Notes
    -----
    This formula generalizes for both normal and non-normal returns.
    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1821643
    """
    if returns is None:
        if any(param is None for param in (n, skew, kurtosis, sr)):
            raise ValueError(
                "estimated_sharpe_ratio_stdev requires n, skew, kurtosis, and sr when returns is None"
            )
        _returns = None
    elif isinstance(returns, pd.DataFrame):
        _returns = pd.DataFrame(returns)
    else:
        _returns = returns.copy()

    n = _validate_srstdev_params(_returns, n, skew, kurtosis, sr)

    if _returns is not None:
        is_frame = isinstance(_returns, pd.DataFrame)
        # Any moment the caller did not supply is estimated from the sample.
        if skew is None:
            skew = scipy_stats.skew(_returns)
            if is_frame:
                skew = pd.Series(skew, index=_returns.columns)
        if kurtosis is None:
            # fisher=False: Pearson kurtosis (3 for a normal distribution).
            kurtosis = scipy_stats.kurtosis(_returns, fisher=False)
            if is_frame:
                kurtosis = pd.Series(kurtosis, index=_returns.columns)
        if sr is None:
            sr = estimated_sharpe_ratio(_returns)

    sr_std = np.sqrt(
        (1 + (0.5 * sr**2) - (skew * sr) + (((kurtosis - 3) / 4) * sr**2)) / (n - 1)
    )

    if isinstance(returns, pd.DataFrame):
        return pd.Series(sr_std, index=returns.columns)
    # Collapse single-element containers to a scalar, but keep genuine vector
    # results intact. The previous ``sr_std.values[0]`` failed on ndarrays and
    # silently dropped every element but the first.
    if isinstance(sr_std, (pd.Series, np.ndarray)) and sr_std.size == 1:
        return np.asarray(sr_std).ravel()[0]
    return sr_std
def probabilistic_sharpe_ratio(returns=None, sr_benchmark=0.0, *, sr=None, sr_std=None):
    """
    Calculate the Probabilistic Sharpe Ratio (PSR).

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        If no `returns` are passed it is mandatory to pass a `sr` and `sr_std`.
    sr_benchmark: float
        Benchmark sharpe ratio expressed in the same frequency as the other
        parameters. By default, set to zero (comparing against no investment
        skill).
    sr: float, `np.array`, pd.Series, pd.DataFrame
        Sharpe ratio expressed in the same frequency as the other parameters.
    sr_std: float, `np.array`, pd.Series, pd.DataFrame
        Standard deviation of the estimated sharpe ratio, expressed in the
        same frequency as the other parameters.

    Returns
    -------
    float, `np.array`, pd.Series
        A Series indexed by column when `returns` is a DataFrame. Otherwise
        a scalar when the result holds a single value, or the vector result
        unchanged when array-like `sr`/`sr_std` were supplied.

    Raises
    ------
    ValueError
        If required inputs are missing, `sr` or `sr_benchmark` is not
        finite, or `sr_std` is not finite and strictly positive.

    Notes
    -----
    PSR(SR*) = probability that SR^ > SR*
    SR^ = sharpe ratio estimated with `returns`, or `sr`
    SR* = `sr_benchmark`
    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1821643
    """
    if returns is None and (sr is None or sr_std is None):
        raise ValueError("probabilistic_sharpe_ratio requires sr and sr_std when returns is None")
    if sr is not None and not _is_finite_value(sr):
        raise ValueError("probabilistic_sharpe_ratio requires finite sr")
    if not _is_finite_value(sr_benchmark):
        raise ValueError("probabilistic_sharpe_ratio requires finite sr_benchmark")

    if sr is None:
        sr = estimated_sharpe_ratio(returns)
    if sr_std is None:
        sr_std = estimated_sharpe_ratio_stdev(returns, sr=sr)

    sr_std_values = np.asarray(sr_std)
    if np.any(~np.isfinite(sr_std_values)) or np.any(sr_std_values <= 0):
        raise ValueError("probabilistic_sharpe_ratio requires finite sr_std > 0")

    psr = scipy_stats.norm.cdf((sr - sr_benchmark) / sr_std)

    if isinstance(returns, pd.DataFrame):
        return pd.Series(psr, index=returns.columns)
    # Collapse single-element containers to a scalar, but keep genuine vector
    # results intact instead of truncating them to their first element.
    if isinstance(psr, (pd.Series, np.ndarray)) and psr.size == 1:
        return np.asarray(psr).ravel()[0]
    return psr
def min_track_record_length(
    returns=None, sr_benchmark=0.0, prob=0.95, *, n=None, sr=None, sr_std=None
):
    """
    Calculate the Minimum Track Record Length (minTRL).

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        If no `returns` are passed it is mandatory to pass `n`, `sr` and
        `sr_std`.
    sr_benchmark: float
        Benchmark sharpe ratio expressed in the same frequency as the other
        parameters. By default, set to zero (comparing against no investment
        skill).
    prob: float
        Confidence level used for calculating the minTRL.
        Strictly between 0 and 1, by default=0.95
    n: int
        Number of returns samples used for calculating `sr` and `sr_std`.
        Defaults to ``len(returns)``.
    sr: float, `np.array`, pd.Series, pd.DataFrame
        Sharpe ratio expressed in the same frequency as the other parameters.
    sr_std: float, `np.array`, pd.Series, pd.DataFrame
        Standard deviation of the estimated sharpe ratio, expressed in the
        same frequency as the other parameters.

    Returns
    -------
    float, `np.array`, pd.Series
        A Series indexed by column when `returns` is a DataFrame. Otherwise
        a scalar when the result holds a single value, or the vector result
        unchanged when array-like `sr`/`sr_std` were supplied.

    Raises
    ------
    ValueError
        If required inputs are missing, `prob` is outside (0, 1),
        `sr_benchmark` or `sr` is not finite, `n` is not an integer greater
        than 1, or `sr_std` is not finite and strictly positive.

    Notes
    -----
    minTRL = minimum of returns/samples needed (with same SR and SR_STD) to
    accomplish a PSR(SR*) > `prob`
    PSR(SR*) = probability that SR^ > SR*
    SR^ = sharpe ratio estimated with `returns`, or `sr`
    SR* = `sr_benchmark`
    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1821643

    The formula divides by ``sr - sr_benchmark``; equality of the two is not
    guarded against here.
    """
    if returns is None and (n is None or sr is None or sr_std is None):
        raise ValueError("min_track_record_length requires n, sr, and sr_std when returns is None")
    if not 0 < prob < 1:
        raise ValueError("min_track_record_length requires 0 < prob < 1")
    if not _is_finite_value(sr_benchmark):
        raise ValueError("min_track_record_length requires finite sr_benchmark")

    if n is None:
        n = len(returns)
    if not _is_integer_like(n):
        raise ValueError("min_track_record_length requires integer n")
    n = int(n)
    if n <= 1:
        raise ValueError("min_track_record_length requires n > 1")

    if sr is not None and not _is_finite_value(sr):
        raise ValueError("min_track_record_length requires finite sr")
    if sr is None:
        sr = estimated_sharpe_ratio(returns)
    if sr_std is None:
        sr_std = estimated_sharpe_ratio_stdev(returns, sr=sr)

    sr_std_values = np.asarray(sr_std)
    if np.any(~np.isfinite(sr_std_values)) or np.any(sr_std_values <= 0):
        raise ValueError("min_track_record_length requires finite sr_std > 0")

    z_score = scipy_stats.norm.ppf(prob)
    min_trl = 1 + (sr_std**2 * (n - 1)) * (z_score / (sr - sr_benchmark)) ** 2

    if isinstance(returns, pd.DataFrame):
        return pd.Series(min_trl, index=returns.columns)
    # Collapse single-element containers to a scalar, but keep genuine vector
    # results intact instead of truncating them to their first element.
    if isinstance(min_trl, (pd.Series, np.ndarray)) and min_trl.size == 1:
        return np.asarray(min_trl).ravel()[0]
    return min_trl
def num_independent_trials(trials_returns=None, *, m=None, p=None):
    """
    Calculate the number of independent trials.

    The estimate is ``p + (1 - p) * m`` rounded up to the next integer and
    limited to the range ``[1, m]``.

    Parameters
    ----------
    trials_returns: pd.DataFrame
        All trials returns, not only the independent trials. Required when
        `m` or `p` is not given.
    m: int
        Number of total trials. Defaults to the number of columns of
        `trials_returns`.
    p: float
        Average correlation between all the trials, in ``[-1, 1]``. Defaults
        to the mean pairwise correlation of the `trials_returns` columns.
        A non-finite value is treated as ``0.0``.

    Returns
    -------
    int

    Raises
    ------
    ValueError
        If the inputs are insufficient, `m` is not a positive integer, or
        `p` is not a scalar in ``[-1, 1]``.
    """
    if trials_returns is None and (m is None or p is None):
        raise ValueError(
            "num_independent_trials requires trials_returns when m or p is not provided"
        )

    if m is None:
        m = trials_returns.shape[1]
    else:
        if not _is_integer_like(m):
            raise ValueError("num_independent_trials requires integer m")
        m = int(m)
        if m <= 0:
            raise ValueError("num_independent_trials requires m > 0")

    if p is None:
        p = _average_upper_triangle_correlation(trials_returns)
    else:
        if isinstance(p, (bool, np.bool_)) or not np.isscalar(p):
            raise ValueError("num_independent_trials requires scalar p")
        try:
            p = float(p)
        except (TypeError, ValueError):
            raise ValueError("num_independent_trials requires scalar p") from None
        if not np.isfinite(p):
            p = 0.0
        elif not -1 <= p <= 1:
            raise ValueError("num_independent_trials requires -1 <= p <= 1")

    estimate = p + (1 - p) * m
    # True ceiling: the former ``int(n) + 1`` overshot by one whenever the
    # estimate was already whole (p=0 gave m + 1). Rounding first absorbs
    # floating-point noise such as 3.0000000000000004.
    rounded_up = int(np.ceil(round(estimate, 9)))
    # A negative average correlation extrapolates past m; there can never be
    # more independent trials than trials, nor fewer than one.
    return max(1, min(m, rounded_up))
def expected_maximum_sr(
    trials_returns=None, expected_mean_sr=0.0, *, independent_trials=None, trials_sr_std=None
):
    """
    Compute the expected maximum Sharpe ratio (analytically).

    Parameters
    ----------
    trials_returns: pd.DataFrame
        All trials returns, not only the independent trials.
    expected_mean_sr: float
        Expected mean SR, usually 0. We assume that random strategies will
        have a mean SR of 0, expressed in the same frequency as the other
        parameters.
    independent_trials: int
        Number of independent trials; must be between 1 and
        `trials_returns.shape[1]`. When omitted it is derived from
        `trials_returns` and capped at the number of columns.
    trials_sr_std: float
        Standard deviation for the estimated sharpe ratios of all trials,
        expressed in the same frequency as the other parameters. When
        omitted it is computed from `trials_returns`.

    Returns
    -------
    float
        `expected_mean_sr` is returned unchanged when there is a single
        independent trial or `trials_sr_std` is not finite.

    Raises
    ------
    ValueError
        If `expected_mean_sr` is not finite, the inputs are insufficient,
        `independent_trials` is not an integer within the allowed range,
        or `trials_sr_std` is negative.
    """
    emc = 0.5772156649  # Euler-Mascheroni constant

    if not _is_finite_value(expected_mean_sr):
        raise ValueError("expected_maximum_sr requires finite expected_mean_sr")

    if independent_trials is None:
        if trials_returns is None:
            raise ValueError("expected_maximum_sr requires trials_returns or independent_trials")
        # Cap the derived count at the number of columns (as
        # deflated_sharpe_ratio does) so that a value this function computed
        # itself can never trip the upper-bound check below.
        independent_trials = min(
            num_independent_trials(trials_returns), trials_returns.shape[1]
        )

    if not _is_integer_like(independent_trials):
        raise ValueError("expected_maximum_sr requires integer independent_trials")
    independent_trials = int(independent_trials)
    if independent_trials < 1:
        raise ValueError("expected_maximum_sr requires independent_trials >= 1")
    if trials_returns is not None and independent_trials > trials_returns.shape[1]:
        raise ValueError(
            "expected_maximum_sr requires independent_trials <= number of trial return columns"
        )

    # With a single trial there is no selection effect to account for.
    if independent_trials <= 1:
        return expected_mean_sr

    if trials_sr_std is None:
        if trials_returns is None:
            raise ValueError(
                "expected_maximum_sr requires trials_returns or trials_sr_std when independent_trials > 1"
            )
        trials_sr_std = estimated_sharpe_ratio(trials_returns).std()

    std_values = np.asarray(trials_sr_std)
    if np.any(np.isfinite(std_values) & (std_values < 0)):
        raise ValueError("expected_maximum_sr requires trials_sr_std >= 0")
    if not np.isfinite(trials_sr_std):
        return expected_mean_sr

    max_z = (1 - emc) * scipy_stats.norm.ppf(
        1 - 1.0 / independent_trials
    ) + emc * scipy_stats.norm.ppf(1 - 1.0 / (independent_trials * np.e))
    return expected_mean_sr + (trials_sr_std * max_z)
def deflated_sharpe_ratio(
    trials_returns=None,
    returns_selected=None,
    expected_mean_sr=0.0,
    independent_trials=10,
    expected_max_sr=None,
):
    """
    Calculate the Deflated Sharpe Ratio (DSR).

    Parameters
    ----------
    trials_returns: pd.DataFrame
        All trials returns, not only the independent trials. Required when
        `expected_max_sr` is not given.
    returns_selected: pd.Series
        Returns of the selected strategy. Required.
    expected_mean_sr: float
        Expected mean SR, usually 0. We assume that random strategies will
        have a mean SR of 0, expressed in the same frequency as the other
        parameters.
    independent_trials: int
        Number of independent trials used when `expected_max_sr` has to be
        computed; capped at the number of columns of `trials_returns`.
        Pass None to let `expected_maximum_sr` derive it from
        `trials_returns`.
    expected_max_sr: float
        The expected maximum sharpe ratio expected after running all the
        trials, expressed in the same frequency as the other parameters.
        When given, `trials_returns`, `expected_mean_sr` and
        `independent_trials` are not used.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If `returns_selected` is missing, if neither `expected_max_sr` nor
        `trials_returns` is given, or if `expected_max_sr` is not finite.

    Notes
    -----
    DSR = PSR(SR⁰) = probability that SR^ > SR⁰
    SR^ = sharpe ratio estimated with `returns_selected`
    SR⁰ = `expected_max_sr`
    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2460551
    """
    if returns_selected is None:
        raise ValueError("deflated_sharpe_ratio requires returns_selected")
    if expected_max_sr is None and trials_returns is None:
        raise ValueError(
            "deflated_sharpe_ratio requires trials_returns when expected_max_sr is None"
        )
    if expected_max_sr is not None and not _is_finite_value(expected_max_sr):
        raise ValueError("deflated_sharpe_ratio requires finite expected_max_sr")

    if expected_max_sr is None:
        # trials_returns is guaranteed to be present here by the check above.
        # None is forwarded untouched so expected_maximum_sr derives the count
        # itself; min(None, ...) would raise TypeError.
        if independent_trials is not None:
            independent_trials = min(independent_trials, trials_returns.shape[1])
        expected_max_sr = expected_maximum_sr(
            trials_returns=trials_returns,
            expected_mean_sr=expected_mean_sr,
            independent_trials=independent_trials,
        )

    return probabilistic_sharpe_ratio(returns=returns_selected, sr_benchmark=expected_max_sr)