Source code for backtrader.analyzers.sharpe_ratio_stats

"""Sharpe Ratio Statistics Module - Advanced Sharpe ratio calculations.

This module provides functions for calculating Sharpe ratio statistics
including estimated, probabilistic, and deflated Sharpe ratios, along
with their estimation error and significance tests.

Functions:
    estimated_sharpe_ratio: Calculate basic Sharpe ratio.
    ann_estimated_sharpe_ratio: Calculate annualized Sharpe ratio.
    estimated_sharpe_ratio_stdev: Standard deviation of Sharpe estimation.
    probabilistic_sharpe_ratio: PSR calculation.
    min_track_record_length: Minimum track record for significance.
    num_independent_trials: Number of independent trials among all trials.
    expected_maximum_sr: Expected maximum Sharpe ratio across trials.
    deflated_sharpe_ratio: Deflated Sharpe ratio (DSR) calculation.
"""

import numpy as np
import pandas as pd
from scipy import stats as scipy_stats


def _is_integer_like(value):
    try:
        return (
            not isinstance(value, (bool, np.bool_))
            and np.isscalar(value)
            and np.isfinite(value)
            and float(value).is_integer()
        )
    except (TypeError, ValueError):
        return False


def _is_finite_value(value):
    try:
        return bool(np.all(np.isfinite(np.asarray(value))))
    except (TypeError, ValueError):
        return False


def _average_upper_triangle_correlation(trials_returns):
    """Compute the mean pairwise correlation across trial return columns."""
    corr_matrix = trials_returns.corr()
    if corr_matrix.empty:
        return 0.0

    upper = corr_matrix.values[np.triu_indices_from(corr_matrix.values, 1)]
    if upper.size == 0:
        return 0.0

    avg_corr = np.nanmean(upper)
    if not np.isfinite(avg_corr):
        return 0.0

    return float(avg_corr)



[docs]
def estimated_sharpe_ratio(returns):
    """
    Estimate the Sharpe ratio of ``returns`` assuming a zero risk-free rate.

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        Return samples; must expose ``mean()`` and ``std(ddof=...)``.

    Returns
    -------
    float, pd.Series
        Mean divided by the sample standard deviation (``ddof=1``).

    Raises
    ------
    ValueError
        If ``returns`` is None or holds fewer than two samples.
    """
    if returns is None:
        raise ValueError("estimated_sharpe_ratio requires returns")
    if len(returns) < 2:
        raise ValueError("estimated_sharpe_ratio requires at least 2 return samples")

    mean_return = returns.mean()
    sample_std = returns.std(ddof=1)
    return mean_return / sample_std




[docs]
def ann_estimated_sharpe_ratio(returns=None, periods=261, *, sr=None):
    """
    Annualize an estimated Sharpe ratio (risk_free=0).

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        Return samples. Only used when `sr` is not supplied.

    periods: int
        How many items in `returns` complete a Year.
        If returns are daily: 261, weekly: 52, monthly: 12, ...

    sr: float, `np.array`, pd.Series, pd.DataFrame
        Sharpe ratio to be annualized, its frequency must be coherent with `periods`.
        When given, it takes precedence over `returns`.

    Returns
    -------
    float, pd.Series
        The Sharpe ratio multiplied by ``sqrt(periods)``.

    Raises
    ------
    ValueError
        If neither `returns` nor `sr` is given, `periods` is not a positive
        integer, `sr` is not finite, or `returns` has fewer than 2 samples
        when `sr` is None.
    """
    sr_given = sr is not None
    if returns is None and not sr_given:
        raise ValueError("ann_estimated_sharpe_ratio requires returns or sr")

    if not _is_integer_like(periods):
        raise ValueError("ann_estimated_sharpe_ratio requires integer periods")
    periods = int(periods)
    if periods <= 0:
        raise ValueError("ann_estimated_sharpe_ratio requires periods > 0")

    if sr_given:
        if not _is_finite_value(sr):
            raise ValueError("ann_estimated_sharpe_ratio requires finite sr")
        return sr * np.sqrt(periods)

    # No explicit Sharpe ratio: estimate it from the return samples.
    if len(returns) <= 1:
        raise ValueError(
            "ann_estimated_sharpe_ratio requires at least 2 return samples when sr is None"
        )
    return estimated_sharpe_ratio(returns) * np.sqrt(periods)



def _validate_srstdev_params(_returns, n, skew, kurtosis, sr):
    """Check the inputs of estimated_sharpe_ratio_stdev and return ``n`` as int.

    When ``n`` is not supplied it is taken from ``len(_returns)``. ``n`` must be
    integer-like and greater than 1; any of ``skew``, ``kurtosis`` and ``sr``
    that is supplied must be finite. Raises ValueError on the first violation.
    """
    if n is None and _returns is not None:
        n = len(_returns)

    if not _is_integer_like(n):
        raise ValueError("estimated_sharpe_ratio_stdev requires integer n")
    sample_count = int(n)
    if sample_count <= 1:
        raise ValueError("estimated_sharpe_ratio_stdev requires n > 1")

    # Optional moments: only validated when the caller supplied them.
    optional_checks = (
        (skew, "estimated_sharpe_ratio_stdev requires finite skew"),
        (kurtosis, "estimated_sharpe_ratio_stdev requires finite kurtosis"),
        (sr, "estimated_sharpe_ratio_stdev requires finite sr"),
    )
    for candidate, message in optional_checks:
        if candidate is not None and not _is_finite_value(candidate):
            raise ValueError(message)

    return sample_count



[docs]
def estimated_sharpe_ratio_stdev(returns=None, *, n=None, skew=None, kurtosis=None, sr=None):
    """
    Calculate the standard deviation of the sharpe ratio estimation.

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        If no `returns` are passed it is mandatory to pass the other four parameters.

    n: int
        Number of returns samples used for calculating `skew`, `kurtosis` and `sr`.
        Defaults to ``len(returns)``.

    skew: float, `np.array`, pd.Series, pd.DataFrame
        The third moment expressed in the same frequency as the other parameters.
        `Skew`=0 for normal returns. Computed from `returns` when omitted.

    kurtosis: float, `np.array`, pd.Series, pd.DataFrame
        The fourth moment expressed in the same frequency as the other parameters.
        `Kurtosis`=3 for normal returns. Computed from `returns` when omitted.

    sr: float, `np.array`, pd.Series, pd.DataFrame
        Sharpe ratio expressed in the same frequency as the other parameters.
        Computed from `returns` when omitted.

    Returns
    -------
    float, pd.Series
        A pd.Series indexed by column when `returns` is a DataFrame. Otherwise
        a scalar: a Series or array result is collapsed to its first element,
        matching how the PSR / minTRL functions unwrap their results.

    Raises
    ------
    ValueError
        If `returns` is None and any of `n`, `skew`, `kurtosis`, `sr` is
        missing, or if the supplied parameters fail validation.

    Notes
    -----
    This formula generalizes for both normal and non-normal returns.
    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1821643
    """
    if returns is None:
        if any(param is None for param in (n, skew, kurtosis, sr)):
            raise ValueError(
                "estimated_sharpe_ratio_stdev requires n, skew, kurtosis, and sr when returns is None"
            )
        _returns = None
    elif isinstance(returns, pd.DataFrame):
        _returns = pd.DataFrame(returns)
    else:
        _returns = returns.copy()

    n = _validate_srstdev_params(_returns, n, skew, kurtosis, sr)

    # Fill in whichever moments the caller did not supply.
    if _returns is not None:
        is_frame = isinstance(_returns, pd.DataFrame)
        if skew is None:
            skew_values = scipy_stats.skew(_returns)
            skew = pd.Series(skew_values, index=_returns.columns) if is_frame else skew_values
        if kurtosis is None:
            # fisher=False -> Pearson kurtosis (3 for a normal distribution),
            # which is what the "(kurtosis - 3)" term below expects.
            kurtosis_values = scipy_stats.kurtosis(_returns, fisher=False)
            kurtosis = (
                pd.Series(kurtosis_values, index=_returns.columns) if is_frame else kurtosis_values
            )
        if sr is None:
            sr = estimated_sharpe_ratio(_returns)

    sr_std = np.sqrt((1 + (0.5 * sr**2) - (skew * sr) + (((kurtosis - 3) / 4) * sr**2)) / (n - 1))

    if isinstance(returns, pd.DataFrame):
        return pd.Series(sr_std, index=returns.columns)
    if isinstance(sr_std, pd.Series):
        # Same value the previous ``.values[0]`` unwrap produced.
        return sr_std.iloc[0]
    if isinstance(sr_std, np.ndarray):
        # ndarray has no ``.values``; the old unwrap raised AttributeError here.
        return np.ravel(sr_std)[0]
    # Python/NumPy scalars (and DataFrame results) are returned untouched.
    return sr_std




[docs]
def probabilistic_sharpe_ratio(returns=None, sr_benchmark=0.0, *, sr=None, sr_std=None):
    """
    Compute the Probabilistic Sharpe Ratio (PSR).

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        If no `returns` are passed it is mandatory to pass a `sr` and `sr_std`.

    sr_benchmark: float
        Benchmark sharpe ratio expressed in the same frequency as the other parameters.
        By default, set to zero (comparing against no investment skill).

    sr: float, `np.array`, pd.Series, pd.DataFrame
        Sharpe ratio expressed in the same frequency as the other parameters.
        Estimated from `returns` when omitted.

    sr_std: float, `np.array`, pd.Series, pd.DataFrame
        Standard deviation of the estimated sharpe ratio,
        expressed in the same frequency as the other parameters.
        Estimated from `returns` when omitted.

    Returns
    -------
    float, pd.Series
        A pd.Series indexed by column when `returns` is a DataFrame,
        otherwise a scalar (the first element of a vector result).

    Raises
    ------
    ValueError
        If required inputs are missing, `sr` or `sr_benchmark` is not finite,
        or `sr_std` is not finite and strictly positive.

    Notes
    -----
    PSR(SR*) = probability that SR^ > SR*
    SR^ = sharpe ratio estimated with `returns`, or `sr`
    SR* = `sr_benchmark`

    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1821643
    """
    if returns is None and (sr is None or sr_std is None):
        raise ValueError("probabilistic_sharpe_ratio requires sr and sr_std when returns is None")
    if sr is not None and not _is_finite_value(sr):
        raise ValueError("probabilistic_sharpe_ratio requires finite sr")
    if not _is_finite_value(sr_benchmark):
        raise ValueError("probabilistic_sharpe_ratio requires finite sr_benchmark")

    if sr is None:
        sr = estimated_sharpe_ratio(returns)
    if sr_std is None:
        sr_std = estimated_sharpe_ratio_stdev(returns, sr=sr)

    std_values = np.asarray(sr_std)
    std_is_valid = np.all(np.isfinite(std_values)) and np.all(std_values > 0)
    if not std_is_valid:
        raise ValueError("probabilistic_sharpe_ratio requires finite sr_std > 0")

    # Standardized distance of the estimate from the benchmark.
    z_score = (sr - sr_benchmark) / sr_std
    psr = scipy_stats.norm.cdf(z_score)

    if isinstance(returns, pd.DataFrame):
        return pd.Series(psr, index=returns.columns)
    if type(psr) in (float, np.float64):
        return psr
    # Vector result without a DataFrame input: reduce to its first element.
    if isinstance(psr, pd.Series):
        return psr.iloc[0]
    return psr[0]




[docs]
def min_track_record_length(
    returns=None, sr_benchmark=0.0, prob=0.95, *, n=None, sr=None, sr_std=None
):
    """
    Compute the Minimum Track Record Length (minTRL).

    Parameters
    ----------
    returns: `np.array`, pd.Series, pd.DataFrame
        If no `returns` are passed it is mandatory to pass `n`, `sr` and `sr_std`.

    sr_benchmark: float
        Benchmark sharpe ratio expressed in the same frequency as the other parameters.
        By default, set to zero (comparing against no investment skill).

    prob: float
        Confidence level used for calculating the minTRL.
        Strictly between 0 and 1, by default=0.95

    n: int
        Number of returns samples used for calculating `sr` and `sr_std`.
        Defaults to ``len(returns)``.

    sr: float, `np.array`, pd.Series, pd.DataFrame
        Sharpe ratio expressed in the same frequency as the other parameters.

    sr_std: float, `np.array`, pd.Series, pd.DataFrame
        Standard deviation of the estimated sharpe ratio,
        expressed in the same frequency as the other parameters.

    Returns
    -------
    float, pd.Series
        A pd.Series indexed by column when `returns` is a DataFrame,
        otherwise a scalar (the first element of a vector result).

    Raises
    ------
    ValueError
        If required inputs are missing, `prob` is outside (0, 1), `n` is not
        an integer greater than 1, `sr`/`sr_benchmark` is not finite, or
        `sr_std` is not finite and strictly positive.

    Notes
    -----
    minTRL = minimum of returns/samples needed (with same SR and SR_STD) to accomplish a PSR(SR*) > `prob`
    PSR(SR*) = probability that SR^ > SR*
    SR^ = sharpe ratio estimated with `returns`, or `sr`
    SR* = `sr_benchmark`

    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1821643
    """
    if returns is None and (n is None or sr is None or sr_std is None):
        raise ValueError("min_track_record_length requires n, sr, and sr_std when returns is None")
    if not (0 < prob < 1):
        raise ValueError("min_track_record_length requires 0 < prob < 1")
    if not _is_finite_value(sr_benchmark):
        raise ValueError("min_track_record_length requires finite sr_benchmark")

    sample_count = len(returns) if n is None else n
    if not _is_integer_like(sample_count):
        raise ValueError("min_track_record_length requires integer n")
    sample_count = int(sample_count)
    if sample_count <= 1:
        raise ValueError("min_track_record_length requires n > 1")

    if sr is None:
        sr = estimated_sharpe_ratio(returns)
    elif not _is_finite_value(sr):
        raise ValueError("min_track_record_length requires finite sr")

    if sr_std is None:
        sr_std = estimated_sharpe_ratio_stdev(returns, sr=sr)
    std_values = np.asarray(sr_std)
    std_is_valid = np.all(np.isfinite(std_values)) and np.all(std_values > 0)
    if not std_is_valid:
        raise ValueError("min_track_record_length requires finite sr_std > 0")

    # Normal quantile for the requested confidence, scaled by the SR excess.
    scaled_quantile = scipy_stats.norm.ppf(prob) / (sr - sr_benchmark)
    min_trl = 1 + (sr_std**2 * (sample_count - 1)) * scaled_quantile**2

    if isinstance(returns, pd.DataFrame):
        return pd.Series(min_trl, index=returns.columns)
    if type(min_trl) in (float, np.float64):
        return min_trl
    # Vector result without a DataFrame input: reduce to its first element.
    if isinstance(min_trl, pd.Series):
        return min_trl.iloc[0]
    return min_trl[0]




[docs]
def num_independent_trials(trials_returns=None, *, m=None, p=None):
    """
    Calculate the number of independent trials.

    The estimate is ``p + (1 - p) * m`` rounded up to the next integer and
    kept within ``[1, m]``, since there cannot be more independent trials
    than trials in total.

    Parameters
    ----------
    trials_returns: pd.DataFrame
        All trials returns, not only the independent trials.
        Required when `m` or `p` is not provided.

    m: int
        Number of total trials. Defaults to ``trials_returns.shape[1]``.

    p: float
        Average correlation between all the trials, in ``[-1, 1]``.
        Defaults to the mean pairwise correlation of `trials_returns`.
        A non-finite value is treated as 0.

    Returns
    -------
    int

    Raises
    ------
    ValueError
        If `trials_returns` is missing while `m` or `p` is not provided,
        `m` is not a positive integer, or `p` is not a scalar in ``[-1, 1]``.
    """
    if trials_returns is None and any(param is None for param in (m, p)):
        raise ValueError(
            "num_independent_trials requires trials_returns when m or p is not provided"
        )

    if m is not None:
        if not _is_integer_like(m):
            raise ValueError("num_independent_trials requires integer m")
        m = int(m)
        if m <= 0:
            raise ValueError("num_independent_trials requires m > 0")
    else:
        m = trials_returns.shape[1]

    if p is None:
        p = _average_upper_triangle_correlation(trials_returns)
    else:
        if isinstance(p, (bool, np.bool_)) or not np.isscalar(p):
            raise ValueError("num_independent_trials requires scalar p")
        try:
            p = float(p)
        except (TypeError, ValueError):
            raise ValueError("num_independent_trials requires scalar p") from None
        if not np.isfinite(p):
            p = 0.0
        elif not -1 <= p <= 1:
            raise ValueError("num_independent_trials requires -1 <= p <= 1")

    estimate = p + (1 - p) * m

    # True round-up: the previous ``int(x) + 1`` overshot by one whenever the
    # estimate was already a whole number (e.g. p == 0 gave m + 1).
    rounded = int(np.ceil(estimate))

    # Negative average correlation pushes the raw estimate above m; clamp so
    # the result is always a usable trial count between 1 and m.
    return max(1, min(rounded, m))




[docs]
def expected_maximum_sr(
    trials_returns=None, expected_mean_sr=0.0, *, independent_trials=None, trials_sr_std=None
):
    """
    Analytically approximate the expected maximum Sharpe ratio across trials.

    Parameters
    ----------
    trials_returns: pd.DataFrame
        All trials returns, not only the independent trials.

    expected_mean_sr: float
        Expected mean SR, usually 0. We assume that random strategies will have a mean SR of 0,
        expressed in the same frequency as the other parameters.

    independent_trials: int
        Number of independent trials must be between 1 and `trials_returns.shape[1]`.
        Derived from `trials_returns` when omitted.

    trials_sr_std: float
        Standard deviation for the Estimated sharpe ratios of all trials,
        expressed in the same frequency as the other parameters.
        Derived from `trials_returns` when omitted.

    Returns
    -------
    float
        `expected_mean_sr` itself when there is a single independent trial or
        `trials_sr_std` is not finite; otherwise the mean shifted by
        `trials_sr_std` times the expected maximum z-score.

    Raises
    ------
    ValueError
        If `expected_mean_sr` is not finite, the inputs needed to derive a
        missing parameter are absent, `independent_trials` is not an integer
        within the allowed range, or `trials_sr_std` is negative.
    """
    if not _is_finite_value(expected_mean_sr):
        raise ValueError("expected_maximum_sr requires finite expected_mean_sr")

    if independent_trials is None:
        if trials_returns is None:
            raise ValueError("expected_maximum_sr requires trials_returns or independent_trials")
        independent_trials = num_independent_trials(trials_returns)

    if not _is_integer_like(independent_trials):
        raise ValueError("expected_maximum_sr requires integer independent_trials")
    trial_count = int(independent_trials)
    if trial_count < 1:
        raise ValueError("expected_maximum_sr requires independent_trials >= 1")
    if trials_returns is not None and trial_count > trials_returns.shape[1]:
        raise ValueError(
            "expected_maximum_sr requires independent_trials <= number of trial return columns"
        )

    # With one trial there is no maximum to take (and ppf(0) would be -inf).
    if trial_count <= 1:
        return expected_mean_sr

    if trials_sr_std is None:
        if trials_returns is None:
            raise ValueError(
                "expected_maximum_sr requires trials_returns or trials_sr_std when independent_trials > 1"
            )
        trials_sr_std = estimated_sharpe_ratio(trials_returns).std()

    std_values = np.asarray(trials_sr_std)
    if np.any(np.isfinite(std_values) & (std_values < 0)):
        raise ValueError("expected_maximum_sr requires trials_sr_std >= 0")

    if not np.isfinite(trials_sr_std):
        return expected_mean_sr

    euler_mascheroni = 0.5772156649  # Euler-Mascheroni constant
    quantile_plain = scipy_stats.norm.ppf(1 - 1.0 / trial_count)
    quantile_scaled = scipy_stats.norm.ppf(1 - 1.0 / (trial_count * np.e))
    max_z = (1 - euler_mascheroni) * quantile_plain + euler_mascheroni * quantile_scaled

    return expected_mean_sr + (trials_sr_std * max_z)




[docs]
def deflated_sharpe_ratio(
    trials_returns=None,
    returns_selected=None,
    expected_mean_sr=0.0,
    independent_trials=10,
    expected_max_sr=None,
):
    """
    Compute the Deflated Sharpe Ratio (DSR).

    Parameters
    ----------
    trials_returns: pd.DataFrame
        All trials returns, not only the independent trials.
        Required when `expected_max_sr` is not given.

    returns_selected: pd.Series
        Returns of the selected strategy whose Sharpe ratio is being deflated.

    expected_mean_sr: float
        Expected mean SR, usually 0. We assume that random strategies will have a mean SR of 0,
        expressed in the same frequency as the other parameters.

    independent_trials: int
        Number of independent trials; capped at the number of columns of
        `trials_returns`. Only used when `expected_max_sr` is not given.

    expected_max_sr: float
        The expected maximum sharpe ratio expected after running all the trials,
        expressed in the same frequency as the other parameters.
        Computed with `expected_maximum_sr` when omitted.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If `returns_selected` is missing, `expected_max_sr` is given but not
        finite, or both `expected_max_sr` and `trials_returns` are missing.

    Notes
    -----
    DSR = PSR(SR⁰) = probability that SR^ > SR⁰
    SR^ = sharpe ratio estimated with `returns_selected`
    SR⁰ = `expected_max_sr`

    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2460551
    """
    if returns_selected is None:
        raise ValueError("deflated_sharpe_ratio requires returns_selected")

    if expected_max_sr is not None:
        if not _is_finite_value(expected_max_sr):
            raise ValueError("deflated_sharpe_ratio requires finite expected_max_sr")
        benchmark_sr = expected_max_sr
    else:
        if trials_returns is None:
            raise ValueError(
                "deflated_sharpe_ratio requires trials_returns when expected_max_sr is None"
            )
        # Never ask for more independent trials than there are trial columns.
        capped_trials = min(independent_trials, trials_returns.shape[1])
        benchmark_sr = expected_maximum_sr(
            trials_returns=trials_returns,
            expected_mean_sr=expected_mean_sr,
            independent_trials=capped_trials,
        )

    return probabilistic_sharpe_ratio(returns=returns_selected, sr_benchmark=benchmark_sr)