Source code for powerlawrs

# Copyright (c) 2025 Adam Ulichny
#
# This source code is licensed under the MIT OR Apache-2.0 license
# that can be found in the LICENSE-MIT or LICENSE-APACHE files
# at the root of this source tree.

"""
powerlawrs: A Python package for analyzing power-law distributions.
"""

# Import the native Rust module
from . import _powerlawrs
import matplotlib.pyplot as plt
import numpy as np


# Expose the submodules of the native module at the package level
stats = _powerlawrs.stats
util = _powerlawrs.util
dist = _powerlawrs.dist

# For convenience, the distribution submodules nested under `dist` are also
# exposed directly at the package level
exponential = dist.exponential
powerlaw = dist.powerlaw
pareto = dist.pareto
lognormal = dist.lognormal

# Submodules of `pareto`, exposed at the package level. `Powerlaw.fit` calls
# `estimation.find_alphas_fast` and `gof.gof`; `hypothesis` is exported
# alongside them.
estimation = pareto.estimation
gof = pareto.gof
hypothesis = pareto.hypothesis

class Powerlaw:
    """
    A class to fit and analyze power-law distributions in a given dataset.

    Attributes:
        data: The dataset exactly as passed to the constructor.
        sorted_data: Ascending-sorted copy of ``data``; ``None`` until
            ``fit()`` has been called.
        x_mins: First element of the pair returned by
            ``estimation.find_alphas_fast``; ``None`` until ``fit()`` has
            been called.
        alphas: Second element of the pair returned by
            ``estimation.find_alphas_fast``; ``None`` until ``fit()`` has
            been called.
        ParetoFit: Result of ``gof.gof``; ``None`` until ``fit()`` has been
            called. ``plot()`` reads its ``x_min`` and ``alpha`` attributes.
    """

    def __init__(self, data):
        """
        Initializes the Powerlaw object with data.

        Args:
            data (list[float]): The dataset to analyze.
        """
        self.data = data
        # Every attribute that fit() populates is declared here so that an
        # unfitted instance reports None instead of raising AttributeError.
        self.sorted_data = None
        self.alphas = None
        self.x_mins = None
        self.ParetoFit = None

    def fit(self):
        """
        Fits the data to a power-law distribution.

        This method finds the optimal x_min and alpha parameters for the
        power-law fit and assesses the goodness of fit. The results are
        stored in the object's attributes (``sorted_data``, ``x_mins``,
        ``alphas`` and ``ParetoFit``); nothing is returned.
        """
        # Ensure data is sorted for some of the underlying functions
        self.sorted_data = sorted(self.data)

        # The result is unpacked as a pair (x_mins, alphas).
        # NOTE(review): presumably two parallel sequences of candidates --
        # confirm against the native `find_alphas_fast` signature.
        self.x_mins, self.alphas = estimation.find_alphas_fast(self.sorted_data)

        # gof expects the full dataset, not just the tail
        self.ParetoFit = gof.gof(self.sorted_data, self.x_mins, self.alphas)

    def plot(self):
        """
        Plots the CCDF of the data and plots the model.

        Two log-log figures are shown one after the other: the full-sample
        empirical CCDF with the model rescaled by the tail fraction, and the
        tail-only empirical CCDF with the tail-conditional model.

        Raises:
            RuntimeError: If ``fit()`` has not been called yet.
        """
        if self.ParetoFit is None:
            raise RuntimeError("You must call 'fit()' before plotting.")

        x_min = self.ParetoFit.x_min
        alpha = self.ParetoFit.alpha

        # full-sample empirical CCDF
        n = len(self.sorted_data)
        # fit sorts ascending, so the i-th smallest value gets (n - i) / n
        y_all = np.arange(n, 0, -1) / n  # P(X >= x) with denominator n

        # extract tail data, largest value first
        tail = [x for x in self.sorted_data if x >= x_min]
        sorted_tail = sorted(tail, reverse=True)
        m = len(sorted_tail)
        y_tail = np.arange(1, m + 1) / m  # P(X >= x | x >= xmin) with denom m

        # model lines; the model object does not depend on x, so it is built
        # once rather than once per grid point
        x_line = np.linspace(x_min, max(self.sorted_data), 200)
        model = pareto.Pareto(alpha, x_min)
        s_tail_model = np.array([model.ccdf(x) for x in x_line])
        # S_full(x) to compare with the full-sample CCDF
        s_full_model = (m / n) * s_tail_model

        # Plot 1: full empirical CCDF + full-sample scaled model
        self._show_ccdf_figure(
            figsize=(10, 6),
            x_emp=self.sorted_data,
            y_emp=y_all,
            emp_label='Empirical CCDF',
            x_line=x_line,
            y_model=s_full_model,
            ylabel='P(X >= x)',
            title='Full-sample CCDF and Pareto Type I Model',
        )

        # Plot 2: tail-only empirical CCDF + tail-conditional model (CSN style)
        self._show_ccdf_figure(
            figsize=(10, 5),
            x_emp=sorted_tail,
            y_emp=y_tail,
            emp_label='Empirical tail CCDF',
            x_line=x_line,
            y_model=s_tail_model,
            ylabel='P(X >= x | x >= x_min)',
            title='Tail-only CCDF and Pareto Type I Model',
        )

    def _show_ccdf_figure(self, figsize, x_emp, y_emp, emp_label,
                          x_line, y_model, ylabel, title):
        """Draw and show one log-log figure: empirical CCDF, model, x_min marker."""
        x_min = self.ParetoFit.x_min
        plt.figure(figsize=figsize)
        plt.loglog(x_emp, y_emp, '.', label=emp_label)
        plt.loglog(x_line, y_model, '-', lw=2, label='Pareto Type I')
        plt.axvline(x=x_min, color='k', ls='--', label=f'x_min={x_min:.3g}')
        plt.xlabel('x')
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid(True, which='both', ls='--', alpha=0.6)
        plt.title(title)
        plt.show()
def fit(data):
    """
    Fit a power-law model to ``data`` in a single call.

    Convenience wrapper: builds a ``Powerlaw`` instance for ``data``, runs
    its ``fit()`` method, and hands the instance back.

    Args:
        data (list[float]): The dataset to analyze.

    Returns:
        The fitted ``Powerlaw`` instance. The fit result itself is available
        on its ``ParetoFit`` attribute.
    """
    model = Powerlaw(data)
    model.fit()
    return model
# Define what gets imported with 'from powerlawrs import *' __all__ = [ "fit", "Powerlaw", "stats", "util", "dist", "exponential", "lognormal", "powerlaw", "pareto", "estimation", "gof", "hypothesis", ] # Package-level metadata __version__ = "0.1.0"