# Copyright (c) 2025 Adam Ulichny
#
# This source code is licensed under the MIT OR Apache-2.0 license
# that can be found in the LICENSE-MIT or LICENSE-APACHE files
# at the root of this source tree.
"""
powerlawrs: A Python package for analyzing power-law distributions.
"""
# Import the native Rust module
from . import _powerlawrs
import matplotlib.pyplot as plt
import numpy as np
# Re-export the top-level submodules of the native extension from the package.
stats, util, dist = _powerlawrs.stats, _powerlawrs.util, _powerlawrs.dist

# Lift the individual distribution modules out of `dist` for shorter access.
exponential, powerlaw, pareto, lognormal = (
    dist.exponential,
    dist.powerlaw,
    dist.pareto,
    dist.lognormal,
)

# Pareto helper modules; `estimation` and `gof` are used by the `Powerlaw` class.
estimation, gof, hypothesis = pareto.estimation, pareto.gof, pareto.hypothesis
class Powerlaw:
    """
    A class to fit and analyze power-law distributions in a given dataset.

    Typical use is ``Powerlaw(data)``, then ``fit()``, then optionally
    ``plot()``. ``plot()`` refuses to run until ``fit()`` has stored a result.
    """

    def __init__(self, data):
        """
        Initializes the Powerlaw object with data.

        Args:
            data (list[float]): The dataset to analyze.
        """
        self.data = data
        # Ascending-sorted copy of `data`; populated by fit().
        self.sorted_data = None
        # The two values unpacked from estimation.find_alphas_fast() in fit().
        self.alphas = None
        self.x_mins = None
        # Result object returned by gof.gof(); None until fit() has been called.
        self.ParetoFit = None

    def fit(self):
        """
        Fits the data to a power-law distribution.

        This method finds the optimal x_min and alpha parameters for the
        power-law fit and assesses the goodness of fit. The results are stored
        in the object's attributes (``sorted_data``, ``x_mins``, ``alphas`` and
        ``ParetoFit``); nothing is returned.
        """
        # Ensure data is sorted for some of the underlying functions.
        self.sorted_data = sorted(self.data)
        # The native call's result is unpacked as a pair: (x_mins, alphas).
        # NOTE(review): presumably two parallel sequences - confirm against the
        # Rust binding.
        self.x_mins, self.alphas = estimation.find_alphas_fast(self.sorted_data)
        # gof expects the full dataset, not just the tail.
        self.ParetoFit = gof.gof(self.sorted_data, self.x_mins, self.alphas)

    def plot(self):
        """
        Plots the CCDF of the data and plots the model.

        Two log-log figures are shown, one after the other: the full-sample
        empirical CCDF with the model rescaled by the tail fraction, and the
        tail-only empirical CCDF with the tail-conditional model.

        Raises:
            RuntimeError: If ``fit()`` has not been called yet.
        """
        if self.ParetoFit is None:
            raise RuntimeError("You must call 'fit()' before plotting.")

        x_min = self.ParetoFit.x_min

        # Full-sample empirical CCDF. sorted_data is ascending, so the i-th
        # point gets (n - i) / n: P(X >= x) with denominator n.
        n = len(self.sorted_data)
        y_all = np.arange(n, 0, -1) / n

        # Tail-only empirical CCDF. The tail is put in descending order so the
        # largest observation gets 1/m and the smallest gets m/m.
        tail = [x for x in self.sorted_data if x >= x_min]
        sorted_tail = sorted(tail, reverse=True)
        m = len(sorted_tail)
        y_tail = np.arange(1, m + 1) / m

        # Model curves. The distribution object does not depend on x, so it is
        # built once rather than once per sample point.
        x_line = np.linspace(x_min, max(self.sorted_data), 200)
        model = pareto.Pareto(self.ParetoFit.alpha, x_min)
        s_tail_model = np.array([model.ccdf(x) for x in x_line])
        # Scale by the tail fraction to compare with the full-sample CCDF.
        s_full_model = (m / n) * s_tail_model

        # Plot 1: full empirical CCDF + full-sample scaled model.
        self._draw_ccdf(
            figsize=(10, 6),
            x_emp=self.sorted_data,
            y_emp=y_all,
            emp_label='Empirical CCDF',
            x_line=x_line,
            y_model=s_full_model,
            ylabel='P(X >= x)',
            title='Full-sample CCDF and Pareto Type I Model',
        )
        # Plot 2: tail-only empirical CCDF + tail-conditional model (CSN style).
        self._draw_ccdf(
            figsize=(10, 5),
            x_emp=sorted_tail,
            y_emp=y_tail,
            emp_label='Empirical tail CCDF',
            x_line=x_line,
            y_model=s_tail_model,
            ylabel='P(X >= x | x >= x_min)',
            title='Tail-only CCDF and Pareto Type I Model',
        )

    def _draw_ccdf(self, *, figsize, x_emp, y_emp, emp_label, x_line, y_model,
                   ylabel, title):
        """Draw and show one log-log figure: empirical points, model line, x_min marker."""
        x_min = self.ParetoFit.x_min
        plt.figure(figsize=figsize)
        plt.loglog(x_emp, y_emp, '.', label=emp_label)
        plt.loglog(x_line, y_model, '-', lw=2, label='Pareto Type I')
        plt.axvline(x=x_min, color='k', ls='--', label=f'x_min={x_min:.3g}')
        plt.xlabel('x')
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid(True, which='both', ls='--', alpha=0.6)
        plt.title(title)
        plt.show()
def fit(data):
    """
    Fits the data to a power-law distribution.

    This function is a convenience wrapper that instantiates the Powerlaw
    class and fits the data.

    Args:
        data (list[float]): The dataset to analyze.

    Returns:
        Powerlaw: The fitted instance. The ParetoFit result object is
        available as its ``ParetoFit`` attribute.
    """
    model = Powerlaw(data)
    model.fit()
    # The wrapper object (not the bare ParetoFit) is returned.
    return model
# Package-level metadata
__version__ = "0.1.0"

# Public names exported by 'from powerlawrs import *'
__all__ = [
    # High-level Python API
    "fit",
    "Powerlaw",
    # Top-level native submodules
    "stats",
    "util",
    "dist",
    # Distribution modules lifted out of `dist`
    "exponential",
    "lognormal",
    "powerlaw",
    "pareto",
    # Pareto helper modules
    "estimation",
    "gof",
    "hypothesis",
]