bces
Advanced tools
| numpy | ||
| scipy | ||
| tqdm |
| [build-system] | ||
| requires = ["setuptools>=61.0"] | ||
| build-backend = "setuptools.build_meta" | ||
| [project] | ||
| name = "bces" | ||
| version = "2.0" | ||
| description = "Python module for performing linear regression for data with measurement errors and intrinsic scatter" | ||
| readme = "README.md" | ||
| license = "MIT" | ||
| requires-python = ">=3.8" | ||
| authors = [{name = "Rodrigo Nemmen", email = "rodrigo.nemmen@iag.usp.br"}] | ||
| keywords = ["statistics", "fitting", "linear-regression", "machine-learning"] | ||
| classifiers = [ | ||
| "Programming Language :: Python :: 3", | ||
| "Intended Audience :: Science/Research", | ||
| "Topic :: Scientific/Engineering :: Astronomy", | ||
| ] | ||
| dependencies = [ | ||
| "numpy", | ||
| "scipy", | ||
| "tqdm", | ||
| ] | ||
| [project.urls] | ||
| Homepage = "https://github.com/rsnemmen/BCES" |
| """ | ||
| Basic unit testing functionality. | ||
| """ | ||
| import bces.bces as BCES | ||
| import numpy as np | ||
| from pathlib import Path | ||
| # reads test dataset (path relative to this file, not cwd) | ||
| _DATA_PATH = Path(__file__).resolve().parent.parent / "doc" / "data.npz" | ||
| data = np.load(_DATA_PATH) | ||
| xdata = data['x'] | ||
| ydata = data['y'] | ||
| errx = data['errx'] | ||
| erry = data['erry'] | ||
| covdata = data['cov'] | ||
| # Correct fit parameters expected for dataset. The parameters in | ||
| # ans_pars are such that y=Ax+B: | ||
| # | ||
| # ans_pars = [ A(y|x), B(y|x), | ||
| # A(x|y), B(x|y), | ||
| # A(ort), B(ort) ] | ||
| # | ||
| # i.e., each pair contains the expected result for one of the BCES | ||
| # regression methods. | ||
| # | ||
| ans_pars = np.array([ [0.57955173, 17.88855826], | ||
| [0.26053751, 32.70952271], | ||
| [0.50709256, 21.25491222] ]) | ||
| # covariance matrix | ||
| ans_cov = np.array([[ 5.85029731e-04, -2.72808055e-02], | ||
| [-2.72808055e-02, 1.27299029e+00]]) | ||
def test_yx(bces_result):
    """Test BCES Y|X fit without bootstrapping."""
    slope, intercept, slope_err, intercept_err, cov_ab = bces_result
    # Index 0 of the result arrays holds the Y|X regression line.
    obtained = np.array([slope[0], intercept[0]])
    np.testing.assert_array_almost_equal([ans_pars[0, 0], ans_pars[0, 1]], obtained)
def test_xy(bces_result):
    """Test BCES X|Y fit without bootstrapping."""
    slope, intercept, slope_err, intercept_err, cov_ab = bces_result
    # Index 1 of the result arrays holds the X|Y regression line.
    obtained = np.array([slope[1], intercept[1]])
    np.testing.assert_array_almost_equal([ans_pars[1, 0], ans_pars[1, 1]], obtained)
def test_ort(bces_result):
    """Test BCES orthogonal fit without bootstrapping."""
    slope, intercept, slope_err, intercept_err, cov_ab = bces_result
    # Index 3 of the result arrays holds the orthogonal regression line.
    obtained = np.array([slope[3], intercept[3]])
    np.testing.assert_array_almost_equal([ans_pars[2, 0], ans_pars[2, 1]], obtained)
def test_bisector(bces_result):
    """Test BCES bisector fit (index 2) without bootstrapping."""
    slope, intercept, slope_err, intercept_err, cov_ab = bces_result
    # The bisector slope must lie between the X|Y and Y|X slopes.
    bisector_slope = slope[2]
    assert ans_pars[1, 0] <= bisector_slope
    assert bisector_slope <= ans_pars[0, 0]
def test_covariance(bces_result):
    """Test that covab output matches expected covariance matrix."""
    slope, intercept, slope_err, intercept_err, cov_ab = bces_result
    # cov_ab[0] is cov(a, b) for the Y|X method; the reference value is
    # the off-diagonal element of the expected covariance matrix.
    np.testing.assert_almost_equal(cov_ab[0], ans_cov[0, 1], decimal=4)
def test_yxboot():
    """Test BCES Y|X fit with bootstrapping (parallel)."""
    fit = BCES.bcesp(xdata, errx, ydata, erry, covdata)
    slope, intercept = fit[0], fit[1]
    # Bootstrapped results are noisy: check to 1 decimal place only.
    obtained = np.array([slope[0], intercept[0]])
    np.testing.assert_array_almost_equal([ans_pars[0, 0], ans_pars[0, 1]], obtained, 1)
def test_ortboot():
    """Test BCES orthogonal fit with bootstrapping (parallel)."""
    fit = BCES.bcesp(xdata, errx, ydata, erry, covdata)
    slope, intercept = fit[0], fit[1]
    # Bootstrapped results are noisy: check to 1 decimal place only.
    obtained = np.array([slope[3], intercept[3]])
    np.testing.assert_array_almost_equal([ans_pars[2, 0], ans_pars[2, 1]], obtained, 1)
def test_bcesboot():
    """Test BCES Y|X fit with serial bootstrapping."""
    fit = BCES.bcesboot(xdata, errx, ydata, erry, covdata, nsim=1000)
    slope, intercept = fit[0], fit[1]
    # Bootstrapped results are noisy: check to 1 decimal place only.
    obtained = np.array([slope[0], intercept[0]])
    np.testing.assert_array_almost_equal([ans_pars[0, 0], ans_pars[0, 1]], obtained, 1)
def test_bootstrap():
    """Test if bootstrap is working correctly.

    Resamples the x data many times and applies the k-sample
    Anderson-Darling test against the original sample; the median test
    statistic over the resamplings should fall below the first critical
    value (i.e. resampled data are statistically indistinguishable from
    the original).
    """
    import scipy.stats
    nboot = 5000
    stats = []
    crit_values = None
    for _ in range(nboot):
        xsim = BCES.bootstrap(xdata)
        # anderson_ksamp returns (statistic, critical values, significance).
        tsim, crit_values, _ = scipy.stats.anderson_ksamp([xdata, xsim])
        stats.append(tsim)
    # Fail loudly (rather than with a NameError) if the loop never ran.
    assert crit_values is not None
    assert np.median(np.array(stats)) < crit_values[0]
def test_bootstrap_list():
    """Test bootstrap with a list of arrays (index correspondence preserved)."""
    resampled = BCES.bootstrap([xdata, ydata])
    assert isinstance(resampled, list)
    assert len(resampled) == 2
    for boot, original in zip(resampled, (xdata, ydata)):
        assert boot.shape == original.shape
    # Both arrays must use the same random indices: check one known pairing
    # by verifying the resampled arrays come from the original data
    for value in resampled[0]:
        assert value in xdata
def test_allEqual():
    """Unit tests for the allEqual helper.

    Uses truthiness instead of ``is True`` / ``is False``: identity
    comparison against the Python bool singletons would spuriously fail
    if allEqual returned a numpy bool (``np.bool_``) for a correct result.
    """
    assert BCES.allEqual(np.array([3, 3, 3]))
    assert not BCES.allEqual(np.array([1, 2, 3]))
    assert BCES.allEqual(np.array([1]))
def test_wls_known_slope():
    """
    WLS should recover the true slope and intercept for synthetic data
    where X is error-free, Y has heteroscedastic errors + intrinsic scatter.
    """
    gen = np.random.default_rng(7)
    npts = 300
    slope_true, intercept_true = 3.0, -1.5
    xs = gen.uniform(1, 10, npts)
    sigma_y = gen.uniform(0.1, 0.5, npts)
    intrinsic = gen.normal(0, 0.3, npts)
    ys = slope_true * xs + intercept_true + intrinsic + gen.normal(0, sigma_y)
    slope, intercept, slope_err, intercept_err, cov_ab = BCES.wls(xs, ys, sigma_y)
    np.testing.assert_allclose(slope, slope_true, atol=0.15)
    np.testing.assert_allclose(intercept, intercept_true, atol=0.5)
def test_wls_vs_ols():
    """
    WLS should give smaller variance than plain OLS for the same
    heteroscedastic data setup.
    """
    gen = np.random.default_rng(42)
    npts = 200
    slope_true = 2.0
    wls_slopes = []
    ols_slopes = []
    for _ in range(300):
        xs = gen.uniform(0, 10, npts)
        sigma_y = gen.uniform(0.1, 1.0, npts)
        ys = slope_true * xs + gen.normal(0, sigma_y)
        wls_slopes.append(BCES.wls(xs, ys, sigma_y)[0])
        # Plain (unweighted) OLS slope for comparison
        dx = xs - xs.mean()
        dy = ys - ys.mean()
        ols_slopes.append(np.sum(dx * dy) / np.sum(dx**2))
    assert np.std(wls_slopes) < np.std(ols_slopes)
def test_wlsboot():
    """WLS bootstrap should recover the true slope to 1 decimal place."""
    gen = np.random.default_rng(99)
    npts = 200
    slope_true, intercept_true = 2.5, 1.0
    xs = gen.uniform(0, 10, npts)
    sigma_y = gen.uniform(0.1, 0.3, npts)
    intrinsic = gen.normal(0, 0.5, npts)
    ys = slope_true * xs + intercept_true + intrinsic + gen.normal(0, sigma_y)
    slope, intercept, *_rest = BCES.wlsp(xs, ys, sigma_y, nsim=2000)
    np.testing.assert_allclose(slope, slope_true, atol=0.2)
    np.testing.assert_allclose(intercept, intercept_true, atol=0.5)
def test_known_slope():
    """
    Fit synthetic data with a known true slope, intercept, and intrinsic
    scatter, and verify BCES Y|X recovers them within reasonable tolerances.
    """
    gen = np.random.default_rng(42)
    npts = 200
    slope_true, intercept_true = 2.5, 1.0
    x_clean = gen.uniform(0, 10, npts)
    sigma_x = gen.uniform(0.1, 0.3, npts)
    sigma_y = gen.uniform(0.1, 0.3, npts)
    intrinsic = gen.normal(0, 0.5, npts)  # intrinsic scatter
    # observed values: measurement noise added on top of the true relation
    x_obs = x_clean + gen.normal(0, sigma_x)
    y_obs = slope_true * x_clean + intercept_true + intrinsic + gen.normal(0, sigma_y)
    fit = BCES.bces(x_obs, sigma_x, y_obs, sigma_y, np.zeros(npts))
    slope, intercept = fit[0], fit[1]
    np.testing.assert_allclose(slope[0], slope_true, atol=0.2)
    np.testing.assert_allclose(intercept[0], intercept_true, atol=0.5)
+65
-108
@@ -1,40 +0,33 @@ | ||
| Metadata-Version: 2.1 | ||
| Metadata-Version: 2.4 | ||
| Name: bces | ||
| Version: 1.5.1 | ||
| Version: 2.0 | ||
| Summary: Python module for performing linear regression for data with measurement errors and intrinsic scatter | ||
| Home-page: https://github.com/rsnemmen/BCES | ||
| Download-URL: https://github.com/rsnemmen/BCES/archive/1.5.1.tar.gz | ||
| Download-URL: https://github.com/rsnemmen/BCES/archive/1.6.tar.gz | ||
| Author: Rodrigo Nemmen | ||
| Author-email: rodrigo.nemmen@iag.usp.br | ||
| License: MIT License | ||
| Copyright (c) 2019 Rodrigo Nemmen | ||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| of this software and associated documentation files (the "Software"), to deal | ||
| in the Software without restriction, including without limitation the rights | ||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| copies of the Software, and to permit persons to whom the Software is | ||
| furnished to do so, subject to the following conditions: | ||
| The above copyright notice and this permission notice shall be included in all | ||
| copies or substantial portions of the Software. | ||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| SOFTWARE. | ||
| Author-email: Rodrigo Nemmen <rodrigo.nemmen@iag.usp.br> | ||
| License-Expression: MIT | ||
| Project-URL: Homepage, https://github.com/rsnemmen/BCES | ||
| Keywords: statistics,fitting,linear-regression,machine-learning | ||
| Classifier: Programming Language :: Python :: 3 | ||
| Classifier: Intended Audience :: Science/Research | ||
| Classifier: Topic :: Scientific/Engineering :: Astronomy | ||
| Requires-Python: >=3.8 | ||
| Description-Content-Type: text/markdown | ||
| License-File: LICENSE | ||
| Requires-Dist: numpy | ||
| Requires-Dist: scipy | ||
| Requires-Dist: tqdm | ||
| Dynamic: author | ||
| Dynamic: download-url | ||
| Dynamic: home-page | ||
| Dynamic: license-file | ||
| Dynamic: requires-python | ||
| Linear regression for data with measurement errors and intrinsic scatter (BCES) | ||
| ========== | ||
| BCES and WLS: Linear regression for data with measurement errors and intrinsic scatter | ||
| ========================================= | ||
| Python module for performing robust linear regression on (X,Y) data points where both X and Y have measurement errors. | ||
| Python module for performing robust linear regression on (X,Y) data points with measurement errors. | ||
| The fitting method is the *bivariate correlated errors and intrinsic scatter* (BCES) and follows the description given in [Akritas & Bershady. 1996, ApJ](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/). Some of the advantages of BCES regression compared to ordinary least squares fitting (quoted from Akritas & Bershady 1996): | ||
| The **BCES** fitting method is the *bivariate correlated errors and intrinsic scatter* (BCES) and follows the description given in [Akritas & Bershady. 1996, ApJ](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/). Some of the advantages of BCES regression compared to ordinary least squares (OLS) fitting: | ||
@@ -46,19 +39,14 @@ * it allows for measurement errors on both variables | ||
| In order to understand how to perform and interpret the regression results, please read the paper. | ||
| The **WLS** (weighted least squares) method handles the case where only Y has measurement errors and X is treated as error-free. It accounts for intrinsic scatter in the data and follows Akritas & Bershady 1996, §2.3. | ||
| # Installation | ||
| ## Installation | ||
| Using `pip`: | ||
| pip install bces | ||
| If that does not work, you can install it using the `setup.py` script: | ||
| Alternatively, if you plan to modify the source then install the package with a symlink, so that changes to the source files will be immediately available: | ||
| python setup.py install | ||
| pip install -e . | ||
| You may need to run the last command with `sudo`. | ||
| Alternatively, if you plan to modify the source then install the package with a symlink, so that changes to the source files will be immediately available: | ||
| python setup.py develop | ||
@@ -68,7 +56,6 @@ | ||
| ## Usage | ||
| ### BCES | ||
| # Usage | ||
| import bces.bces as BCES | ||
@@ -93,3 +80,3 @@ a,b,aerr,berr,covab=BCES.bcesp(x,xerr,y,yerr,cov) | ||
| Each element of the arrays *a*, *b*, *aerr*, *berr* and *covab* correspond to the result of one of the different BCES lines: *y|x*, *x|y*, bissector and orthogonal, as detailed in the table below. Please read the [original BCES paper](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) to understand what these different lines mean. | ||
| Each element of the arrays `a`, `b`, `aerr`, `berr` and `covab` correspond to the result of one of the different BCES lines: $y|x$, $x|y$, bissector and orthogonal, as detailed in the table below. Please read the [original BCES paper](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) to understand what these different lines mean. | ||
@@ -101,101 +88,71 @@ | ||
| | 1 | *x\|y* | Assumes *y* as the independent variable | | ||
| | 2 | bissector | Line that bisects the *y\|x* and *x\|y*. This approach is self-inconsistent, *do not use this method*, cf. [Hogg, D. et al. 2010, arXiv:1008.4686](http://labs.adsabs.harvard.edu/adsabs/abs/2010arXiv1008.4686H/). | | ||
| | 2 | bissector | Line that bisects the *y\|x* and *x\|y*. This approach is self-inconsistent, [*do not use this method*](https://arxiv.org/abs/1008.4686). | | ||
| | 3 | orthogonal | Orthogonal least squares: line that minimizes orthogonal distances. Should be used when it is not clear which variable should be treated as the independent one | | ||
| By default, `bcesp` run in parallel with bootstrapping. | ||
| By default, `bcesp` runs the bootstrapping in parallel. | ||
| ### WLS | ||
| import bces.bces as BCES | ||
| a,b,aerr,berr,covab=BCES.wls(x,y,yerr) | ||
| Arguments: | ||
| # Examples | ||
| - *x,y*: 1D data arrays | ||
| - *yerr*: measurement errors affecting y, 1D array | ||
| [`bces-example.ipynb` is a jupyter notebook](https://github.com/rsnemmen/BCES/blob/master/bces-examples.ipynb) including a practical, step-by-step example of how to use BCES to perform regression on data with uncertainties on x and y. It also illustrates how to plot the confidence band for a fit. | ||
| Output: | ||
|  | ||
| - *a,b*: best-fit slope and intercept of the linear regression such that *y = Ax + B* (scalars) | ||
| - *aerr,berr*: the standard deviations in a,b | ||
| - *covab*: the covariance between a and b | ||
| If you have suggestions of more examples, feel free to add them. | ||
| Note that unlike BCES, WLS returns scalar values (a single regression line) rather than 4-element arrays. | ||
| The `wlsp` method performs bootstrapping in parallel, if you need that. | ||
| ### When to use BCES or WLS? | ||
| # Running Tests | ||
| Both methods return unbiased estimates of the slope and intercept, but they suit different statistical situations: | ||
| To test your installation, run the following command inside the BCES directory: | ||
| - **Use BCES** when both X and Y have measurement errors, or when measurement errors on X and Y may be correlated. | ||
| - **Use WLS** when only Y has measurement errors (X is error-free or its errors are negligible). | ||
| ```bash | ||
| pytest -v | ||
| ``` | ||
| Both methods account for intrinsic scatter. | ||
| **Why choose WLS over OLS?** | ||
| When only Y has measurement errors, prefer WLS over OLS. OLS assigns equal weight to every data point regardless of measurement uncertainty, while WLS weights each point by the inverse of its error variance so more precisely measured points have greater influence on the fit. This produces more accurate and statistically efficient estimates when data points have heteroscedastic (unequal) errors. | ||
| # Requirements | ||
| See `requirements.txt`. | ||
| ## Examples | ||
| [`bces-examples.ipynb` is a jupyter notebook](https://github.com/rsnemmen/BCES/blob/master/doc/bces-examples.ipynb) including a practical, step-by-step example of how to use BCES to perform regression on data with uncertainties on x and y. It also illustrates how to plot the confidence band for a fit. | ||
| # Citation | ||
| [`wls.ipynb` is a jupyter notebook](https://github.com/rsnemmen/BCES/blob/master/doc/wls.ipynb) with examples of WLS regression, including fits with intrinsic scatter. | ||
| If you end up using this code in your paper, you are morally obliged to cite the following works | ||
|  | ||
| - The original BCES paper: [Akritas, M. G., & Bershady, M. A. Astrophysical Journal, 1996, 470, 706](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) | ||
| - [Nemmen, R. et al. *Science*, 2012, 338, 1445](http://labs.adsabs.harvard.edu/adsabs/abs/2012Sci...338.1445N/) ([bibtex citation info](http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode=2012Sci...338.1445N&data_type=BIBTEX&db_key=AST&nocookieset=1)) | ||
| I spent considerable time writing this code, making sure it is correct and *user-friendly*, so I would appreciate your citation of the second paper in the above list as a token of gratitude. | ||
| If you are *really* happy with the code, [you can buy me a beer](https://www.dropbox.com/s/a0rp5un6ubrkph2/crypto%20wallets.pdf?dl=0). | ||
| ## Running Tests | ||
| ```bash | ||
| pytest -v -s | ||
| ``` | ||
| ## Citation | ||
| If you end up using this code in your paper, you are morally obliged to cite the following works | ||
| - [Nemmen, R. et al. *Science*, 2012, 338, 1445](http://labs.adsabs.harvard.edu/adsabs/abs/2012Sci...338.1445N/) ([bibtex citation info](http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode=2012Sci...338.1445N&data_type=BIBTEX&db_key=AST&nocookieset=1)) | ||
| - The original BCES paper: [Akritas, M. G., & Bershady, M. A. Astrophysical Journal, 1996, 470, 706](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) | ||
| # Misc. | ||
| I spent considerable time writing this code, making sure it is correct and *user-friendly*, so I would appreciate your citation of the first paper in the above list as a token of gratitude. | ||
| This python module is inspired by the (much faster) fortran routine [originally written by Akritas et al](http://www.astro.wisc.edu/%7Emab/archive/stats/stats.html). I wrote it because I wanted something more portable and easier to use, trading off speed. | ||
| If you are *really* happy with the code, you can buy me a coffee. | ||
| [](https://www.buymeacoffee.com/nemmen) | ||
| For a general tutorial on how to (and how not to) perform linear regression, [please read this paper: Hogg, D. et al. 2010, arXiv:1008.4686](http://labs.adsabs.harvard.edu/adsabs/abs/2010arXiv1008.4686H/). In particular, *please refrain from using the bisector method*. | ||
| If you want to plot confidence bands for your fits, have a look at [`nmmn` package](https://github.com/rsnemmen/nmmn) (in particular, modules `nmmn.plots.fitconf` and `stats`). | ||
| ## Bayesian linear regression | ||
| There are a couple of Bayesian approaches to perform linear regression which can be more powerful than BCES, some of which are described below. | ||
| **A Gibbs Sampler for Multivariate Linear Regression:** | ||
| [R code](https://github.com/abmantz/lrgs), [arXiv:1509.00908](http://arxiv.org/abs/1509.00908). | ||
| Linear regression in the fairly general case with errors in X and Y, errors may be correlated, intrinsic scatter. The prior distribution of covariates is modeled by a flexible mixture of Gaussians. This is an extension of the very nice work by Brandon Kelly [(Kelly, B. 2007, ApJ)](http://labs.adsabs.harvard.edu/adsabs/abs/2007ApJ...665.1489K/). | ||
| **LIRA: A Bayesian approach to linear regression in astronomy:** [R code](https://github.com/msereno/lira), [arXiv:1509.05778](http://arxiv.org/abs/1509.05778) | ||
| Bayesian hierarchical modelling of data with heteroscedastic and possibly correlated measurement errors and intrinsic scatter. The method fully accounts for time evolution. The slope, the normalization, and the intrinsic scatter of the relation can evolve with the redshift. The intrinsic distribution of the independent variable is approximated using a mixture of Gaussian distributions whose means and standard deviations depend on time. The method can address scatter in the measured independent variable (a kind of Eddington bias), selection effects in the response variable (Malmquist bias), and departure from linearity in form of a knee. | ||
| **AstroML: Machine Learning and Data Mining for Astronomy.** | ||
| [Python example](http://www.astroml.org/book_figures/chapter8/fig_total_least_squares.html) of a linear fit to data with correlated errors in x and y using AstroML. In the literature, this is often referred to as total least squares or errors-in-variables fitting. | ||
| # Todo | ||
| If you have improvements to the code, suggestions of examples, ways of speeding up the code, etc., feel free to [submit a pull request](https://guides.github.com/activities/contributing-to-open-source/). | ||
| * [ ] implement weighted least squares (WLS) | ||
| * [x] implement unit testing: `bces` | ||
| * [x] unit testing: `bootstrap` | ||
| [Visit the author's web page](https://rodrigonemmen.com/) and/or follow him on twitter ([@nemmen](https://twitter.com/nemmen)). | ||
| --- | ||
| Copyright (c) 2023, [Rodrigo Nemmen](https://rodrigonemmen.com). | ||
| [All rights reserved](http://opensource.org/licenses/BSD-2-Clause). | ||
| Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: | ||
| Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. | ||
| Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| LICENSE | ||
| MANIFEST.in | ||
| README.md | ||
| pyproject.toml | ||
| setup.cfg | ||
@@ -11,2 +12,4 @@ setup.py | ||
| bces.egg-info/dependency_links.txt | ||
| bces.egg-info/top_level.txt | ||
| bces.egg-info/requires.txt | ||
| bces.egg-info/top_level.txt | ||
| tests/test_bces.py |
+13
-10
@@ -1,15 +0,18 @@ | ||
| """ | ||
| """BCES linear regression for data with measurement errors.""" | ||
| Python module for performing robust linear regression on (X,Y) data points where | ||
| both X and Y have measurement errors. | ||
| __version__ = "2.0" | ||
| The fitting method is the bivariate correlated errors and intrinsic scatter | ||
| (BCES) and follows the description given in Akritas, M. G., & Bershady, M. A. | ||
| Astrophysical Journal, 1996, 470, 706. | ||
| # NOTE: `from bces import bces` returns the bces.bces submodule (not the function) | ||
| # due to a naming collision — the package, submodule, and core function share the | ||
| # name "bces". Use `import bces.bces as BCES; BCES.bces(...)` or | ||
| # `from bces.bces import bces` to access the regression function directly. | ||
| Author: Rodrigo Nemmen, https://rodrigonemmen.com | ||
| """ | ||
| __all__ = ["bces", "bcesboot", "bcesp", "bootstrap", "wls", "wlsboot", "wlsp"] | ||
| def __getattr__(name): | ||
| """Lazy attribute access for non-submodule names.""" | ||
| if name in ("bcesboot", "bcesp", "bootstrap", "wls", "wlsboot", "wlsp"): | ||
| import bces.bces as _mod | ||
| return getattr(_mod, name) | ||
| raise AttributeError(f"module 'bces' has no attribute {name!r}") |
+267
-118
@@ -1,6 +0,7 @@ | ||
| from __future__ import (absolute_import, division, print_function, | ||
| unicode_literals) | ||
| import numpy as np | ||
| import scipy | ||
| import scipy.stats | ||
| import tqdm | ||
| import time | ||
| import multiprocessing | ||
@@ -25,3 +26,3 @@ | ||
| - a,b : best-fit parameters a,b of the linear regression | ||
| - a,b : best-fit parameters a,b of the linear regression | ||
| - aerr,berr : the standard deviations in a,b | ||
@@ -42,3 +43,3 @@ - covab : the covariance between a and b (e.g. for plotting confidence bands) | ||
| xi,zeta=[],[] | ||
| # Calculate sigma's for datapoints using length of conf. intervals | ||
@@ -48,3 +49,3 @@ sig11var = np.mean( y1err**2 ) | ||
| sig12var = np.mean( cerr ) | ||
| # Covariance of Y1 (X) and Y2 (Y) | ||
@@ -62,8 +63,8 @@ covar_y1y2 = np.mean( (y1-y1.mean())*(y2-y2.mean()) ) | ||
| a[3] = 0.5*((a[1]-(1./a[0])) + sign*np.sqrt(4.+(a[1]-(1./a[0]))**2)) # orthogonal | ||
| # Compute intercepts | ||
| for i in range(4): | ||
| b[i]=y2.mean()-a[i]*y1.mean() | ||
| # Set up variables to calculate standard deviations of slope/intercept | ||
| # Set up variables to calculate standard deviations of slope/intercept | ||
| xi.append( ( (y1-y1.mean()) * (y2-a[0]*y1-b[0]) + a[0]*y1err**2 ) / (y1.var()-sig11var) ) # Y|X | ||
@@ -80,18 +81,19 @@ xi.append( ( (y2-y2.mean()) * (y2-a[1]*y1-b[1]) - y2err**2 ) / covar_y1y2 ) # X|Y | ||
| bvar[i]=zeta[i].var()/zeta[i].size | ||
| # Sample covariance obtained from xi and zeta (paragraph after equation 15 in AB96) | ||
| covarxiz[i]=np.mean( (xi[i]-xi[i].mean()) * (zeta[i]-zeta[i].mean()) ) | ||
| # Covariance between a and b (equation after eq. 15 in AB96) | ||
| covar_ab=covarxiz/y1.size | ||
| return a,b,np.sqrt(avar),np.sqrt(bvar),covar_ab | ||
| def bootstrap(v): | ||
| """ | ||
| Constructs Monte Carlo simulated data set using the | ||
| Bootstrap algorithm. | ||
| Bootstrap algorithm. | ||
@@ -103,7 +105,7 @@ Usage: | ||
| where x is either an array or a list of arrays. If it is a | ||
| list, the code returns the corresponding list of bootstrapped | ||
| arrays assuming that the same position in these arrays map the | ||
| list, the code returns the corresponding list of bootstrapped | ||
| arrays assuming that the same position in these arrays map the | ||
| same "physical" object. | ||
| """ | ||
| if type(v)==list: | ||
| if isinstance(v, list): | ||
| vboot=[] # list of boostrapped arrays | ||
@@ -117,5 +119,4 @@ n=v[0].size | ||
| vboot=v[iran] | ||
| return vboot | ||
@@ -126,5 +127,22 @@ | ||
| def _bootstrap_results(am, bm, nsim): | ||
| """ | ||
| Compute bootstrapping statistics from accumulated slope/intercept matrices. | ||
| Returns a, b, erra, errb, covab. | ||
| """ | ||
| a = am.mean(axis=0) | ||
| b = bm.mean(axis=0) | ||
| erra, errb, covab = np.zeros(4), np.zeros(4), np.zeros(4) | ||
| for i in range(4): | ||
| erra[i] = np.sqrt(1./(nsim-1) * (np.sum(am[:,i]**2) - nsim*(am[:,i].mean())**2)) | ||
| errb[i] = np.sqrt(1./(nsim-1) * (np.sum(bm[:,i]**2) - nsim*(bm[:,i].mean())**2)) | ||
| covab[i] = 1./(nsim-1) * (np.sum(am[:,i]*bm[:,i]) - nsim*am[:,i].mean()*bm[:,i].mean()) | ||
| return a, b, erra, errb, covab | ||
| def bcesboot(y1,y1err,y2,y2err,cerr,nsim=10000): | ||
| """ | ||
| Does the BCES with bootstrapping. | ||
| Does the BCES with bootstrapping. | ||
@@ -140,3 +158,3 @@ Usage: | ||
| :returns: a,b -- best-fit parameters a,b of the linear regression | ||
| :returns: a,b -- best-fit parameters a,b of the linear regression | ||
| :returns: aerr,berr -- the standard deviations in a,b | ||
@@ -147,10 +165,8 @@ :returns: covab -- the covariance between a and b (e.g. for plotting confidence bands) | ||
| """ | ||
| import tqdm | ||
| print("Bootstrapping progress:") | ||
| """ | ||
| My convention for storing the results of the bces code below as | ||
| My convention for storing the results of the bces code below as | ||
| matrixes for processing later are as follow: | ||
| simulation-method y|x x|y bisector orthogonal | ||
@@ -162,2 +178,3 @@ sim0 ... | ||
| """ | ||
| alist, blist = [], [] | ||
| for i in tqdm.tqdm(range(nsim)): | ||
@@ -172,31 +189,18 @@ # This is needed for small datasets. With a dataset of e.g. 4 points, | ||
| allEquals=allEqual(y1sim) | ||
| asim,bsim,errasim,errbsim,covabsim=bces(y1sim,y1errsim,y2sim,y2errsim,cerrsim) | ||
| if i==0: | ||
| # Initialize the matrixes | ||
| am,bm=asim.copy(),bsim.copy() | ||
| else: | ||
| am=np.vstack((am,asim)) | ||
| bm=np.vstack((bm,bsim)) | ||
| asim,bsim,errasim,errbsim,covabsim=bces(y1sim,y1errsim,y2sim,y2errsim,cerrsim) | ||
| alist.append(asim) | ||
| blist.append(bsim) | ||
| am = np.array(alist) | ||
| bm = np.array(blist) | ||
| if True in np.isnan(am): | ||
| am,bm=checkNan(am,bm) | ||
| # Bootstrapping results | ||
| a=np.array([ am[:,0].mean(),am[:,1].mean(),am[:,2].mean(),am[:,3].mean() ]) | ||
| b=np.array([ bm[:,0].mean(),bm[:,1].mean(),bm[:,2].mean(),bm[:,3].mean() ]) | ||
| nsim = am.shape[0] | ||
| # Error from unbiased sample variances | ||
| erra,errb,covab=np.zeros(4),np.zeros(4),np.zeros(4) | ||
| for i in range(4): | ||
| erra[i]=np.sqrt( 1./(nsim-1) * ( np.sum(am[:,i]**2)-nsim*(am[:,i].mean())**2 )) | ||
| errb[i]=np.sqrt( 1./(nsim-1) * ( np.sum(bm[:,i]**2)-nsim*(bm[:,i].mean())**2 )) | ||
| covab[i]=1./(nsim-1) * ( np.sum(am[:,i]*bm[:,i])-nsim*am[:,i].mean()*bm[:,i].mean() ) | ||
| return a,b,erra,errb,covab | ||
| return _bootstrap_results(am, bm, nsim) | ||
| def checkNan(am,bm): | ||
@@ -207,9 +211,7 @@ """ | ||
| regression (I need to investigate this in more details). | ||
| This method checks to see if there are NaNs in the bootstrapped | ||
| This method checks to see if there are NaNs in the bootstrapped | ||
| fits and remove them from the final sample. | ||
| """ | ||
| import nmmn.lsd | ||
| idel=nmmn.lsd.findnan(am[:,2]) | ||
| idel = np.where(np.isnan(am[:, 2]))[0] | ||
| print("Bootstrapping error: regression failed in",np.size(idel),"instances. They were removed.") | ||
@@ -223,3 +225,3 @@ | ||
| """ | ||
| Check if all elements in an array are equal. | ||
| Check if all elements in an array are equal. | ||
| Returns True if they are all the same. | ||
@@ -237,4 +239,173 @@ """ | ||
| # WLS fitting | ||
| # ============ | ||
def wls(x, y, yerr):
    """
    Weighted Least Squares regression for the case where X is measured without
    error (V_{11,i} = 0). Implements the WLS estimator from Section 2.3 of
    Akritas & Bershady (1996).

    Usage:

    >>> a, b, aerr, berr, covab = wls(x, y, yerr)

    :param x: independent variable (assumed error-free), 1D array-like
    :param y: dependent variable, 1D array-like
    :param yerr: measurement errors (standard deviations) on y, 1D array-like
    :returns: a, b -- best-fit slope and intercept (Y = a*X + b), scalars
    :returns: aerr, berr -- standard deviations of a and b
    :returns: covab -- covariance between a and b
    """
    # Accept any array-like input (lists, tuples, arrays); previously plain
    # lists would fail on the .mean() calls below
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    yerr = np.asarray(yerr, dtype=float)
    v22 = yerr**2

    # Step 1: ordinary least squares to get an initial slope/intercept
    xmean, ymean = x.mean(), y.mean()
    beta_ols = np.sum((x - xmean) * (y - ymean)) / np.sum((x - xmean)**2)
    alpha_ols = ymean - beta_ols * xmean

    # Step 2: residuals about the OLS line
    R = y - alpha_ols - beta_ols * x

    # Step 3: intrinsic scatter variance = residual variance minus the mean
    # measurement variance; clamped at zero to avoid negative weights
    Ve = max(np.mean((R - R.mean())**2) - np.mean(v22), 0.0)

    # Step 4: total variance per point (intrinsic + measurement)
    sigma2 = Ve + v22

    # Step 5: WLS slope and intercept via the weighted normal equations
    # (equations 18-19)
    w = 1.0 / sigma2
    S = w.sum()
    Sx = (w * x).sum()
    Sy = (w * y).sum()
    Sxx = (w * x**2).sum()
    Sxy = (w * x * y).sum()
    Delta = S * Sxx - Sx**2
    a = (S * Sxy - Sx * Sy) / Delta
    b = (Sxx * Sy - Sx * Sxy) / Delta

    # Step 6: parameter uncertainties and covariance (equations 20-21)
    aerr = np.sqrt(S / Delta)
    berr = np.sqrt(Sxx / Delta)
    covab = -Sx / Delta

    return a, b, aerr, berr, covab
def wlsboot(x, y, yerr, nsim=10000):
    """
    Bootstrap version of WLS regression.

    Usage:

    >>> a, b, aerr, berr, covab = wlsboot(x, y, yerr, nsim)

    :param x: independent variable (error-free)
    :param y: dependent variable
    :param yerr: measurement errors on y
    :param nsim: number of bootstrap simulations
    :returns: a, b -- best-fit slope and intercept
    :returns: aerr, berr -- bootstrap standard deviations
    :returns: covab -- bootstrap covariance between a and b
    """
    print("WLS bootstrapping progress:")
    slopes, intercepts = [], []
    for _ in tqdm.tqdm(range(nsim)):
        # Keep resampling while every bootstrapped x coincides, since such a
        # sample makes the regression degenerate (matters for tiny datasets).
        degenerate = True
        while degenerate:
            xsim, ysim, yerrsim = bootstrap([x, y, yerr])
            degenerate = allEqual(xsim)
        fit = wls(xsim, ysim, yerrsim)
        slopes.append(fit[0])
        intercepts.append(fit[1])
    am = np.array(slopes).reshape(-1, 1)
    bm = np.array(intercepts).reshape(-1, 1)
    a = am.mean()
    b = bm.mean()
    # Unbiased sample variance / covariance of the bootstrap distribution
    aerr = np.sqrt((np.sum(am**2) - nsim * a**2) / (nsim - 1.))
    berr = np.sqrt((np.sum(bm**2) - nsim * b**2) / (nsim - 1.))
    covab = (np.sum(am * bm) - nsim * a * b) / (nsim - 1.)
    return a, b, aerr, berr, covab
def _ab_wls(x):
    """
    Worker function for parallel WLS bootstrap.

    Argument: [x_data, y_data, yerr_data, nsim_per_core]
    Returns: (am, bm) arrays of shape (nsim_per_core,)
    """
    xd, yd, yerrd = x[0], x[1], x[2]
    ntrials = int(x[3])
    slopes, intercepts = [], []
    for _ in range(ntrials):
        # Resample until the bootstrapped x values are not all identical,
        # otherwise the fit is degenerate (relevant for small datasets).
        degenerate = True
        while degenerate:
            xsim, ysim, yerrsim = bootstrap([xd, yd, yerrd])
            degenerate = allEqual(xsim)
        fit = wls(xsim, ysim, yerrsim)
        slopes.append(fit[0])
        intercepts.append(fit[1])
    return np.array(slopes), np.array(intercepts)
def wlsp(x, y, yerr, nsim=10000):
    """
    Parallel bootstrap version of WLS regression.

    Usage:

    >>> a, b, aerr, berr, covab = wlsp(x, y, yerr, nsim)

    :param x: independent variable (error-free)
    :param y: dependent variable
    :param yerr: measurement errors on y
    :param nsim: number of bootstrap simulations
    :returns: a, b -- best-fit slope and intercept
    :returns: aerr, berr -- bootstrap standard deviations
    :returns: covab -- bootstrap covariance between a and b
    """
    print("WLS parallel bootstrap,", nsim, "trials...")
    tic = time.time()
    # Distribute the work as two chunks per available core
    ncores = multiprocessing.cpu_count()
    n = 2 * ncores
    # Guarantee at least one trial per worker: otherwise nsim < n would make
    # nsim // n == 0, yielding empty result arrays and a division by zero in
    # the variance estimates below.
    trials_per_worker = max(1, nsim // n)
    pargs = [[x, y, yerr, trials_per_worker] for _ in range(n)]
    with multiprocessing.Pool(processes=ncores) as pool:
        presult = pool.map(_ab_wls, pargs)
    am = np.concatenate([r[0] for r in presult])
    bm = np.concatenate([r[1] for r in presult])
    # Use the actual number of simulations performed, which may differ
    # slightly from nsim due to the integer division among workers
    actual_nsim = len(am)
    a = am.mean()
    b = bm.mean()
    # Unbiased sample variance / covariance of the bootstrap distribution
    aerr = np.sqrt(1. / (actual_nsim - 1) * (np.sum(am**2) - actual_nsim * a**2))
    berr = np.sqrt(1. / (actual_nsim - 1) * (np.sum(bm**2) - actual_nsim * b**2))
    covab = 1. / (actual_nsim - 1) * (np.sum(am * bm) - actual_nsim * a * b)
    print("%f s" % (time.time() - tic))
    return a, b, aerr, berr, covab
| # Methods which make use of parallelization | ||
@@ -246,7 +417,7 @@ # =========================================== | ||
| """ | ||
| This method is the big bottleneck of the parallel BCES code. That's the | ||
| reason why I put these calculations in a separate method, in order to | ||
| distribute this among the cores. In the original BCES method, this is | ||
| This method is the big bottleneck of the parallel BCES code. That's the | ||
| reason why I put these calculations in a separate method, in order to | ||
| distribute this among the cores. In the original BCES method, this is | ||
| inside the main routine. | ||
| Argument: | ||
@@ -258,3 +429,3 @@ [y1,y1err,y2,y2err,cerr,nsim] | ||
| Be very careful and do not use lambda functions when calling this | ||
| Be very careful and do not use lambda functions when calling this | ||
| method and passing it to multiprocessing or ipython.parallel! | ||
@@ -265,3 +436,4 @@ I spent >2 hours figuring out why the code was not working until I | ||
| y1,y1err,y2,y2err,cerr,nsim=x[0],x[1],x[2],x[3],x[4],x[5] | ||
| alist, blist = [], [] | ||
| for i in range(int(nsim)): | ||
@@ -277,17 +449,14 @@ # This is needed for small datasets. With datasets of 4 points or less, | ||
| asim,bsim,errasim,errbsim,covabsim=bces(y1sim,y1errsim,y2sim,y2errsim,cerrsim) | ||
| if i==0: | ||
| # Initialize the matrixes | ||
| am,bm=asim.copy(),bsim.copy() | ||
| else: | ||
| am=np.vstack((am,asim)) | ||
| bm=np.vstack((bm,bsim)) | ||
| return am,bm | ||
| asim,bsim,errasim,errbsim,covabsim=bces(y1sim,y1errsim,y2sim,y2errsim,cerrsim) | ||
| alist.append(asim) | ||
| blist.append(bsim) | ||
| am = np.array(alist) | ||
| bm = np.array(blist) | ||
| return am, bm | ||
| def bcesp(y1,y1err,y2,y2err,cerr,nsim=10000): | ||
@@ -309,3 +478,3 @@ """ | ||
| :returns: a,b - best-fit parameters a,b of the linear regression | ||
| :returns: a,b - best-fit parameters a,b of the linear regression | ||
| :returns: aerr,berr - the standard deviations in a,b | ||
@@ -320,9 +489,6 @@ :returns: covab - the covariance between a and b (e.g. for plotting confidence bands) | ||
| .. codeauthor: Rodrigo Nemmen | ||
| """ | ||
| import time # for benchmarking | ||
| import multiprocessing | ||
| """ | ||
| print("BCES,", nsim,"trials... ") | ||
| tic=time.time() | ||
| # Find out number of cores available | ||
@@ -332,6 +498,6 @@ ncores=multiprocessing.cpu_count() | ||
| n=2*ncores | ||
| """ | ||
| Must create lists that will be distributed among the many | ||
| cores with structure | ||
| cores with structure | ||
| core1 <- [y1,y1err,y2,y2err,cerr,nsim/n] | ||
@@ -343,46 +509,29 @@ core2 <- [y1,y1err,y2,y2err,cerr,nsim/n] | ||
| for i in range(n): | ||
| pargs.append([y1,y1err,y2,y2err,cerr,nsim/n]) | ||
| pargs.append([y1,y1err,y2,y2err,cerr,nsim//n]) | ||
| # Initializes the parallel engine | ||
| pool = multiprocessing.Pool(processes=ncores) # multiprocessing package | ||
| """ | ||
| Each core processes ab(input) | ||
| return matrixes Am,Bm with the results of nsim/n | ||
| presult[i][0] = Am with nsim/n lines | ||
| presult[i][1] = Bm with nsim/n lines | ||
| """ | ||
| presult=pool.map(ab, pargs) # multiprocessing | ||
| pool.close() # close the parallel engine | ||
| with multiprocessing.Pool(processes=ncores) as pool: | ||
| """ | ||
| Each core processes ab(input) | ||
| return matrixes Am,Bm with the results of nsim/n | ||
| presult[i][0] = Am with nsim/n lines | ||
| presult[i][1] = Bm with nsim/n lines | ||
| """ | ||
| presult = pool.map(ab, pargs) # multiprocessing | ||
| # vstack the matrixes processed from all cores | ||
| i=0 | ||
| for m in presult: | ||
| if i==0: | ||
| # Initialize the matrixes | ||
| am,bm=m[0].copy(),m[1].copy() | ||
| else: | ||
| am=np.vstack((am,m[0])) | ||
| bm=np.vstack((bm,m[1])) | ||
| i=i+1 | ||
| am = np.vstack([m[0] for m in presult]) | ||
| bm = np.vstack([m[1] for m in presult]) | ||
| if True in np.isnan(am): | ||
| am,bm=checkNan(am,bm) | ||
| # Computes the bootstrapping results on the stacked matrixes | ||
| a=np.array([ am[:,0].mean(),am[:,1].mean(),am[:,2].mean(),am[:,3].mean() ]) | ||
| b=np.array([ bm[:,0].mean(),bm[:,1].mean(),bm[:,2].mean(),bm[:,3].mean() ]) | ||
| # Use actual number of simulations (may differ slightly due to integer division) | ||
| actual_nsim = am.shape[0] | ||
| # Error from unbiased sample variances | ||
| erra,errb,covab=np.zeros(4),np.zeros(4),np.zeros(4) | ||
| for i in range(4): | ||
| erra[i]=np.sqrt( 1./(nsim-1) * ( np.sum(am[:,i]**2)-nsim*(am[:,i].mean())**2 )) | ||
| errb[i]=np.sqrt( 1./(nsim-1) * ( np.sum(bm[:,i]**2)-nsim*(bm[:,i].mean())**2 )) | ||
| covab[i]=1./(nsim-1) * ( np.sum(am[:,i]*bm[:,i])-nsim*am[:,i].mean()*bm[:,i].mean() ) | ||
| print("%f s" % (time.time() - tic)) | ||
| return a,b,erra,errb,covab | ||
| return _bootstrap_results(am, bm, actual_nsim) | ||
+1
-1
| MIT License | ||
| Copyright (c) 2019 Rodrigo Nemmen | ||
| Copyright (c) 2026 Rodrigo Nemmen | ||
@@ -5,0 +5,0 @@ Permission is hereby granted, free of charge, to any person obtaining a copy |
+65
-108
@@ -1,40 +0,33 @@ | ||
| Metadata-Version: 2.1 | ||
| Metadata-Version: 2.4 | ||
| Name: bces | ||
| Version: 1.5.1 | ||
| Version: 2.0 | ||
| Summary: Python module for performing linear regression for data with measurement errors and intrinsic scatter | ||
| Home-page: https://github.com/rsnemmen/BCES | ||
| Download-URL: https://github.com/rsnemmen/BCES/archive/1.5.1.tar.gz | ||
| Download-URL: https://github.com/rsnemmen/BCES/archive/2.0.tar.gz | ||
| Author: Rodrigo Nemmen | ||
| Author-email: rodrigo.nemmen@iag.usp.br | ||
| License: MIT License | ||
| Copyright (c) 2019 Rodrigo Nemmen | ||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| of this software and associated documentation files (the "Software"), to deal | ||
| in the Software without restriction, including without limitation the rights | ||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| copies of the Software, and to permit persons to whom the Software is | ||
| furnished to do so, subject to the following conditions: | ||
| The above copyright notice and this permission notice shall be included in all | ||
| copies or substantial portions of the Software. | ||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| SOFTWARE. | ||
| Author-email: Rodrigo Nemmen <rodrigo.nemmen@iag.usp.br> | ||
| License-Expression: MIT | ||
| Project-URL: Homepage, https://github.com/rsnemmen/BCES | ||
| Keywords: statistics,fitting,linear-regression,machine-learning | ||
| Classifier: Programming Language :: Python :: 3 | ||
| Classifier: Intended Audience :: Science/Research | ||
| Classifier: Topic :: Scientific/Engineering :: Astronomy | ||
| Requires-Python: >=3.8 | ||
| Description-Content-Type: text/markdown | ||
| License-File: LICENSE | ||
| Requires-Dist: numpy | ||
| Requires-Dist: scipy | ||
| Requires-Dist: tqdm | ||
| Dynamic: author | ||
| Dynamic: download-url | ||
| Dynamic: home-page | ||
| Dynamic: license-file | ||
| Dynamic: requires-python | ||
| Linear regression for data with measurement errors and intrinsic scatter (BCES) | ||
| ========== | ||
| BCES and WLS: Linear regression for data with measurement errors and intrinsic scatter | ||
| ========================================= | ||
| Python module for performing robust linear regression on (X,Y) data points where both X and Y have measurement errors. | ||
| Python module for performing robust linear regression on (X,Y) data points with measurement errors. | ||
| The fitting method is the *bivariate correlated errors and intrinsic scatter* (BCES) and follows the description given in [Akritas & Bershady. 1996, ApJ](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/). Some of the advantages of BCES regression compared to ordinary least squares fitting (quoted from Akritas & Bershady 1996): | ||
| The **BCES** fitting method is the *bivariate correlated errors and intrinsic scatter* (BCES) and follows the description given in [Akritas & Bershady. 1996, ApJ](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/). Some of the advantages of BCES regression compared to ordinary least squares (OLS) fitting: | ||
@@ -46,19 +39,14 @@ * it allows for measurement errors on both variables | ||
| In order to understand how to perform and interpret the regression results, please read the paper. | ||
| The **WLS** (weighted least squares) method handles the case where only Y has measurement errors and X is treated as error-free. It accounts for intrinsic scatter in the data and follows Akritas & Bershady 1996, §2.3. | ||
| # Installation | ||
| ## Installation | ||
| Using `pip`: | ||
| pip install bces | ||
| If that does not work, you can install it using the `setup.py` script: | ||
| Alternatively, if you plan to modify the source then install the package with a symlink, so that changes to the source files will be immediately available: | ||
| python setup.py install | ||
| pip install -e . | ||
| You may need to run the last command with `sudo`. | ||
| Alternatively, if you plan to modify the source then install the package with a symlink, so that changes to the source files will be immediately available: | ||
| python setup.py develop | ||
@@ -68,7 +56,6 @@ | ||
| ## Usage | ||
| ### BCES | ||
| # Usage | ||
| import bces.bces as BCES | ||
@@ -93,3 +80,3 @@ a,b,aerr,berr,covab=BCES.bcesp(x,xerr,y,yerr,cov) | ||
| Each element of the arrays *a*, *b*, *aerr*, *berr* and *covab* correspond to the result of one of the different BCES lines: *y|x*, *x|y*, bissector and orthogonal, as detailed in the table below. Please read the [original BCES paper](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) to understand what these different lines mean. | ||
| Each element of the arrays `a`, `b`, `aerr`, `berr` and `covab` correspond to the result of one of the different BCES lines: $y|x$, $x|y$, bissector and orthogonal, as detailed in the table below. Please read the [original BCES paper](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) to understand what these different lines mean. | ||
@@ -101,101 +88,71 @@ | ||
| | 1 | *x\|y* | Assumes *y* as the independent variable | | ||
| | 2 | bissector | Line that bisects the *y\|x* and *x\|y*. This approach is self-inconsistent, *do not use this method*, cf. [Hogg, D. et al. 2010, arXiv:1008.4686](http://labs.adsabs.harvard.edu/adsabs/abs/2010arXiv1008.4686H/). | | ||
| | 2 | bissector | Line that bisects the *y\|x* and *x\|y*. This approach is self-inconsistent, [*do not use this method*](https://arxiv.org/abs/1008.4686). | | ||
| | 3 | orthogonal | Orthogonal least squares: line that minimizes orthogonal distances. Should be used when it is not clear which variable should be treated as the independent one | | ||
| By default, `bcesp` run in parallel with bootstrapping. | ||
| By default, `bcesp` runs the bootstrapping in parallel. | ||
| ### WLS | ||
| import bces.bces as BCES | ||
| a,b,aerr,berr,covab=BCES.wls(x,y,yerr) | ||
| Arguments: | ||
| # Examples | ||
| - *x,y*: 1D data arrays | ||
| - *yerr*: measurement errors affecting y, 1D array | ||
| [`bces-examples.ipynb` is a jupyter notebook](https://github.com/rsnemmen/BCES/blob/master/bces-examples.ipynb) including a practical, step-by-step example of how to use BCES to perform regression on data with uncertainties on x and y. It also illustrates how to plot the confidence band for a fit. | ||
| Output: | ||
|  | ||
| - *a,b*: best-fit slope and intercept of the linear regression such that *y = Ax + B* (scalars) | ||
| - *aerr,berr*: the standard deviations in a,b | ||
| - *covab*: the covariance between a and b | ||
| If you have suggestions of more examples, feel free to add them. | ||
| Note that unlike BCES, WLS returns scalar values (a single regression line) rather than 4-element arrays. | ||
| The `wlsp` method performs bootstrapping in parallel, if you need that. | ||
| ### When to use BCES or WLS? | ||
| # Running Tests | ||
| Both methods return unbiased estimates of the slope and intercept, but they suit different statistical situations: | ||
| To test your installation, run the following command inside the BCES directory: | ||
| - **Use BCES** when both X and Y have measurement errors, or when measurement errors on X and Y may be correlated. | ||
| - **Use WLS** when only Y has measurement errors (X is error-free or its errors are negligible). | ||
| ```bash | ||
| pytest -v | ||
| ``` | ||
| Both methods account for intrinsic scatter. | ||
| **Why choose WLS over OLS?** | ||
| When only Y has measurement errors, prefer WLS over OLS. OLS assigns equal weight to every data point regardless of measurement uncertainty, while WLS weights each point by the inverse of its error variance so more precisely measured points have greater influence on the fit. This produces more accurate and statistically efficient estimates when data points have heteroscedastic (unequal) errors. | ||
| # Requirements | ||
| See `requirements.txt`. | ||
| ## Examples | ||
| [`bces-examples.ipynb` is a jupyter notebook](https://github.com/rsnemmen/BCES/blob/master/doc/bces-examples.ipynb) including a practical, step-by-step example of how to use BCES to perform regression on data with uncertainties on x and y. It also illustrates how to plot the confidence band for a fit. | ||
| # Citation | ||
| [`wls.ipynb` is a jupyter notebook](https://github.com/rsnemmen/BCES/blob/master/doc/wls.ipynb) with examples of WLS regression, including fits with intrinsic scatter. | ||
| If you end up using this code in your paper, you are morally obliged to cite the following works | ||
|  | ||
| - The original BCES paper: [Akritas, M. G., & Bershady, M. A. Astrophysical Journal, 1996, 470, 706](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) | ||
| - [Nemmen, R. et al. *Science*, 2012, 338, 1445](http://labs.adsabs.harvard.edu/adsabs/abs/2012Sci...338.1445N/) ([bibtex citation info](http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode=2012Sci...338.1445N&data_type=BIBTEX&db_key=AST&nocookieset=1)) | ||
| I spent considerable time writing this code, making sure it is correct and *user-friendly*, so I would appreciate your citation of the second paper in the above list as a token of gratitude. | ||
| If you are *really* happy with the code, [you can buy me a beer](https://www.dropbox.com/s/a0rp5un6ubrkph2/crypto%20wallets.pdf?dl=0). | ||
| ## Running Tests | ||
| ```bash | ||
| pytest -v -s | ||
| ``` | ||
| ## Citation | ||
| If you end up using this code in your paper, you are morally obliged to cite the following works | ||
| - [Nemmen, R. et al. *Science*, 2012, 338, 1445](http://labs.adsabs.harvard.edu/adsabs/abs/2012Sci...338.1445N/) ([bibtex citation info](http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode=2012Sci...338.1445N&data_type=BIBTEX&db_key=AST&nocookieset=1)) | ||
| - The original BCES paper: [Akritas, M. G., & Bershady, M. A. Astrophysical Journal, 1996, 470, 706](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) | ||
| # Misc. | ||
| I spent considerable time writing this code, making sure it is correct and *user-friendly*, so I would appreciate your citation of the first paper in the above list as a token of gratitude. | ||
| This python module is inspired by the (much faster) fortran routine [originally written by Akritas et al](http://www.astro.wisc.edu/%7Emab/archive/stats/stats.html). I wrote it because I wanted something more portable and easier to use, trading off speed. | ||
| If you are *really* happy with the code, you can buy me a coffee. | ||
| [](https://www.buymeacoffee.com/nemmen) | ||
| For a general tutorial on how to (and how not to) perform linear regression, [please read this paper: Hogg, D. et al. 2010, arXiv:1008.4686](http://labs.adsabs.harvard.edu/adsabs/abs/2010arXiv1008.4686H/). In particular, *please refrain from using the bisector method*. | ||
| If you want to plot confidence bands for your fits, have a look at [`nmmn` package](https://github.com/rsnemmen/nmmn) (in particular, modules `nmmn.plots.fitconf` and `stats`). | ||
| ## Bayesian linear regression | ||
| There are a couple of Bayesian approaches to perform linear regression which can be more powerful than BCES, some of which are described below. | ||
| **A Gibbs Sampler for Multivariate Linear Regression:** | ||
| [R code](https://github.com/abmantz/lrgs), [arXiv:1509.00908](http://arxiv.org/abs/1509.00908). | ||
| Linear regression in the fairly general case with errors in X and Y, errors may be correlated, intrinsic scatter. The prior distribution of covariates is modeled by a flexible mixture of Gaussians. This is an extension of the very nice work by Brandon Kelly [(Kelly, B. 2007, ApJ)](http://labs.adsabs.harvard.edu/adsabs/abs/2007ApJ...665.1489K/). | ||
| **LIRA: A Bayesian approach to linear regression in astronomy:** [R code](https://github.com/msereno/lira), [arXiv:1509.05778](http://arxiv.org/abs/1509.05778) | ||
| Bayesian hierarchical modelling of data with heteroscedastic and possibly correlated measurement errors and intrinsic scatter. The method fully accounts for time evolution. The slope, the normalization, and the intrinsic scatter of the relation can evolve with the redshift. The intrinsic distribution of the independent variable is approximated using a mixture of Gaussian distributions whose means and standard deviations depend on time. The method can address scatter in the measured independent variable (a kind of Eddington bias), selection effects in the response variable (Malmquist bias), and departure from linearity in form of a knee. | ||
| **AstroML: Machine Learning and Data Mining for Astronomy.** | ||
| [Python example](http://www.astroml.org/book_figures/chapter8/fig_total_least_squares.html) of a linear fit to data with correlated errors in x and y using AstroML. In the literature, this is often referred to as total least squares or errors-in-variables fitting. | ||
| # Todo | ||
| If you have improvements to the code, suggestions of examples, ways to speed up the code, etc., feel free to [submit a pull request](https://guides.github.com/activities/contributing-to-open-source/). | ||
| * [ ] implement weighted least squares (WLS) | ||
| * [x] implement unit testing: `bces` | ||
| * [x] unit testing: `bootstrap` | ||
| [Visit the author's web page](https://rodrigonemmen.com/) and/or follow him on twitter ([@nemmen](https://twitter.com/nemmen)). | ||
| --- | ||
| Copyright (c) 2023, [Rodrigo Nemmen](https://rodrigonemmen.com). | ||
| [All rights reserved](http://opensource.org/licenses/BSD-2-Clause). | ||
| Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: | ||
| Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. | ||
| Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
+47
-83
@@ -1,7 +0,7 @@ | ||
| Linear regression for data with measurement errors and intrinsic scatter (BCES) | ||
| ========== | ||
| BCES and WLS: Linear regression for data with measurement errors and intrinsic scatter | ||
| ========================================= | ||
| Python module for performing robust linear regression on (X,Y) data points where both X and Y have measurement errors. | ||
| Python module for performing robust linear regression on (X,Y) data points with measurement errors. | ||
| The fitting method is the *bivariate correlated errors and intrinsic scatter* (BCES) and follows the description given in [Akritas & Bershady. 1996, ApJ](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/). Some of the advantages of BCES regression compared to ordinary least squares fitting (quoted from Akritas & Bershady 1996): | ||
| The **BCES** fitting method is the *bivariate correlated errors and intrinsic scatter* (BCES) and follows the description given in [Akritas & Bershady. 1996, ApJ](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/). Some of the advantages of BCES regression compared to ordinary least squares (OLS) fitting: | ||
@@ -13,19 +13,14 @@ * it allows for measurement errors on both variables | ||
| In order to understand how to perform and interpret the regression results, please read the paper. | ||
| The **WLS** (weighted least squares) method handles the case where only Y has measurement errors and X is treated as error-free. It accounts for intrinsic scatter in the data and follows Akritas & Bershady 1996, §2.3. | ||
| # Installation | ||
| ## Installation | ||
| Using `pip`: | ||
| pip install bces | ||
| If that does not work, you can install it using the `setup.py` script: | ||
| Alternatively, if you plan to modify the source then install the package with a symlink, so that changes to the source files will be immediately available: | ||
| python setup.py install | ||
| pip install -e . | ||
| You may need to run the last command with `sudo`. | ||
| Alternatively, if you plan to modify the source then install the package with a symlink, so that changes to the source files will be immediately available: | ||
| python setup.py develop | ||
@@ -35,7 +30,6 @@ | ||
| ## Usage | ||
| ### BCES | ||
| # Usage | ||
| import bces.bces as BCES | ||
@@ -60,3 +54,3 @@ a,b,aerr,berr,covab=BCES.bcesp(x,xerr,y,yerr,cov) | ||
| Each element of the arrays *a*, *b*, *aerr*, *berr* and *covab* correspond to the result of one of the different BCES lines: *y|x*, *x|y*, bissector and orthogonal, as detailed in the table below. Please read the [original BCES paper](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) to understand what these different lines mean. | ||
| Each element of the arrays `a`, `b`, `aerr`, `berr` and `covab` correspond to the result of one of the different BCES lines: $y|x$, $x|y$, bissector and orthogonal, as detailed in the table below. Please read the [original BCES paper](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) to understand what these different lines mean. | ||
@@ -68,101 +62,71 @@ | ||
| | 1 | *x\|y* | Assumes *y* as the independent variable | | ||
| | 2 | bisector | Line that bisects the *y\|x* and *x\|y*. This approach is self-inconsistent, *do not use this method*, cf. [Hogg, D. et al. 2010, arXiv:1008.4686](http://labs.adsabs.harvard.edu/adsabs/abs/2010arXiv1008.4686H/). | | ||
| | 2 | bisector | Line that bisects the *y\|x* and *x\|y*. This approach is self-inconsistent, [*do not use this method*](https://arxiv.org/abs/1008.4686). | | ||
| | 3 | orthogonal | Orthogonal least squares: line that minimizes orthogonal distances. Should be used when it is not clear which variable should be treated as the independent one | | ||
| By default, `bcesp` run in parallel with bootstrapping. | ||
| By default, `bcesp` runs the bootstrapping in parallel. | ||
| ### WLS | ||
| import bces.bces as BCES | ||
| a,b,aerr,berr,covab=BCES.wls(x,y,yerr) | ||
| Arguments: | ||
| # Examples | ||
| - *x,y*: 1D data arrays | ||
| - *yerr*: measurement errors affecting y, 1D array | ||
| [`bces-example.ipynb` is a jupyter notebook](https://github.com/rsnemmen/BCES/blob/master/bces-examples.ipynb) including a practical, step-by-step example of how to use BCES to perform regression on data with uncertainties on x and y. It also illustrates how to plot the confidence band for a fit. | ||
| Output: | ||
|  | ||
| - *a,b*: best-fit slope and intercept of the linear regression such that *y = Ax + B* (scalars) | ||
| - *aerr,berr*: the standard deviations in a,b | ||
| - *covab*: the covariance between a and b | ||
| If you have suggestions of more examples, feel free to add them. | ||
| Note that unlike BCES, WLS returns scalar values (a single regression line) rather than 4-element arrays. | ||
| The `wlsp` method performs bootstrapping in parallel, if you need that. | ||
| ### When to use BCES or WLS? | ||
| # Running Tests | ||
| Both methods return unbiased estimates of the slope and intercept, but they suit different statistical situations: | ||
| To test your installation, run the following command inside the BCES directory: | ||
| - **Use BCES** when both X and Y have measurement errors, or when measurement errors on X and Y may be correlated. | ||
| - **Use WLS** when only Y has measurement errors (X is error-free or its errors are negligible). | ||
| ```bash | ||
| pytest -v | ||
| ``` | ||
| Both methods account for intrinsic scatter. | ||
| **Why choose WLS over OLS?** | ||
| When only Y has measurement errors, prefer WLS over OLS. OLS assigns equal weight to every data point regardless of measurement uncertainty, while WLS weights each point by the inverse of its error variance so more precisely measured points have greater influence on the fit. This produces more accurate and statistically efficient estimates when data points have heteroscedastic (unequal) errors. | ||
| # Requirements | ||
| See `requirements.txt`. | ||
| ## Examples | ||
| [`bces-examples.ipynb` is a jupyter notebook](https://github.com/rsnemmen/BCES/blob/master/doc/bces-examples.ipynb) including a practical, step-by-step example of how to use BCES to perform regression on data with uncertainties on x and y. It also illustrates how to plot the confidence band for a fit. | ||
| # Citation | ||
| [`wls.ipynb` is a jupyter notebook](https://github.com/rsnemmen/BCES/blob/master/doc/wls.ipynb) with examples of WLS regression, including fits with intrinsic scatter. | ||
| If you end up using this code in your paper, you are morally obliged to cite the following works | ||
|  | ||
| - The original BCES paper: [Akritas, M. G., & Bershady, M. A. Astrophysical Journal, 1996, 470, 706](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) | ||
| - [Nemmen, R. et al. *Science*, 2012, 338, 1445](http://labs.adsabs.harvard.edu/adsabs/abs/2012Sci...338.1445N/) ([bibtex citation info](http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode=2012Sci...338.1445N&data_type=BIBTEX&db_key=AST&nocookieset=1)) | ||
| I spent considerable time writing this code, making sure it is correct and *user-friendly*, so I would appreciate your citation of the second paper in the above list as a token of gratitude. | ||
| If you are *really* happy with the code, [you can buy me a beer](https://www.dropbox.com/s/a0rp5un6ubrkph2/crypto%20wallets.pdf?dl=0). | ||
| ## Running Tests | ||
| ```bash | ||
| pytest -v -s | ||
| ``` | ||
| ## Citation | ||
| If you end up using this code in your paper, you are morally obliged to cite the following works | ||
| - [Nemmen, R. et al. *Science*, 2012, 338, 1445](http://labs.adsabs.harvard.edu/adsabs/abs/2012Sci...338.1445N/) ([bibtex citation info](http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode=2012Sci...338.1445N&data_type=BIBTEX&db_key=AST&nocookieset=1)) | ||
| - The original BCES paper: [Akritas, M. G., & Bershady, M. A. Astrophysical Journal, 1996, 470, 706](http://labs.adsabs.harvard.edu/adsabs/abs/1996ApJ...470..706A/) | ||
| # Misc. | ||
| I spent considerable time writing this code, making sure it is correct and *user-friendly*, so I would appreciate your citation of the first paper in the above list as a token of gratitude. | ||
| This python module is inspired by the (much faster) fortran routine [originally written by Akritas et al](http://www.astro.wisc.edu/%7Emab/archive/stats/stats.html). I wrote it because I wanted something more portable and easier to use, trading off speed. | ||
| If you are *really* happy with the code, you can buy me a coffee. | ||
| [](https://www.buymeacoffee.com/nemmen) | ||
| For a general tutorial on how to (and how not to) perform linear regression, [please read this paper: Hogg, D. et al. 2010, arXiv:1008.4686](http://labs.adsabs.harvard.edu/adsabs/abs/2010arXiv1008.4686H/). In particular, *please refrain from using the bisector method*. | ||
| If you want to plot confidence bands for your fits, have a look at [`nmmn` package](https://github.com/rsnemmen/nmmn) (in particular, modules `nmmn.plots.fitconf` and `stats`). | ||
| ## Bayesian linear regression | ||
| There are a couple of Bayesian approaches to perform linear regression which can be more powerful than BCES, some of which are described below. | ||
| **A Gibbs Sampler for Multivariate Linear Regression:** | ||
| [R code](https://github.com/abmantz/lrgs), [arXiv:1509.00908](http://arxiv.org/abs/1509.00908). | ||
| Linear regression in the fairly general case with errors in X and Y, errors may be correlated, intrinsic scatter. The prior distribution of covariates is modeled by a flexible mixture of Gaussians. This is an extension of the very nice work by Brandon Kelly [(Kelly, B. 2007, ApJ)](http://labs.adsabs.harvard.edu/adsabs/abs/2007ApJ...665.1489K/). | ||
| **LIRA: A Bayesian approach to linear regression in astronomy:** [R code](https://github.com/msereno/lira), [arXiv:1509.05778](http://arxiv.org/abs/1509.05778) | ||
| Bayesian hierarchical modelling of data with heteroscedastic and possibly correlated measurement errors and intrinsic scatter. The method fully accounts for time evolution. The slope, the normalization, and the intrinsic scatter of the relation can evolve with the redshift. The intrinsic distribution of the independent variable is approximated using a mixture of Gaussian distributions whose means and standard deviations depend on time. The method can address scatter in the measured independent variable (a kind of Eddington bias), selection effects in the response variable (Malmquist bias), and departure from linearity in form of a knee. | ||
| **AstroML: Machine Learning and Data Mining for Astronomy.** | ||
| [Python example](http://www.astroml.org/book_figures/chapter8/fig_total_least_squares.html) of a linear fit to data with correlated errors in x and y using AstroML. In the literature, this is often referred to as total least squares or errors-in-variables fitting. | ||
| # Todo | ||
| If you have improvements to the code, suggestions of examples, speeding up the code etc, feel free to [submit a pull request](https://guides.github.com/activities/contributing-to-open-source/). | ||
| * [ ] implement weighted least squares (WLS) | ||
| * [x] implement unit testing: `bces` | ||
| * [x] unit testing: `bootstrap` | ||
| [Visit the author's web page](https://rodrigonemmen.com/) and/or follow him on twitter ([@nemmen](https://twitter.com/nemmen)). | ||
| --- | ||
| Copyright (c) 2023, [Rodrigo Nemmen](https://rodrigonemmen.com). | ||
| [All rights reserved](http://opensource.org/licenses/BSD-2-Clause). | ||
| Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: | ||
| Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. | ||
| Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
+1
-1
| [metadata] | ||
| description-file = README.md | ||
| description_file = README.md | ||
| license_files = LICENSE | ||
@@ -4,0 +4,0 @@ |
+5
-3
@@ -11,3 +11,3 @@ from setuptools import setup, find_packages | ||
| name='bces', | ||
| version='1.5.1', | ||
| version='2.0', | ||
| description='Python module for performing linear regression for data with measurement errors and intrinsic scatter', | ||
@@ -19,6 +19,8 @@ long_description=readme, | ||
| url='https://github.com/rsnemmen/BCES', | ||
| download_url = 'https://github.com/rsnemmen/BCES/archive/1.5.1.tar.gz', | ||
| download_url = 'https://github.com/rsnemmen/BCES/archive/1.6.tar.gz', | ||
| license=license, | ||
| keywords = ['statistics', 'fitting', 'linear-regression','machine-learning'], | ||
| packages=find_packages(exclude=('tests', 'docs')) | ||
| packages=find_packages(exclude=('tests', 'docs')), | ||
| install_requires=['numpy', 'scipy', 'tqdm'], | ||
| python_requires='>=3.8', | ||
| ) |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
44286
1.91%15
25%560
95.12%