-
-
Notifications
You must be signed in to change notification settings - Fork 559
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
744 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
Yellowbrick is a suite of visual analysis and diagnostic tools designed to facilitate machine learning with Scikit-Learn. The package includes visualizations that can help users navigate the feature selection process, build intuition around model selection, diagnose common problems like bias, heteroscedasticity, underfit, and overtraining, and support hyperparameter tuning to steer predictive models toward more successful results. | ||
|
||
Some of the available tools include: | ||
|
||
- histograms | ||
- scatter plot matrices | ||
- parallel coordinates | ||
- jointplots | ||
- ROC curves | ||
- classification heatmaps | ||
- residual plots | ||
- validation curves | ||
- gridsearch heatmaps | ||
|
||
For more, please see the full documentation at: http://yellowbrick.readthedocs.org/en/latest/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
data/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#!/usr/bin/env python | ||
# download | ||
# Downloads the example datasets for running the examples. | ||
# | ||
# Author: Rebecca Bilbro <rbilbro@districtdatalabs.com> | ||
# Created: Wed May 18 11:54:45 2016 -0400 | ||
# | ||
# Copyright (C) 2016 District Data Labs | ||
# For license information, see LICENSE.txt | ||
# | ||
# ID: download.py [] benjamin@bengfort.com $ | ||
|
||
""" | ||
Downloads the example datasets for running the examples. | ||
""" | ||
|
||
########################################################################## | ||
## Imports | ||
########################################################################## | ||
|
||
import os | ||
import sys | ||
import zipfile | ||
|
||
try: | ||
import requests | ||
except ImportError: | ||
print(( | ||
"The requests module is required to download data --\n" | ||
"please install it with pip install requests." | ||
)) | ||
sys.exit(1) | ||
|
||
########################################################################## | ||
## Links to data sets | ||
########################################################################## | ||
|
||
# Each dataset is a (url, filename) pair: download_all() unpacks the pair and
# hands the url and the local file name to download_data().
OCCUPANCY = ('http://bit.ly/ddl-occupancy-dataset', 'occupancy.zip')
CREDIT = ('http://bit.ly/ddl-credit-dataset', 'credit.xls')
CONCRETE = ('http://bit.ly/ddl-concrete-data', 'concrete.xls')
|
||
|
||
def download_data(url, name, path='data'):
    """
    Downloads the resource at ``url`` and stores it as ``name`` inside the
    ``path`` directory, creating that directory first if it is missing.

    Parameters
    ----------
    url : str
        Address fetched with an HTTP GET request.
    name : str
        File name (relative to ``path``) that receives the payload.
    path : str, default: 'data'
        Directory the file is written to.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status code.
    """
    if not os.path.exists(path):
        # makedirs also copes with nested target directories
        os.makedirs(path)

    response = requests.get(url)
    # Fail loudly instead of saving an HTML error page as a "dataset"
    response.raise_for_status()

    # response.content is raw bytes (zip/xls payloads), so the file has to be
    # opened in binary mode; text mode raises TypeError on Python 3.
    with open(os.path.join(path, name), 'wb') as f:
        f.write(response.content)
|
||
|
||
def download_all(path='data'):
    """
    Downloads every example dataset (occupancy, credit, concrete) into
    ``path`` and unpacks the occupancy archive into ``<path>/occupancy``.

    Parameters
    ----------
    path : str, default: 'data'
        Directory that receives the downloaded files.
    """
    for href, name in (OCCUPANCY, CREDIT, CONCRETE):
        download_data(href, name, path)

    # Extract the occupancy zip data; the context manager guarantees the
    # archive handle is closed even if extraction fails.
    archive = os.path.join(path, OCCUPANCY[1])
    with zipfile.ZipFile(archive) as z:
        z.extractall(os.path.join(path, 'occupancy'))
|
||
|
||
if __name__ == '__main__':
    # Running the module as a script fetches all datasets into ./data and
    # reports the absolute location they were written to.
    target = 'data'
    download_all(target)
    location = os.path.abspath(target)
    print("Downloaded datasets to {}".format(location))
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[metadata] | ||
description-file = README.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
#!/usr/bin/env python | ||
# setup | ||
# Setup script for installing yellowbrick | ||
# | ||
# Author: Benjamin Bengfort <bbengfort@districtdatalabs.com> | ||
# Created: Wed May 18 14:33:26 2016 -0400 | ||
# | ||
# Copyright (C) 2016 District Data Labs | ||
# For license information, see LICENSE.txt and NOTICE.md | ||
# | ||
# ID: setup.py [] benjamin@bengfort.com $ | ||
|
||
""" | ||
Setup script for installing yellowbrick. | ||
See http://bbengfort.github.io/programmer/2016/01/20/packaging-with-pypi.html | ||
""" | ||
|
||
########################################################################## | ||
## Imports | ||
########################################################################## | ||
|
||
import os | ||
import re | ||
import codecs | ||
|
||
from setuptools import setup | ||
from setuptools import find_packages | ||
|
||
########################################################################## | ||
## Package Information | ||
########################################################################## | ||
|
||
## Basic information
## These values are passed straight into the setup() configuration below.
NAME         = "yellowbrick"
DESCRIPTION  = "A suite of visual analysis and diagnostic tools for machine learning."
AUTHOR       = "Rebecca Bilbro"
EMAIL        = "rbilbro@districtdatalabs.com"
LICENSE      = "Apache 2"
REPOSITORY   = "https://github.com/districtdatalabs/yellowbrick"
PACKAGE      = "yellowbrick"

## Define the keywords
KEYWORDS     = ('visualization', 'machine learning', 'scikit-learn', 'matplotlib', 'data science')

## Define the classifiers
## See https://pypi.python.org/pypi?%3Aaction=list_classifiers
CLASSIFIERS  = (
    'Development Status :: 4 - Beta',
    'Environment :: Console',
    'Intended Audience :: Developers',
    'License :: OSI Approved :: Apache Software License',
    'Natural Language :: English',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3.5',
    'Topic :: Software Development',
    'Topic :: Software Development :: Libraries :: Python Modules',
    'Topic :: Scientific/Engineering :: Visualization',
)

## Important Paths
## PROJECT is the directory containing this setup script; the other paths are
## relative and are resolved against PROJECT by the read() helper.
PROJECT      = os.path.abspath(os.path.dirname(__file__))
REQUIRE_PATH = "requirements.txt"
VERSION_PATH = os.path.join(PACKAGE, "version.py")
PKG_DESCRIBE = "DESCRIPTION.txt"

## Directories to ignore in find_packages
EXCLUDES     = (
    "tests", "bin", "docs", "fixtures", "register", "notebooks", "examples",
)
|
||
########################################################################## | ||
## Helper Functions | ||
########################################################################## | ||
|
||
def read(*parts):
    """
    Returns the UTF-8 decoded contents of the file whose location is built
    by joining *parts onto the PROJECT directory (the folder that holds this
    setup script).
    """
    target = os.path.join(PROJECT, *parts)
    with codecs.open(target, 'rb', 'utf-8') as handle:
        contents = handle.read()
    return contents
|
||
|
||
def get_version(path=VERSION_PATH):
    """
    Executes the source of the module at path (VERSION_PATH by default) in a
    fresh namespace, then calls the get_version function that module defines
    and returns its result.
    """
    scope = {}
    source = read(path)
    exec(source, scope)
    version_func = scope['get_version']
    return version_func()
|
||
|
||
def get_requires(path=REQUIRE_PATH):
    """
    Generator over the requirement specifiers listed in the file at path
    (REQUIRE_PATH by default). Surrounding whitespace is stripped; blank
    lines and lines beginning with '#' are skipped.
    """
    for raw in read(path).splitlines():
        entry = raw.strip()
        if not entry or entry.startswith('#'):
            continue
        yield entry
|
||
########################################################################## | ||
## Define the configuration | ||
########################################################################## | ||
|
||
## Resolve the version once: every get_version() call re-reads and exec's the
## version module, and the value is needed for two separate entries below.
VERSION = get_version()

## Keyword arguments handed to setuptools.setup()
config = {
    "name": NAME,
    "version": VERSION,
    "description": DESCRIPTION,
    "long_description": read(PKG_DESCRIBE),
    "license": LICENSE,
    "author": AUTHOR,
    "author_email": EMAIL,
    "maintainer": AUTHOR,
    "maintainer_email": EMAIL,
    "url": REPOSITORY,
    "download_url": "{}/tarball/v{}".format(REPOSITORY, VERSION),
    "packages": find_packages(where=PROJECT, exclude=EXCLUDES),
    "install_requires": list(get_requires()),
    ## distutils/setuptools expect lists here and warn when given tuples
    "classifiers": list(CLASSIFIERS),
    "keywords": list(KEYWORDS),
    "zip_safe": False,
    "scripts": [],
}
|
||
########################################################################## | ||
## Run setup script | ||
########################################################################## | ||
|
||
if __name__ == '__main__': | ||
setup(**config) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# yellowbrick.anscombe | ||
# Plots Anscombe's Quartet as an illustration of the importance of visualization. | ||
# | ||
# Author: Benjamin Bengfort <bbengfort@districtdatalabs.com> | ||
# Created: Wed May 18 11:38:25 2016 -0400 | ||
# | ||
# Copyright (C) 2016 District Data Labs | ||
# For license information, see LICENSE.txt | ||
# | ||
# ID: anscombe.py [] benjamin@bengfort.com $ | ||
|
||
""" | ||
Plots Anscombe's Quartet as an illustration of the importance of visualization. | ||
""" | ||
|
||
########################################################################## | ||
## Imports | ||
########################################################################## | ||
|
||
|
||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
|
||
|
||
########################################################################## | ||
## Anscombe Data Arrays | ||
########################################################################## | ||
|
||
# The four datasets of the quartet. Each array has two rows: anscombe() reads
# row 0 as the x values and row 1 as the y values of one panel.
ANSCOMBE = [
    np.array([
        [10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0],
        [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68]
    ]),
    np.array([
        [10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0],
        [9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74]
    ]),
    np.array([
        [10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0],
        [7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73]
    ]),
    np.array([
        [8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0],
        [6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89]
    ])
]
|
||
|
||
def anscombe():
    """
    Draws the four Anscombe datasets on a 2x2 grid of axes (x shared per
    column, y shared per row). Each panel shows a green scatter of the data
    with its degree-1 least squares fit drawn across the current x limits.

    Returns the four axes as a tuple ordered left-to-right, top-to-bottom.
    """
    _, grid = plt.subplots(2, 2, sharex='col', sharey='row')
    panels = tuple(grid.flat)

    for dataset, panel in zip(ANSCOMBE, panels):
        x, y = dataset

        panel.scatter(x, y, c='g')
        slope, intercept = np.polyfit(x, y, 1)

        # Sample the fitted line over the limits chosen after the scatter
        lower, upper = panel.get_xlim()
        X = np.linspace(lower, upper, 100)
        panel.plot(X, slope * X + intercept, '-')

    return panels
Oops, something went wrong.