Skip to content

Commit

Permalink
Merge branch 'release-0.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
bbengfort committed May 18, 2016
2 parents b988daf + c4f3ba7 commit a2b4e61
Show file tree
Hide file tree
Showing 12 changed files with 744 additions and 0 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ python:
- '3.5'

before_install:
- apt-get install -qq python-numpy python-scipy python-matplotlib
- pip install coveralls

install: pip install -r requirements.txt
Expand Down
15 changes: 15 additions & 0 deletions DESCRIPTION.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Yellowbrick is a suite of visual analysis and diagnostic tools designed to facilitate machine learning with Scikit-Learn. The package includes visualizations that can help users navigate the feature selection process, build intuition around model selection, diagnose common problems like bias, heteroscedasticity, underfit, and overtraining, and support hyperparameter tuning to steer predictive models toward more successful results.

Some of the available tools include:

- histograms
- scatter plot matrices
- parallel coordinates
- jointplots
- ROC curves
- classification heatmaps
- residual plots
- validation curves
- gridsearch heatmaps

For more, please see the full documentation at: http://yellowbrick.readthedocs.org/en/latest/
1 change: 1 addition & 0 deletions examples/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/*
64 changes: 64 additions & 0 deletions examples/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env python
# download
# Downloads the example datasets for running the examples.
#
# Author: Rebecca Bilbro <rbilbro@districtdatalabs.com>
# Created: Wed May 18 11:54:45 2016 -0400
#
# Copyright (C) 2016 District Data Labs
# For license information, see LICENSE.txt
#
# ID: download.py [] benjamin@bengfort.com $

"""
Downloads the example datasets for running the examples.
"""

##########################################################################
## Imports
##########################################################################

import os
import sys
import zipfile

try:
import requests
except ImportError:
print((
"The requests module is required to download data --\n"
"please install it with pip install requests."
))
sys.exit(1)

##########################################################################
## Links to data sets
##########################################################################

# Each data set is a (url, filename) pair: the shortened link to fetch and the
# name the download is saved under inside the target directory.
OCCUPANCY = ('http://bit.ly/ddl-occupancy-dataset', 'occupancy.zip')
CREDIT = ('http://bit.ly/ddl-credit-dataset', 'credit.xls')
CONCRETE = ('http://bit.ly/ddl-concrete-data', 'concrete.xls')


def download_data(url, name, path='data'):
    """
    Downloads the file at the given url and saves it as name inside path.

    Parameters
    ----------
    url : str
        The location to fetch with an HTTP GET request.

    name : str
        The filename to save the response body under.

    path : str, default: 'data'
        The directory to write into; it is created if it does not exist.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    if not os.path.exists(path):
        # makedirs so that a nested target directory can be created in one go
        os.makedirs(path)

    response = requests.get(url)
    # Fail loudly rather than saving an HTML error page as if it were data
    response.raise_for_status()

    # response.content is bytes, so the file must be opened in binary mode
    with open(os.path.join(path, name), 'wb') as f:
        f.write(response.content)


def download_all(path='data'):
    """
    Downloads every example data set into path and then unpacks the
    occupancy archive into an 'occupancy' subdirectory of path.
    """
    for href, name in (OCCUPANCY, CREDIT, CONCRETE):
        download_data(href, name, path)

    # Extract the occupancy zip data; the context manager closes the archive
    # handle even if extraction fails part way through.
    with zipfile.ZipFile(os.path.join(path, 'occupancy.zip')) as z:
        z.extractall(os.path.join(path, 'occupancy'))


if __name__ == '__main__':
    # Fetch everything into ./data and report the absolute location
    path = 'data'
    download_all(path)
    destination = os.path.abspath(path)
    print("Downloaded datasets to {}".format(destination))
293 changes: 293 additions & 0 deletions examples/examples.ipynb

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
# Dependencies
matplotlib==1.5.1
scikit-learn==0.17.1
numpy==1.11.0

## Utilities
#cycler==0.10.0
#pyparsing==2.1.4
#pytz==2016.4
#python-dateutil==2.5.3
#six==1.10.0
#requests==2.10.0

## Testing Requirements (uncomment for development)
#nose==1.3.7
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[metadata]
description-file = README.md
135 changes: 135 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#!/usr/bin/env python
# setup
# Setup script for installing yellowbrick
#
# Author: Benjamin Bengfort <bbengfort@districtdatalabs.com>
# Created: Wed May 18 14:33:26 2016 -0400
#
# Copyright (C) 2016 District Data Labs
# For license information, see LICENSE.txt and NOTICE.md
#
# ID: setup.py [] benjamin@bengfort.com $

"""
Setup script for installing yellowbrick.
See http://bbengfort.github.io/programmer/2016/01/20/packaging-with-pypi.html
"""

##########################################################################
## Imports
##########################################################################

import os
import re
import codecs

from setuptools import setup
from setuptools import find_packages

##########################################################################
## Package Information
##########################################################################

## Basic information
NAME = "yellowbrick"
DESCRIPTION = "A suite of visual analysis and diagnostic tools for machine learning."
AUTHOR = "Rebecca Bilbro"
EMAIL = "rbilbro@districtdatalabs.com"
LICENSE = "Apache 2"
REPOSITORY = "https://github.com/districtdatalabs/yellowbrick"
PACKAGE = "yellowbrick"

## Define the keywords
KEYWORDS = ('visualization', 'machine learning', 'scikit-learn', 'matplotlib', 'data science')

## Define the classifiers
## See https://pypi.python.org/pypi?%3Aaction=list_classifiers
CLASSIFIERS = (
'Development Status :: 4 - Beta',
'Environment :: Console',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'Natural Language :: English',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.5',
'Topic :: Software Development',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Scientific/Engineering :: Visualization',
)

## Important Paths
## PROJECT is the directory that contains this script; the relative paths
## below are joined onto it when they are read.
PROJECT = os.path.abspath(os.path.dirname(__file__))
REQUIRE_PATH = "requirements.txt"
## VERSION_PATH is the module whose get_version function supplies the version
VERSION_PATH = os.path.join(PACKAGE, "version.py")
## PKG_DESCRIBE is the file used for the long description
PKG_DESCRIBE = "DESCRIPTION.txt"

## Directories to ignore in find_packages
EXCLUDES = (
"tests", "bin", "docs", "fixtures", "register", "notebooks", "examples",
)

##########################################################################
## Helper Functions
##########################################################################

def read(*parts):
    """
    Return the decoded text of the file found by joining *parts onto the
    PROJECT directory. The file is assumed to be encoded as UTF-8.
    """
    target = os.path.join(PROJECT, *parts)
    with codecs.open(target, 'rb', 'utf-8') as handle:
        contents = handle.read()
    return contents


def get_version(path=VERSION_PATH):
    """
    Executes the source of the module at path (VERSION_PATH by default) in a
    fresh namespace and returns the result of calling the get_version
    function that the module defines.
    """
    # The source is exec'd rather than imported -- presumably so that the
    # package itself does not have to be importable while installing.
    scope = {}
    source = read(path)
    exec(source, scope)
    version_func = scope['get_version']
    return version_func()


def get_requires(path=REQUIRE_PATH):
    """
    Generates the requirement specifiers listed in the file at path
    (REQUIRE_PATH by default), one per line, skipping blank lines and lines
    that begin with a '#' comment marker.
    """
    for raw in read(path).splitlines():
        requirement = raw.strip()
        if not requirement or requirement.startswith('#'):
            continue
        yield requirement

##########################################################################
## Define the configuration
##########################################################################

# Resolve the version once so that "version" and "download_url" always agree
# and the version module is only read and executed a single time.
VERSION = get_version()

config = {
    "name": NAME,
    "version": VERSION,
    "description": DESCRIPTION,
    "long_description": read(PKG_DESCRIBE),
    "license": LICENSE,
    "author": AUTHOR,
    "author_email": EMAIL,
    "maintainer": AUTHOR,
    "maintainer_email": EMAIL,
    "url": REPOSITORY,
    "download_url": "{}/tarball/v{}".format(REPOSITORY, VERSION),
    "packages": find_packages(where=PROJECT, exclude=EXCLUDES),
    "install_requires": list(get_requires()),
    # These fields are expected to be lists; newer distutils versions warn
    # when handed another sequence type such as a tuple.
    "classifiers": list(CLASSIFIERS),
    "keywords": list(KEYWORDS),
    "zip_safe": False,
    "scripts": [],
}

##########################################################################
## Run setup script
##########################################################################

if __name__ == '__main__':
    setup(**config)
2 changes: 2 additions & 0 deletions yellowbrick/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
##########################################################################

from .version import get_version
from .anscombe import anscombe
from .classifier import crplot, rocplot_compare

##########################################################################
## Package Version
Expand Down
63 changes: 63 additions & 0 deletions yellowbrick/anscombe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# yellowbrick.anscombe
# Plots Anscombe's Quartet as an illustration of the importance of visualization.
#
# Author: Benjamin Bengfort <bbengfort@districtdatalabs.com>
# Created: Wed May 18 11:38:25 2016 -0400
#
# Copyright (C) 2016 District Data Labs
# For license information, see LICENSE.txt
#
# ID: anscombe.py [] benjamin@bengfort.com $

"""
Plots Anscombe's Quartet as an illustration of the importance of visualization.
"""

##########################################################################
## Imports
##########################################################################


import numpy as np
import matplotlib.pyplot as plt


##########################################################################
## Anscombe Data Arrays
##########################################################################

# The four data sets of the quartet, one 2 x 11 array each: the first row
# holds the x values and the second row holds the matching y values.
ANSCOMBE = [
# Data set I
np.array([
[10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0],
[8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68]
]),
# Data set II
np.array([
[10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0],
[9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74]
]),
# Data set III
np.array([
[10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0],
[7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73]
]),
# Data set IV
np.array([
[8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0],
[6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89]
])
]


def anscombe():
    """
    Draws the four Anscombe data sets on a 2x2 grid of axes, each as a green
    scatter plot overlaid with its least-squares line, and returns the four
    axes as a tuple in the order they were drawn.
    """
    fig, grid = plt.subplots(2, 2, sharex='col', sharey='row')
    (axa, axb), (axc, axd) = grid
    axes = (axa, axb, axc, axd)

    for data, ax in zip(ANSCOMBE, axes):
        x, y = data[0], data[1]
        ax.scatter(x, y, c='g')

        # Degree one fit, drawn across the current horizontal extent
        slope, intercept = np.polyfit(x, y, 1)
        left, right = ax.get_xlim()[0], ax.get_xlim()[1]
        span = np.linspace(left, right, 100)
        ax.plot(span, slope * span + intercept, '-')

    return axes
Loading

0 comments on commit a2b4e61

Please sign in to comment.