diff --git a/GNUmakefile b/GNUmakefile
new file mode 100644
index 0000000..d105fd8
--- /dev/null
+++ b/GNUmakefile
@@ -0,0 +1,10 @@
+#############################################################################
+# Development make file.
+#############################################################################
+
+SOURCE_DIR := diffusion_maps
+
+$(SOURCE_DIR)/version.py:
+	@scripts/make-version > $@
+
+.PHONY: $(SOURCE_DIR)/version.py
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0a11cb0
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2017 Juan M. Bello Rivas <jmbr@superadditive.com>
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.org b/README.org
new file mode 100644
index 0000000..0092225
--- /dev/null
+++ b/README.org
@@ -0,0 +1,65 @@
+#+TITLE: Diffusion Maps and Geometric Harmonics for Python
+#+AUTHOR: Juan M. Bello Rivas
+#+EMAIL: jmbr@superadditive.com
+#+DATE: <2017-05-20 Sat>
+
+* Overview
+
+The =diffusion-maps= library for Python provides a fast and accurate implementation of diffusion maps[fn:1] and geometric harmonics[fn:2]. Its speed stems from the use of sparse linear algebra and (optionally) graphics processing units to accelerate computations.
+The included code routinely solves eigenvalue problems 3 x faster than SciPy using GPUs on matrices with over 200 million non-zero entries. 
+
+The package includes a command-line utility for the quick calculation of diffusion maps on data sets.
+
+Some of the features of the =diffusion-maps= module include:
+
+- Fast evaluation of distance matrices using nearest neighbors.
+
+- Fast and accurate computation of eigenvalue/eigenvector pairs using sparse linear algebra.
+
+- Optional GPU-accelerated sparse linear algebra routines.
+
+- Optional interface to the [[https://github.com/opencollab/arpack-ng][ARPACK-NG]] library.
+
+- Simple and easily modifiable code.
+
+[fn:1] Coifman, R. R., & Lafon, S. (2006). Diffusion maps. Applied and Computational Harmonic Analysis, 21(1), 5–30. http://doi.org/10.1016/j.acha.2006.04.006
+
+[fn:2] Coifman, R. R., & Lafon, S. (2006). Geometric harmonics: A novel tool for multiscale out-of-sample extension of empirical functions. Applied and Computational Harmonic Analysis, 21(1), 31–52. http://doi.org/10.1016/j.acha.2005.07.005
+
+#+CAPTION: Geometric harmonics for $z = sin(x^2 + y^2)$.
+#+NAME:   fig:geometric-harmonics
+[[./geometric-harmonics.png]]
+
+* Prerequisites
+
+The library is implemented in Python 3.5+ and uses [[http://www.numpy.org/][NumPy]] and [[https://www.scipy.org/][SciPy]]. It is recommended to install [[https://mathema.tician.de/software/pycuda/][PyCUDA]] to enable the GPU-accelerated eigenvalue solver.
+
+The =diffusion-maps= command can display the resulting diffusion maps using [[https://matplotlib.org/][Matplotlib]] if it is available.
+
+* Installation
+
+ Use ~python setup.py install~ to install on your system or ~python setup.py install --user~ for a user-specific installation.
+
+* Command-line utility
+
+The ~diffusion-maps~ command reads data sets stored in NPY format. The simplest way to use it is to invoke it as follows:
+
+#+BEGIN_SRC bash
+diffusion-maps DATA-SET.NPY EPSILON-VALUE
+#+END_SRC
+
+There exist parameters to save and visualize different types of results, to specify how many eigenvalue/eigenvector pairs to compute, etc. See the help page displayed by:
+
+#+BEGIN_SRC bash
+diffusion-maps --help
+#+END_SRC
+
+* Additional documentation
+
+[[http://www.sphinx-doc.org/en/stable/][Sphinx]]-based API documentation is available in the =doc/= folder. Run
+
+#+BEGIN_SRC bash
+make -C doc html
+#+END_SRC
+
+to build the documentation.
diff --git a/diffusion_maps/__init__.py b/diffusion_maps/__init__.py
new file mode 100644
index 0000000..e775abf
--- /dev/null
+++ b/diffusion_maps/__init__.py
@@ -0,0 +1,7 @@
+"""Diffusion maps module.
+
+"""
+
+from .diffusion_maps import *
+from .geometric_harmonics import *
+from .version import *
diff --git a/diffusion_maps/clock.py b/diffusion_maps/clock.py
new file mode 100644
index 0000000..21823a1
--- /dev/null
+++ b/diffusion_maps/clock.py
@@ -0,0 +1,61 @@
+"""Clock for keeping track of the wall time.
+
+"""
+
+
+__all__ = ['ClockError', 'Clock']
+
+import datetime
+import time
+
+from typing import Optional     # noqa: F401. Used for mypy.
+
+
+class ClockError(Exception):
+    """Invalid clock operation."""
+    pass
+
+
+class Clock:
+    """Clock for keeping track of time.
+
+    """
+    def __init__(self) -> None:
+        self.start = None       # type: Optional[float]
+        self.stop = None        # type: Optional[float]
+
+    def tic(self) -> None:
+        """Start the clock."""
+        self.start = time.monotonic()
+        self.stop = None
+
+    def toc(self) -> None:
+        """Stop the clock."""
+        assert self.start is not None
+        self.stop = time.monotonic()
+
+    def __str__(self) -> str:
+        """Human-readable representation of elapsed time."""
+        if self.start is None:
+            raise ClockError('The clock has not been started')
+        else:
+            start = datetime.datetime.fromtimestamp(self.start)
+
+        if self.stop is None:
+            stop = datetime.datetime.fromtimestamp(time.monotonic())
+        else:
+            stop = datetime.datetime.fromtimestamp(self.stop)
+
+        delta = stop - start
+
+        return str(delta)
+
+    def __enter__(self):
+        if self.start is None and self.stop is None:
+            self.tic()
+
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        if self.start is not None:
+            self.toc()
diff --git a/diffusion_maps/command_line_interface.py b/diffusion_maps/command_line_interface.py
new file mode 100644
index 0000000..8666a17
--- /dev/null
+++ b/diffusion_maps/command_line_interface.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+# PYTHON_ARGCOMPLETE_OK                                -*- mode: python -*-
+
+"""Command line interface for the computation of diffusion maps.
+
+"""
+
+import argparse
+try:
+    import argcomplete
+except ImportError:
+    argcomplete = None
+import logging
+import os
+import sys
+
+import numpy as np
+
+import diffusion_maps.default as default
+import diffusion_maps.version as version
+from diffusion_maps import downsample, DiffusionMaps
+from diffusion_maps.profiler import Profiler
+from diffusion_maps.plot import plot_eigenvectors
+
+
+def output_eigenvalues(ew: np.array) -> None:
+    """Output the table of eigenvalues.
+
+    """
+    logging.info('List of eigenvalues:')
+    fields = ['Real part', 'Imaginary part']
+    fmt = '{:>12} | {:<21}'
+    logging.info('-' * 30)
+    logging.info(fmt.format(*fields))
+    logging.info('-' * 30)
+    fmt = '{:+2.9f} | {:+2.9f}'
+    for eigenvalue in ew:
+        logging.info(fmt.format(eigenvalue.real, eigenvalue.imag))
+
+
+def use_cuda(args: argparse.Namespace) -> bool:
+    """Determine whether to use GPU-accelerated code or not.
+
+    """
+    try:
+        import pycuda           # noqa
+        use_cuda = True and not args.no_gpu
+    except ImportError:
+        use_cuda = False
+
+    return use_cuda
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Diffusion maps')
+    parser.add_argument('data_file', metavar='FILE', type=str,
+                        help='process %(metavar)s (should be in NPY format)')
+    parser.add_argument('epsilon', metavar='VALUE', type=float,
+                        help='kernel bandwidth')
+    parser.add_argument('-n', '--num-samples', type=float, metavar='NUM',
+                        required=False, help='number of data points to use')
+    parser.add_argument('-e', '--num-eigenpairs', type=int, metavar='NUM',
+                        required=False, default=default.num_eigenpairs,
+                        help='number of eigenvalue/eigenvector pairs to '
+                        'compute')
+    parser.add_argument('-c', '--cut-off', type=float, required=False,
+                        metavar='DISTANCE', help='cut-off to use to enforce '
+                        'sparsity in the diffusion maps computation.')
+    parser.add_argument('-o', '--output-data', type=str, required=False,
+                        default='actual-data.npy', metavar='FILE', help='save '
+                        'actual data used in computation to %(metavar)s')
+    parser.add_argument('-w', '--eigenvalues', type=str,
+                        default='eigenvalues.dat', required=False,
+                        metavar='FILE', help='save eigenvalues to '
+                        '%(metavar)s')
+    parser.add_argument('-v', '--eigenvectors', type=str,
+                        default='eigenvectors.npy', required=False,
+                        metavar='FILE', help='save eigenvectors to '
+                        '%(metavar)s')
+    parser.add_argument('-m', '--matrix', type=str, required=False,
+                        metavar='FILE', help='save transition matrix to '
+                        '%(metavar)s')
+    parser.add_argument('-p', '--plot', action='store_true', default=False,
+                        help='plot first two eigenvectors')
+    parser.add_argument('--no-gpu', action='store_true', required=False,
+                        help='disable GPU eigensolver')
+    parser.add_argument('--debug', action='store_true', required=False,
+                        help='print debugging information')
+    parser.add_argument('--profile', required=False, metavar='FILE',
+                        type=argparse.FileType('w', encoding='utf-8'),
+                        help='run under profiler and save report to '
+                        '%(metavar)s')
+
+    args = parser.parse_args(sys.argv[1:])
+
+    if args.debug is True:
+        logging.basicConfig(level=logging.DEBUG, format='%(message)s')
+    else:
+        logging.basicConfig(level=logging.INFO, format='%(message)s')
+
+    prog_name = os.path.basename(sys.argv[0])
+    logging.info('{} {}'.format(prog_name, version.v_long))
+    logging.info('')
+    logging.info('Reading data from {!r}...'.format(args.data_file))
+
+    orig_data = np.load(args.data_file)
+    if args.num_samples:
+        data = downsample(orig_data, int(args.num_samples))
+    else:
+        data = orig_data
+
+    logging.info('Computing {} diffusion maps with epsilon = {:g} '
+                 'on {} data points...'
+                 .format(args.num_eigenpairs-1, args.epsilon, data.shape[0]))
+
+    with Profiler(args.profile):
+        dm = DiffusionMaps(data, args.epsilon,
+                           num_eigenpairs=args.num_eigenpairs,
+                           use_cuda=use_cuda(args))
+
+    if args.profile:
+        args.profile.close()
+
+    output_eigenvalues(dm.eigenvalues)
+
+    if args.matrix:
+        logging.info('Saving transition matrix to {!r}'
+                     .format(args.matrix))
+        import scipy.io
+        scipy.io.mmwrite(args.matrix, dm.kernel_matrix)
+
+    if args.eigenvalues:
+        logging.info('Saving eigenvalues to {!r}'
+                     .format(args.eigenvalues))
+        np.savetxt(args.eigenvalues, dm.eigenvalues)
+
+    if args.eigenvectors:
+        logging.info('Saving eigenvectors to {!r}'
+                     .format(args.eigenvectors))
+        np.save(args.eigenvectors, dm.eigenvectors)
+
+    if args.output_data and args.num_samples:
+        logging.info('Saving downsampled data to {!r}'
+                     .format(args.output_data))
+        np.save(args.output_data, data)
+
+    if args.plot is True:
+        plot_eigenvectors(data, dm.eigenvectors)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/diffusion_maps/cpu_eigensolver.py b/diffusion_maps/cpu_eigensolver.py
new file mode 100644
index 0000000..740e824
--- /dev/null
+++ b/diffusion_maps/cpu_eigensolver.py
@@ -0,0 +1,49 @@
+"""Compute dominant eigenvectors of a sparse matrix.
+
+"""
+
+__all__ = ['eigensolver']
+
+from typing import Optional, Tuple
+
+import numpy as np
+import scipy.sparse
+import scipy.sparse.linalg
+
+from . import default
+
+
+def eigensolver(matrix: scipy.sparse.csr_matrix,
+                num_eigenpairs: int = default.num_eigenpairs,
+                sigma: Optional[float] = None,
+                initial_vector: Optional[np.array] = None)  \
+        -> Tuple[np.array, np.array]:
+    """Solve  eigenvalue problem for sparse matrix.
+
+    Parameters
+    ----------
+    matrix : scipy.sparse.csr_matrix
+        A matrix in compressed sparse row format.
+    num_eigenpairs : int, optional
+        Number of eigenvalue/eigenvector pairs to obtain.
+    sigma : float, optional
+        Find eigenvalues close to the value of sigma. The default value is
+        near 1.0.
+    initial_vector : np.array, optional
+        Initial vector to use in the Arnoldi iteration. If not set, a vector
+        with all entries equal to one will be used.
+
+    Returns
+    -------
+    ew : np.array
+        Eigenvalues in descending order of magnitude.
+    ev : np.array
+        Eigenvectors corresponding to the eigenvalues in `ew`.
+
+    """
+    if initial_vector is None:
+        initial_vector = np.ones(matrix.shape[0])
+    ew, ev = scipy.sparse.linalg.eigs(matrix, k=num_eigenpairs, which='LM',
+                                      sigma=sigma, v0=initial_vector)
+    ii = np.argsort(np.abs(ew))[::-1]
+    return ew[ii], ev[:, ii].T
diff --git a/diffusion_maps/cusparse.py b/diffusion_maps/cusparse.py
new file mode 100644
index 0000000..688afd1
--- /dev/null
+++ b/diffusion_maps/cusparse.py
@@ -0,0 +1,118 @@
+"""Minimalistic interface to the NVIDIA cuSPARSE library.
+
+"""
+
+__all__ = ['cusparseCreate', 'cusparseDestroy', 'cusparseGetVersion',
+           'cusparseCreateMatDescr', 'cusparseDestroyMatDescr',
+           'cusparseDcsrmv']
+
+
+import ctypes
+import ctypes.util
+from enum import IntEnum
+
+import pycuda.autoinit          # noqa
+import pycuda.gpuarray as gpuarray
+
+
+libcusparse = ctypes.cdll.LoadLibrary(ctypes.util.find_library('cusparse'))
+
+libcusparse.cusparseCreate.restype = int
+libcusparse.cusparseCreate.argtypes = [ctypes.c_void_p]
+
+libcusparse.cusparseDestroy.restype = int
+libcusparse.cusparseDestroy.argtypes = [ctypes.c_int]
+
+libcusparse.cusparseGetVersion.restype = int
+libcusparse.cusparseGetVersion.argtypes = [ctypes.c_int, ctypes.c_void_p]
+
+libcusparse.cusparseCreateMatDescr.restype = int
+libcusparse.cusparseCreateMatDescr.argtypes = [ctypes.c_void_p]
+
+libcusparse.cusparseDestroyMatDescr.restype = int
+libcusparse.cusparseDestroyMatDescr.argtypes = [ctypes.c_int]
+
+libcusparse.cusparseDcsrmv.restype = int
+libcusparse.cusparseDcsrmv.argtypes = [ctypes.c_int, ctypes.c_int,
+                                       ctypes.c_int, ctypes.c_int,
+                                       ctypes.c_int, ctypes.c_void_p,
+                                       ctypes.c_int, ctypes.c_void_p,
+                                       ctypes.c_void_p, ctypes.c_void_p,
+                                       ctypes.c_void_p, ctypes.c_void_p,
+                                       ctypes.c_void_p]
+
+
+class cusparseOperation(IntEnum):
+    CUSPARSE_OPERATION_NON_TRANSPOSE = 0
+    CUSPARSE_OPERATION_TRANSPOSE = 1
+    CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2
+
+
+class cusparseError(Exception):
+    """Error in call to cuSPARSE library."""
+    def __init__(self, status):
+        self.status = status
+
+    def __repr__(self):
+        return ('{}(status={})'
+                .format(self.__class__.__name__, self.status))
+
+
+def cusparseCreate() -> ctypes.c_int:
+    handle = ctypes.c_int()
+
+    status = libcusparse.cusparseCreate(ctypes.byref(handle))
+    if status != 0:
+        raise cusparseError(status)
+
+    return handle
+
+
+def cusparseDestroy(handle: ctypes.c_int) -> None:
+    status = libcusparse.cusparseDestroy(handle)
+    if status != 0:
+        raise cusparseError(status)
+
+    return handle
+
+
+def cusparseGetVersion(handle: ctypes.c_int) -> int:
+    version = ctypes.c_int()
+
+    status = libcusparse.cusparseGetVersion(handle, ctypes.byref(version))
+    if status != 0:
+        raise cusparseError(status)
+
+    return status
+
+
+def cusparseCreateMatDescr() -> ctypes.c_int:
+    descr = ctypes.c_int()
+
+    status = libcusparse.cusparseCreateMatDescr(ctypes.byref(descr))
+    if status != 0:
+        raise cusparseError(status)
+
+    return descr
+
+
+def cusparseDestroyMatDescr(descr: ctypes.c_int) -> None:
+    status = libcusparse.cusparseDestroyMatDescr(descr)
+    if status != 0:
+        raise cusparseError(status)
+
+
+def cusparseDcsrmv(handle: ctypes.c_int, transA: cusparseOperation, m: int,
+                   n: int, nnz: int, alpha: float, descrA: ctypes.c_int,
+                   csrValA: gpuarray.GPUArray, csrRowPtrA: gpuarray.GPUArray,
+                   csrColIndA: gpuarray.GPUArray, x: gpuarray.GPUArray, beta:
+                   float, y: gpuarray.GPUArray):
+    alpha_ = ctypes.c_double(alpha)
+    beta_ = ctypes.c_double(beta)
+    status = libcusparse.cusparseDcsrmv(handle, transA, m, n, nnz,
+                                        ctypes.byref(alpha_), descrA,
+                                        csrValA.ptr, csrRowPtrA.ptr,
+                                        csrColIndA.ptr, x.ptr,
+                                        ctypes.byref(beta_), y.ptr)
+    if status != 0:
+        raise cusparseError(status)
diff --git a/diffusion_maps/default.py b/diffusion_maps/default.py
new file mode 100644
index 0000000..fccb984
--- /dev/null
+++ b/diffusion_maps/default.py
@@ -0,0 +1,7 @@
+"""Default values.
+
+"""
+
+num_eigenpairs = 11
+
+use_cuda = True
diff --git a/diffusion_maps/diffusion_maps.py b/diffusion_maps/diffusion_maps.py
new file mode 100644
index 0000000..29132ca
--- /dev/null
+++ b/diffusion_maps/diffusion_maps.py
@@ -0,0 +1,216 @@
+"""Diffusion maps module.
+
+This module implements the diffusion maps method for dimensionality
+reduction, as introduced in:
+
+Coifman, R. R., & Lafon, S. (2006). Diffusion maps. Applied and Computational
+Harmonic Analysis, 21(1), 5–30. DOI:10.1016/j.acha.2006.04.006
+
+"""
+
+__all__ = ['DiffusionMaps', 'downsample']
+
+
+import logging
+from typing import Optional, Dict
+
+import numpy as np
+import scipy
+import scipy.sparse
+from scipy.spatial import cKDTree
+
+from . import default
+from . import utils
+from . import clock
+Clock = clock.Clock
+
+
+def downsample(data: np.array, num_samples: int) -> np.array:
+    """Randomly sample a subset of a data set while preserving order.
+
+    The sampling is done without replacement.
+
+    Parameters
+    ----------
+    data : np.array
+        Array whose 0-th axis indexes the data points.
+    num_samples : int
+        Number of items to randomly (uniformly) sample from the data.  This
+        is typically less than the total number of elements in the data set.
+
+    Returns
+    -------
+    sampled_data : np.array
+       A total of `num_samples` uniformly randomly sampled data points from
+       `data`.
+
+    """
+    assert num_samples <= data.shape[0]
+    indices = sorted(np.random.choice(range(data.shape[0]), num_samples,
+                                      replace=False))
+    return data[indices, :]
+
+
+def make_stochastic_matrix(matrix: scipy.sparse.csr_matrix) -> None:
+    """Normalize a sparse, non-negative matrix in CSR format.
+
+    Normalizes (in the 1-norm) each row of a non-negative matrix and returns
+    the result.
+
+    Parameters
+    ----------
+    matrix : np.array
+        A matrix with non-negative entries to be normalized.
+
+    """
+    data = matrix.data
+    indptr = matrix.indptr
+    for i in range(matrix.shape[0]):
+        a, b = indptr[i:i+2]
+        norm1 = np.sum(data[a:b])
+        data[a:b] /= norm1
+
+
+class DiffusionMaps:
+    """Diffusion maps.
+
+    Attributes
+    ----------
+    epsilon : float
+        Bandwidth for kernel.
+    _cut_off : float
+        Cut off for the computation of pairwise distances between points.
+    _kdtree : cKDTree
+        KD-tree for accelerating pairwise distance computation.
+    eigenvectors : np.array
+        Right eigenvectors of `K`.
+    eigenvalues : np.array
+        Eigenvalues of `K`.
+    kernel_matrix : scipy.sparse.spmatrix
+        (Possibly stochastic) matrix obtained by evaluating a Gaussian kernel
+        on the data points.
+
+    """
+    def __init__(self, points: np.array, epsilon: float,
+                 cut_off: Optional[float] = None,
+                 num_eigenpairs: Optional[int] = default.num_eigenpairs,
+                 normalize_kernel: Optional[bool] = True,
+                 kdtree_options: Optional[Dict] = None,
+                 use_cuda: Optional[bool] = default.use_cuda) \
+            -> None:
+        """Compute diffusion maps.
+
+        This function computes the eigendecomposition of the transition
+        matrix associated to a random walk on the data using a bandwidth
+        (time) equal to epsilon.
+
+        Parameters
+        ----------
+        points : np.array
+            Data set to analyze. Its 0-th axis must index each data point.
+        epsilon : float
+            Bandwidth to use for the kernel.
+        cut_off : float, optional
+            Cut-off for the distance matrix computation. It should be at
+            least equal to `epsilon`.
+        num_eigenpairs : int, optional
+            Number of eigenpairs to compute. Default is
+            `default.num_eigenpairs`.
+        normalize_kernel : bool, optional
+            Whether to convert the kernel into a stochastic matrix or
+            not. Default is `True`.
+        kdtree_options : dict, optional
+            A dictionary containing parameters to pass to the underlying
+            cKDTree object.
+        use_cuda : bool, optional
+            Determine whether to use CUDA-enabled eigenvalue solver or not.
+
+        """
+        self.epsilon = epsilon
+
+        if cut_off is None:
+            self._cut_off = self.__get_cut_off(self.epsilon)
+        else:
+            self._cut_off = cut_off
+
+        if kdtree_options is None:
+            kdtree_options = dict()
+        with Clock() as clock:
+            self._kdtree = cKDTree(points, **kdtree_options)
+            logging.debug('KD-tree computation: {} seconds.'.format(clock))
+
+        with Clock() as clock:
+            distance_matrix \
+                = self._kdtree.sparse_distance_matrix(self._kdtree,
+                                                      self._cut_off,
+                                                      output_type='coo_matrix')
+            logging.debug('Sparse distance matrix computation: {} seconds.'
+                          .format(clock))
+
+        logging.debug('Distance matrix has {} nonzero entries ({:.4f}% dense).'
+                      .format(distance_matrix.nnz, distance_matrix.nnz
+                              / np.prod(distance_matrix.shape)))
+
+        with Clock() as clock:
+            distance_matrix = utils.coo_tocsr(distance_matrix)
+            logging.debug('Conversion from COO to CSR format: {} seconds.'
+                          .format(clock))
+
+        with Clock() as clock:
+            self.kernel_matrix = self._compute_kernel_matrix(distance_matrix)
+            logging.debug('Kernel matrix computation: {} seconds.'
+                          .format(clock))
+
+        with Clock() as clock:
+            if normalize_kernel is True:
+                make_stochastic_matrix(self.kernel_matrix)
+                logging.debug('Normalization: {} seconds.'.format(clock))
+
+        with Clock() as clock:
+            if use_cuda is True:
+                from .gpu_eigensolver import eigensolver
+                ew, ev = eigensolver(self.kernel_matrix, num_eigenpairs)
+                logging.debug('GPU eigensolver: {} seconds.'.format(clock))
+            else:
+                from .cpu_eigensolver import eigensolver
+                ew, ev = eigensolver(self.kernel_matrix, num_eigenpairs)
+                logging.debug('CPU eigensolver: {} seconds.'.format(clock))
+
+        self.eigenvalues = ew
+        self.eigenvectors = ev
+
+    @staticmethod
+    def __get_cut_off(epsilon: float) -> float:
+        """Return a reasonable cut off value.
+
+        """
+        return 2.0 * epsilon  # XXX Validate this.
+
+    def _compute_kernel_matrix(self, distance_matrix: scipy.sparse.spmatrix) \
+            -> scipy.sparse.spmatrix:
+        """Compute kernel matrix.
+
+        Returns the (unnormalized) Gaussian kernel matrix corresponding to
+        the data set and choice of bandwidth `epsilon`.
+
+        Parameters
+        ----------
+        distance_matrix : scipy.sparse.spmatrix
+            A sparse matrix whose entries are the distances between data
+            points.
+
+        See also
+        --------
+        _compute_distance_matrix, make_stochastic_matrix
+
+        """
+        data = distance_matrix.data
+        transformed_data = self.kernel_function(data)
+        kernel_matrix = distance_matrix._with_data(transformed_data, copy=True)
+        return kernel_matrix
+
+    def kernel_function(self, distances: np.array) -> np.array:
+        """Evaluate kernel function.
+
+        """
+        return np.exp(-np.square(distances) / (2.0 * self.epsilon))
diff --git a/diffusion_maps/geometric_harmonics.py b/diffusion_maps/geometric_harmonics.py
new file mode 100644
index 0000000..05feb22
--- /dev/null
+++ b/diffusion_maps/geometric_harmonics.py
@@ -0,0 +1,60 @@
+"""Geometric harmonics module.
+
+This module implements out-of-sample evaluation of functions using the
+Geometric Harmonics method introduced in:
+
+Coifman, R. R., & Lafon, S. (2006). Geometric harmonics: A novel tool for
+multiscale out-of-sample extension of empirical functions. Applied and
+Computational Harmonic Analysis, 21(1), 31–52. DOI:10.1016/j.acha.2005.07.005
+
+"""
+
+__all__ = ['GeometricHarmonicsInterpolator']
+
+from typing import Optional, Dict
+
+import numpy as np
+import scipy.spatial
+from scipy.interpolate.interpnd import (NDInterpolatorBase,
+                                        _ndim_coords_from_arrays)
+
+from diffusion_maps.diffusion_maps import DiffusionMaps
+
+
+class GeometricHarmonicsInterpolator(NDInterpolatorBase):
+    """Geometric Harmonics interpolator.
+
+    """
+    def __init__(self, points: np.array, values: np.array, epsilon: float,
+                 diffusion_maps_options: Optional[Dict] = None) -> None:
+        NDInterpolatorBase.__init__(self, points, values,
+                                    need_contiguous=False, need_values=True)
+        self.epsilon = epsilon
+        if diffusion_maps_options is None:
+            diffusion_maps_options = dict()
+        diffusion_maps_options['normalize_kernel'] = False
+        self.diffusion_maps = DiffusionMaps(self.points, epsilon,
+                                            **diffusion_maps_options)
+
+    def __call__(self, *args):
+        """Evaluate interpolator at the given points.
+
+        """
+        xi = _ndim_coords_from_arrays(args, ndim=self.points.shape[1])
+        xi = self._check_call_shape(xi)
+
+        kdtree = scipy.spatial.cKDTree(xi)
+        dmaps_kdtree = self.diffusion_maps._kdtree
+        radius = self.diffusion_maps._cut_off
+
+        ew = self.diffusion_maps.eigenvalues
+        ev = self.diffusion_maps.eigenvectors
+        aux = ev.T @ np.diag(1.0 / ew) @ ev @ self.values
+
+        distance_matrix \
+            = kdtree.sparse_distance_matrix(dmaps_kdtree, radius,
+                                            output_type='coo_matrix')
+        kernel_matrix \
+            = self.diffusion_maps._compute_kernel_matrix(distance_matrix)
+
+        return np.squeeze(kernel_matrix @ aux)
diff --git a/diffusion_maps/gpu_eigensolver.py b/diffusion_maps/gpu_eigensolver.py
new file mode 100644
index 0000000..f8b6d7c
--- /dev/null
+++ b/diffusion_maps/gpu_eigensolver.py
@@ -0,0 +1,256 @@
+"""Arnoldi iteration eigensolver using ARPACK-NG.
+
+See also: https://github.com/opencollab/arpack-ng
+
+"""
+
+
+from ctypes import (byref, cdll, create_string_buffer, c_int, c_char,
+                    c_char_p, c_double, POINTER, sizeof)
+import ctypes.util
+import logging
+from typing import Optional, Tuple
+import sys
+
+try:
+    import pycuda.gpuarray as gpuarray
+except ImportError:
+    gpuarray = None
+
+import numpy as np
+import scipy.sparse
+
+from . import matrix_vector_product as mvp
+from . import clock
+
+
+EPSILON = sys.float_info.epsilon
+
+MAX_ITERATIONS = int(1e7)
+
+
+arpack = cdll.LoadLibrary(ctypes.util.find_library('arpack'))
+
+dnaupd = arpack.dnaupd_
+dnaupd.argtypes = [POINTER(c_int), c_char_p, POINTER(c_int), c_char_p,
+                   POINTER(c_int), POINTER(c_double), POINTER(c_double),
+                   POINTER(c_int), POINTER(c_double), POINTER(c_int),
+                   POINTER(c_int), POINTER(c_int), POINTER(c_double),
+                   POINTER(c_double), POINTER(c_int), POINTER(c_int)]
+dnaupd_messages = dict([
+    (0, "Normal exit."),
+    (1, "Maximum number of iterations taken. "
+     "All possible eigenvalues of OP has been found. "
+     "IPARAM(5) returns the number of wanted converged Ritz values."),
+    (2, "No longer an informational error. "
+     "Deprecated starting with release 2 of ARPACK."),
+    (3, "No shifts could be applied during a cycle of the Implicitly "
+     "restarted Arnoldi iteration. One possibility is to increase the"
+     " size of NCV relative to NEV."),
+    (-1, "N must be positive."),
+    (-2, "NEV must be positive."),
+    (-3, "NCV-NEV >= 2 and less than or equal to N."),
+    (-4, "The maximum number of Arnoldi update iteration must be "
+     "greater than zero."),
+    (-5, "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"),
+    (-6, "BMAT must be one of 'I' or 'G'."),
+    (-7, "Length of private work array is not sufficient."),
+    (-8, "Error return from LAPACK eigenvalue calculation;"),
+    (-9, "Starting vector is zero."),
+    (-10, "IPARAM(7) must be 1,2,3,4."),
+    (-11, "IPARAM(7) = 1 and BMAT = 'G' are incompatible."),
+    (-12, "IPARAM(1) must be equal to 0 or 1."),
+    (-9999, "Could not build an Arnoldi factorization. "
+     "IPARAM(5) returns the size of the current Arnoldi factorization.")])
+
+dneupd = arpack.dneupd_
+# dneupd.argtypes = [POINTER(c_int), c_char_p, POINTER(c_int),
+#                    POINTER(c_double), POINTER(c_double), POINTER(c_double),
+#                    POINTER(c_int), POINTER(c_double), POINTER(c_double),
+#                    POINTER(c_double), c_char_p, POINTER(c_int), c_char_p,
+#                    POINTER(c_int), POINTER(c_double), POINTER(c_double),
+#                    POINTER(c_int), POINTER(c_double), POINTER(c_int),
+#                    POINTER(c_int), POINTER(c_int), POINTER(c_double),
+#                    POINTER(c_double), POINTER(c_int), POINTER(c_int)]
+dneupd_messages = dict([
+    (0, "Normal exit."),
+    (1, "The Schur form computed by LAPACK routine dlahqr could not be "
+     "reordered by LAPACK routine dtrsen. Re-enter subroutine dneupd with "
+     "IPARAM(5)=NCV and increase the size of the arrays DR and DI to have "
+     "dimension at least dimension NCV and allocate at least NCV columns "
+     "for Z. NOTE, \"Not necessary if Z and V share the same space. "
+     "Please notify the authors if this error occurs.\""),
+    (-1, "N must be positive."),
+    (-2, "NEV must be positive."),
+    (-3, "NCV-NEV >= 2 and less than or equal to N."),
+    (-5, "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"),
+    (-6, "BMAT must be one of 'I' or 'G'."),
+    (-7, "Length of private work WORKL array is not sufficient."),
+    (-8, "Error return from calculation of a real Schur form. "
+     "Informational error from LAPACK routine dlahqr."),
+    (-9, "Error return from calculation of eigenvectors. "
+     "Informational error from LAPACK routine dtrevc."),
+    (-10, "IPARAM(7) must be 1,2,3,4."),
+    (-11, "IPARAM(7) = 1 and BMAT = 'G' are incompatible."),
+    (-12, "HOWMNY = 'S' not yet implemented."),
+    (-13, "HOWMNY must be one of 'A' or 'P' if RVEC = .true."),
+    (-14, "DNAUPD did not find any eigenvalues to sufficient accuracy."),
+    (-15, "DNEUPD got a different count of the number of converged Ritz "
+     "values than DNAUPD got. This indicates the user probably made an "
+     "error in passing data from DNAUPD to DNEUPD or that the data was "
+     "modified before entering DNEUPD.")])
+
+
+class ArpackError(Exception):
+    pass
+
+
+def eigensolver(matrix: scipy.sparse.csr_matrix,
+                num_eigenpairs: Optional[int] = 10,
+                sigma: Optional[float] = None,
+                initial_vector: Optional[np.array] = None) \
+        -> Tuple[np.array, np.array]:
+    """Solve eigenvalue problem for sparse matrix.
+
+    Parameters
+    ----------
+    matrix : scipy.sparse.spmatrix
+        A matrix in compressed sparse row format.
+    num_eigenpairs : int, optional
+        Number of eigenpairs to compute. Default is 10.
+    sigma : float, optional
+        Find eigenvalues close to the value of sigma. The default value is
+        near 1.0. Currently unsupported on the GPU.
+    initial_vector : np.array, optional
+        Initial vector to use in the Arnoldi iteration. If not set, a vector
+        with all entries equal to one will be used.
+
+    Returns
+    -------
+    ew : np.array
+        Eigenvalues in descending order of magnitude.
+    ev : np.array
+        Eigenvectors corresponding to the eigenvalues in `ew`.
+
+    """
+    if sigma is not None:
+        raise RuntimeError('Shift/invert mode not implemented on the GPU')
+
+    N = matrix.shape[0]
+
+    if initial_vector is None:
+        initial_vector = np.ones(N)
+
+    n = c_int(N)                # Dimension of the eigenproblem.
+    maxn = n
+    nev = c_int(num_eigenpairs + 1)  # Number of eigenvalues to compute.
+    ncv = c_int(num_eigenpairs + 3)  # Number of columns of the matrix V.
+    maxncv = ncv.value
+    ldv = c_int(maxn.value)
+
+    # assert 0 < nev.value < n.value - 1
+    # assert 2 - nev.value <= ncv.value <= n.value
+
+    tol = c_double(EPSILON)
+
+    d = (c_double * (3 * maxncv))()
+
+    resid = initial_vector.ctypes.data_as(POINTER(c_double))
+
+    vnp = np.zeros((maxncv, ldv.value), dtype=np.float64)
+    v = vnp.ctypes.data_as(POINTER(c_double))
+
+    workdnp = np.zeros(3 * maxn.value, dtype=np.float64)
+    workd = workdnp.ctypes.data_as(POINTER(c_double))
+    workev = (c_double * (3 * maxncv))()
+    workl = (c_double * (3 * maxncv * maxncv + 6 * maxncv))()
+
+    ipntr = (c_int * 14)()
+    select = (c_int * maxncv)()
+
+    bmat = create_string_buffer(b'I')  # B = I, standard eigenvalue problem.
+    which = create_string_buffer(b'LM')  # Eigenvalues of largest magnitude.
+    ido = c_int(0)
+    lworkl = c_int(len(workl))
+    info = c_int(1)
+
+    ishfts = c_int(1)   # Use exact shifts.
+    maxitr = c_int(MAX_ITERATIONS)
+    # mode = c_int(3)           # A x = lambda x (OP = inv(A - sigma I), B = I)
+    mode = c_int(1)           # A x = lambda x (OP = A, B = I)
+    iparam = (c_int * 11)(ishfts, 0, maxitr, 0, 0, 0, mode)
+
+    ierr = c_int(0)
+    rvec = c_int(1)
+    howmny = c_char(b'A')
+    if sigma is not None:
+        sigmar = c_double(np.real(sigma))
+        sigmai = c_double(np.imag(sigma))
+    else:
+        sigmar = c_double()
+        sigmai = c_double()
+
+    MVP = mvp.MatrixVectorProduct(matrix)
+
+    # eye = scipy.sparse.spdiags(np.ones(N), 0, N, N)
+    # A = (matrix - (sigma) * eye).tocsc()
+    # solve = scipy.sparse.linalg.factorized(A)
+
+    logging.debug('Running Arnoldi iteration with tolerance {:g}...'
+                  .format(tol.value))
+
+    clk = clock.Clock()
+    clk.tic()
+    for itr in range(maxitr.value):
+        dnaupd(byref(ido), bmat, byref(n), which, byref(nev), byref(tol),
+               resid, byref(ncv), v, byref(ldv), iparam, ipntr, workd, workl,
+               byref(lworkl), byref(info))
+
+        if info.value != 0:
+            raise ArpackError(dnaupd_messages[info.value])
+
+        if ido.value == 99:
+            break
+        elif abs(ido.value) != 1:
+            logging.warning('DNAUPD repoted IDO = {}'.format(ido.value))
+            break
+
+        idx_rhs, idx_sol = ipntr[0] - 1, ipntr[1] - 1
+        rhs = workdnp[idx_rhs:(idx_rhs+N)]
+
+        # # sol = solve(rhs)
+        # sol = matrix @ rhs
+        # workdnp[idx_sol:idx_sol+N] = sol
+        sol = MVP.product(gpuarray.to_gpu(rhs.astype(np.float64)))
+        workdnp[idx_sol:idx_sol+N] = sol.get()
+    clk.toc()
+
+    logging.debug('Done with Arnoldi iteration after {} steps. '
+                  'Elapsed time: {} seconds'.format(itr, clk))
+
+    logging.debug('Running post-processing step...')
+
+    clk.tic()
+    d0 = byref(d, 0)
+    d1 = byref(d, maxncv * sizeof(c_double))
+    dneupd(byref(rvec), byref(howmny), select, d0, d1, v, byref(ldv),
+           byref(sigmar), byref(sigmai), workev, byref(bmat), byref(n),
+           which, byref(nev), byref(tol), resid, byref(ncv), v, byref(ldv),
+           iparam, ipntr, workd, workl, byref(lworkl), byref(ierr))
+    clk.toc()
+
+    logging.debug('Done with postprocessing step after {} seconds. '
+                  'Status: {}'.format(clk, ('OK' if ierr.value == 0
+                                            else 'FAIL')))
+
+    if ierr.value != 0:
+        raise ArpackError(dneupd_messages[ierr.value])
+
+    nconv = iparam[4] - 1
+    logging.debug('Converged on {} eigenpairs.'.format(nconv))
+
+    ew = np.array(d[:nconv])
+    ii = np.argsort(np.abs(ew))[::-1]
+    ev = vnp[ii, :]
+
+    return ew[ii], ev
diff --git a/diffusion_maps/matrix_vector_product.py b/diffusion_maps/matrix_vector_product.py
new file mode 100644
index 0000000..ed68b3d
--- /dev/null
+++ b/diffusion_maps/matrix_vector_product.py
@@ -0,0 +1,37 @@
+import numpy as np
+
+import scipy.sparse
+
+import pycuda.gpuarray as gpuarray
+
+from . import cusparse as cs
+
+
+class MatrixVectorProduct:
+    """Perform GPU-based, sparse matrix-vector products."""
+    def __init__(self, matrix: scipy.sparse.csr_matrix) -> None:
+        self.m = matrix.shape[0]
+        self.n = matrix.shape[1]
+        self.nnz = matrix.nnz
+        self.csrValA = gpuarray.to_gpu(matrix.data.astype(np.float64))
+        self.csrRowPtrA = gpuarray.to_gpu(matrix.indptr)
+        self.csrColIndA = gpuarray.to_gpu(matrix.indices)
+        self.handle = cs.cusparseCreate()
+        self.descr = cs.cusparseCreateMatDescr()
+
+    def __del__(self) -> None:
+        if self.descr is not None:
+            cs.cusparseDestroyMatDescr(self.descr)
+            self.descr = None
+        if self.handle is not None:
+            cs.cusparseDestroy(self.handle)
+            self.handle = None
+
+    def product(self, x: gpuarray.GPUArray) -> gpuarray.GPUArray:
+        """Multiply sparse matrix by dense vector."""
+        y = gpuarray.empty_like(x)
+        op = cs.cusparseOperation.CUSPARSE_OPERATION_NON_TRANSPOSE
+        cs.cusparseDcsrmv(self.handle, op, self.m, self.n, self.nnz, 1.0,
+                          self.descr, self.csrValA, self.csrRowPtrA,
+                          self.csrColIndA, x, 0.0, y)
+        return y
diff --git a/diffusion_maps/plot.py b/diffusion_maps/plot.py
new file mode 100644
index 0000000..8704c0d
--- /dev/null
+++ b/diffusion_maps/plot.py
@@ -0,0 +1,67 @@
+"""Module for plotting eigenvectors.
+
+"""
+
+__all__ = ['plot_eigenvectors']
+
+from typing import Tuple
+
+import matplotlib.pyplot as plt
+
+import numpy as np
+
+from . import default
+
+
+def get_rows_and_columns(num_plots: int) -> Tuple[int, int]:
+    """Get optimal number of rows and columns to display figures.
+
+    Parameters
+    ----------
+    num_plots : int
+        Number of subplots
+
+    Returns
+    -------
+    rows : int
+        Optimal number of rows.
+    cols : int
+        Optimal number of columns.
+
+    """
+    if num_plots <= 10:
+        layouts = {
+            1: (1, 1), 2: (1, 2), 3: (1, 3), 4: (2, 2), 5: (2, 3),
+            6: (2, 3), 7: (2, 4), 8: (2, 4), 9: (3, 9), 10: (2, 5)
+        }
+        rows, cols = layouts[num_plots]
+    else:
+        rows = int(np.ceil(np.sqrt(num_plots)))
+        cols = rows
+
+    return rows, cols
+
+
+def plot_eigenvectors(data: np.array, eigenvectors: np.array) -> None:
+    """Plot eigenvectors onto 2D data.
+
+    """
+    x = data[:, 0]
+    y = data[:, 1]
+
+    num_eigenvectors = min(eigenvectors.shape[0]-1, default.num_eigenpairs-1)
+
+    rows, cols = get_rows_and_columns(num_eigenvectors)
+
+    for k in range(1, eigenvectors.shape[0]):
+        plt.subplot(rows, cols, k)
+        plt.scatter(x, y, c=eigenvectors[k, :], cmap='RdBu_r',
+                    rasterized=True)
+        plt.xlabel('$x$')
+        plt.ylabel('$y$')
+        cb = plt.colorbar()
+        cb.set_label('Eigenvector value')
+        plt.title('$\\psi_{{{}}}$'.format(k))
+
+    # plt.tight_layout()
+    plt.show()
diff --git a/diffusion_maps/profiler.py b/diffusion_maps/profiler.py
new file mode 100644
index 0000000..4e5deb0
--- /dev/null
+++ b/diffusion_maps/profiler.py
@@ -0,0 +1,29 @@
+"""Context manager for running code under the Python profiler.
+
+"""
+
+
+__all__ = ['Profiler']
+
+import cProfile as profile
+
+
+class Profiler:
+    """Run code under the profiler and report at the end."""
+    def __init__(self, stream):
+        """Initialize profiler with an open stream."""
+        self.stream = stream
+
+    def __enter__(self):
+        if self.stream is not None:
+            self.profile = profile.Profile()
+            self.profile.enable()
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        if self.stream is not None:
+            self.profile.disable()
+
+            import pstats
+            ps = pstats.Stats(self.profile, stream=self.stream)
+            ps.sort_stats('cumulative')
+            ps.print_stats()
diff --git a/diffusion_maps/tests/__init__.py b/diffusion_maps/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/diffusion_maps/tests/test_diffusion_maps.py b/diffusion_maps/tests/test_diffusion_maps.py
new file mode 100644
index 0000000..91c65c5
--- /dev/null
+++ b/diffusion_maps/tests/test_diffusion_maps.py
@@ -0,0 +1,120 @@
+"""Unit test for the diffusion_maps module.
+
+"""
+
+import logging
+import unittest
+
+import numpy as np
+# import matplotlib.pyplot as plt
+
+from diffusion_maps import DiffusionMaps, downsample
+
+
+def make_strip(xmin: float, ymin: float, width: float,
+               height: float, num_samples: int) -> np.array:
+    """Draw samples from a 2D strip with uniform distribution.
+
+    """
+    x = width * np.random.rand(num_samples) - xmin
+    y = height * np.random.rand(num_samples) - ymin
+
+    return np.stack((x, y), axis=-1)
+
+
+class DiffusionMapsTest(unittest.TestCase):
+    def setUp(self):
+        logging.basicConfig(level=logging.DEBUG)
+        self.xmin = 0.0
+        self.ymin = 0.0
+        self.width = 1.0
+        self.height = 1e-1
+        self.num_samples = 50000
+        self.data = make_strip(self.xmin, self.ymin,
+                               self.width, self.height,
+                               self.num_samples)
+
+    @staticmethod
+    def _compute_rayleigh_quotients(matrix, eigenvectors):
+        """Compute Rayleigh quotients."""
+        N = eigenvectors.shape[0]
+        rayleigh_quotients = np.zeros(N)
+        for n in range(N):
+            v = eigenvectors[n, :]
+            rayleigh_quotients[n] = np.dot(v, matrix @ v) / np.dot(v, v)
+        rayleigh_quotients = np.sort(np.abs(rayleigh_quotients))
+        return rayleigh_quotients[::-1]
+
+    def test_accuracy(self):
+        num_samples = 10000
+        logging.debug('Computing diffusion maps on a matrix of size {}'
+                      .format(num_samples))
+        num_eigenpairs = 10
+        epsilon = 5e-1
+        downsampled_data = downsample(self.data, num_samples)
+
+        dm = DiffusionMaps(downsampled_data, epsilon,
+                           num_eigenpairs=num_eigenpairs)
+
+        ew = dm.eigenvalues
+        rq = self._compute_rayleigh_quotients(dm.kernel_matrix,
+                                              dm.eigenvectors)
+
+        logging.debug('Eigenvalues: {}'.format(ew))
+        logging.debug('Rayleigh quotients: {}'.format(rq))
+
+        self.assertTrue(np.allclose(np.abs(ew), np.abs(rq)))
+
+        # import scipy.io
+        # scipy.io.mmwrite('kernel_matrix.mtx', dm.kernel_matrix)
+
+    def test_multiple_epsilon_values(self):
+        num_samples = 5000
+        num_maps = 10
+        num_eigenpairs = 10
+        epsilon_min, epsilon_max = 1e-1, 1e1
+        epsilons = np.logspace(np.log10(epsilon_min),
+                               np.log10(epsilon_max), num_maps)
+
+        downsampled_data = downsample(self.data, num_samples)
+
+        evs = np.zeros((num_maps, num_eigenpairs, downsampled_data.shape[0]))
+        ews = np.zeros((num_maps, num_eigenpairs))
+
+        logging.basicConfig(level=logging.WARNING)
+
+        for i, epsilon in enumerate(reversed(epsilons)):
+            dm = DiffusionMaps(downsampled_data, epsilon,
+                               num_eigenpairs=num_eigenpairs)
+
+            evs[i, :, :] = dm.eigenvectors
+            ews[i, :] = dm.eigenvalues
+
+            ew = dm.eigenvalues
+            rq = self._compute_rayleigh_quotients(dm.kernel_matrix,
+                                                  dm.eigenvectors)
+            self.assertTrue(np.allclose(np.abs(ew), np.abs(rq)))
+
+            # plt.title('$\\epsilon$ = {:.3f}'.format(epsilon))
+            # for k in range(1, 10):
+            #     plt.subplot(2, 5, k)
+            #     plt.scatter(downsampled_data[:, 0], downsampled_data[:, 1],
+            #                 c=evs[i, k, :])
+            #     plt.xlim([self.xmin, self.xmin + self.width])
+            #     plt.ylim([self.ymin, self.ymin + self.height])
+            #     plt.tight_layout()
+            #     plt.gca().set_title('$\\psi_{}$'.format(k))
+            # plt.subplot(2, 5, 10)
+            # plt.step(range(ews[i, :].shape[0]), np.abs(ews[i, :]))
+            # plt.title('epsilon = {:.2f}'.format(epsilon))
+            # plt.show()
+
+
+if __name__ == '__main__':
+    import os
+    verbose = os.getenv('VERBOSE')
+    if verbose is not None:
+        logging.basicConfig(level=logging.DEBUG, format='%(message)s')
+    else:
+        logging.basicConfig(level=logging.ERROR, format='%(message)s')
+    unittest.main()
diff --git a/diffusion_maps/tests/test_geometric_harmonics.py b/diffusion_maps/tests/test_geometric_harmonics.py
new file mode 100644
index 0000000..030e932
--- /dev/null
+++ b/diffusion_maps/tests/test_geometric_harmonics.py
@@ -0,0 +1,120 @@
+"""Unit test for the Geometric Harmonics module.
+
+"""
+
+import logging
+import unittest
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+from diffusion_maps import GeometricHarmonicsInterpolator
+
+from diffusion_maps.clock import Clock
+
+
+def make_points(num_points: int) -> np.array:
+    xx, yy = np.meshgrid(np.linspace(-4, 4, num_points),
+                         np.linspace(-4, 4, num_points))
+    return np.stack((xx.ravel(), yy.ravel())).T
+
+
+def plot(points: np.array, values: np.array, **kwargs) -> None:
+    title = kwargs.pop('title', None)
+    if title:
+        plt.title(title)
+    plt.scatter(points[:, 0], points[:, 1], c=values,
+                marker='o', rasterized=True, s=2.5, **kwargs)
+    cb = plt.colorbar()
+    cb.set_clim([np.min(values), np.max(values)])
+    cb.set_ticks(np.linspace(np.min(values), np.max(values), 5))
+    plt.xlim([-4, 4])
+    plt.ylim([-4, 4])
+    plt.xlabel('$x$')
+    plt.ylabel('$y$')
+    plt.gca().set_aspect('equal')
+
+
+def f(points: np.array) -> np.array:
+    """Function to interpolate.
+
+    """
+    # return np.ones(points.shape[0])
+    # return np.arange(points.shape[0])
+    return np.sin(np.linalg.norm(points, axis=-1))
+
+
+def show_info(title: str, values: np.array) -> None:
+    """Log relevant information.
+
+    """
+    logging.info('{}: mean = {:g}, std. = {:g}, max. abs. = {:g}'
+                 .format(title, np.mean(values), np.std(values),
+                         np.max(np.abs(values))))
+
+
+class GeometricHarmonicsTest(unittest.TestCase):
+    def setUp(self):
+        # self.num_points = 1000
+        # self.points = downsample(np.load('data.npy'), self.num_points)
+        # self.values = np.ones(self.num_points)
+
+        # np.save('actual-data.npy', self.points)
+
+        # self.points = np.load('actual-data.npy')
+        # self.num_points = self.points.shape[0]
+        # self.values = np.ones(self.num_points)
+
+        self.points = make_points(23)
+        self.num_points = self.points.shape[0]
+        self.values = f(self.points)
+
+    def test_geometric_harmonics_interpolator(self):
+        logging.basicConfig(level=logging.DEBUG)
+
+        eps = 1e-1
+        dmaps_opts = {'num_eigenpairs': self.num_points-3,
+                      'cut_off': 1e1 * eps}
+        ghi = GeometricHarmonicsInterpolator(self.points, self.values,
+                                             eps, dmaps_opts)
+
+        points = make_points(100)
+
+        with Clock() as clock:
+            values = ghi(points)
+        logging.debug('Evaluation of geometric harmonics done ({} seconds).'.
+                      format(clock))
+
+        residual = values - f(points)
+        self.assertLess(np.max(np.abs(residual)), 7.5e-2)
+
+        show_info('Original function', f(points))
+        show_info('Sampled points', self.values)
+        show_info('Reconstructed function', values)
+        show_info('Residual', residual)
+
+        # plt.subplot(2, 2, 1)
+        # plot(points, f(points), title='Original function')
+        #
+        # plt.subplot(2, 2, 2)
+        # plot(self.points, self.values, title='Sampled function')
+        #
+        # plt.subplot(2, 2, 4)
+        # plot(points, values, title='Reconstructed function')
+        #
+        # plt.subplot(2, 2, 3)
+        # plot(points, residual, title='Residual', cmap='RdBu_r')
+        #
+        # plt.tight_layout()
+        # plt.show()
+
+
+if __name__ == '__main__':
+    import os
+    verbose = os.getenv('VERBOSE')
+    if verbose is not None:
+        logging.basicConfig(level=logging.DEBUG, format='%(message)s')
+    else:
+        logging.basicConfig(level=logging.ERROR, format='%(message)s')
+    unittest.main()
diff --git a/diffusion_maps/utils.py b/diffusion_maps/utils.py
new file mode 100644
index 0000000..8ef648d
--- /dev/null
+++ b/diffusion_maps/utils.py
@@ -0,0 +1,40 @@
+"""Miscellaneous utilities.
+
+"""
+
+
+__all__ = ['coo_tocsr']
+
+
+import numpy as np
+import scipy.sparse
+
+
+def coo_tocsr(A: scipy.sparse.coo_matrix) -> scipy.sparse.csr_matrix:
+    """Convert matrix to Compressed Sparse Row format, fast.
+
+    This function is derived from the corresponding SciPy code but it avoids
+    the sanity checks that slow `scipy.sparse.coo_matrix.to_csr down`. In
+    particular, by not summing duplicates we can attain important speed-ups
+    for large matrices.
+
+    """
+    from scipy.sparse import csr_matrix
+    if A.nnz == 0:
+        return csr_matrix(A.shape, dtype=A.dtype)
+
+    m, n = A.shape
+    # Using 32-bit integer indices allows for matrices of up to 2,147,483,647
+    # non-zero entries.
+    idx_dtype = np.int32
+    row = A.row.astype(idx_dtype, copy=False)
+    col = A.col.astype(idx_dtype, copy=False)
+
+    indptr = np.empty(n+1, dtype=idx_dtype)
+    indices = np.empty_like(row, dtype=idx_dtype)
+    data = np.empty_like(A.data, dtype=A.dtype)
+
+    scipy.sparse._sparsetools.coo_tocsr(m, n, A.nnz, row, col, A.data,
+                                        indptr, indices, data)
+
+    return csr_matrix((data, indices, indptr), shape=A.shape)
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644
index 0000000..943885b
--- /dev/null
+++ b/doc/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+SPHINXPROJ    = diffusion-maps
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/doc/conf.py b/doc/conf.py
new file mode 100644
index 0000000..30c31a0
--- /dev/null
+++ b/doc/conf.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# diffusion-maps documentation build configuration file, created by
+# sphinx-quickstart on Sat May 20 12:03:02 2017.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc',
+    'sphinx.ext.mathjax',
+    'sphinx.ext.viewcode']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = 'diffusion-maps'
+copyright = '2017, Juan M. Bello-Rivas'
+author = 'Juan M. Bello-Rivas'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = ''
+# The full version, including alpha/beta/rc tags.
+release = ''
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'alabaster'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+
+# -- Options for HTMLHelp output ------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'diffusion-mapsdoc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+
+    # Latex figure (float) alignment
+    #
+    # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'diffusion-maps.tex', 'diffusion-maps Documentation',
+     'Juan M. Bello-Rivas', 'manual'),
+]
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'diffusion-maps', 'diffusion-maps Documentation',
+     [author], 1)
+]
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'diffusion-maps', 'diffusion-maps Documentation',
+     author, 'diffusion-maps', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+
+
diff --git a/doc/diffusion_maps.rst b/doc/diffusion_maps.rst
new file mode 100644
index 0000000..0ea3aad
--- /dev/null
+++ b/doc/diffusion_maps.rst
@@ -0,0 +1,125 @@
+diffusion\_maps package
+=======================
+
+Subpackages
+-----------
+
+.. toctree::
+
+    diffusion_maps.tests
+
+Submodules
+----------
+
+diffusion\_maps\.clock module
+-----------------------------
+
+.. automodule:: diffusion_maps.clock
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.command\_line\_interface module
+------------------------------------------------
+
+.. automodule:: diffusion_maps.command_line_interface
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.cpu\_eigensolver module
+----------------------------------------
+
+.. automodule:: diffusion_maps.cpu_eigensolver
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.cusparse module
+--------------------------------
+
+.. automodule:: diffusion_maps.cusparse
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.default module
+-------------------------------
+
+.. automodule:: diffusion_maps.default
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.diffusion\_maps module
+---------------------------------------
+
+.. automodule:: diffusion_maps.diffusion_maps
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.geometric\_harmonics module
+--------------------------------------------
+
+.. automodule:: diffusion_maps.geometric_harmonics
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.gpu\_eigensolver module
+----------------------------------------
+
+.. automodule:: diffusion_maps.gpu_eigensolver
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.matrix\_vector\_product module
+-----------------------------------------------
+
+.. automodule:: diffusion_maps.matrix_vector_product
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.plot module
+----------------------------
+
+.. automodule:: diffusion_maps.plot
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.profiler module
+--------------------------------
+
+.. automodule:: diffusion_maps.profiler
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.utils module
+-----------------------------
+
+.. automodule:: diffusion_maps.utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+diffusion\_maps\.version module
+-------------------------------
+
+.. automodule:: diffusion_maps.version
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: diffusion_maps
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000..3e209a6
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,22 @@
+.. diffusion-maps documentation master file, created by
+   sphinx-quickstart on Sat May 20 12:03:02 2017.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to diffusion-maps's documentation!
+==========================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   modules
+
+.. automodule:: diffusion_maps
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/doc/make.bat b/doc/make.bat
new file mode 100644
index 0000000..4e0923d
--- /dev/null
+++ b/doc/make.bat
@@ -0,0 +1,36 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+set SPHINXPROJ=diffusion-maps
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/doc/modules.rst b/doc/modules.rst
new file mode 100644
index 0000000..64f1ec4
--- /dev/null
+++ b/doc/modules.rst
@@ -0,0 +1,7 @@
+diffusion_maps
+==============
+
+.. toctree::
+   :maxdepth: 4
+
+   diffusion_maps
diff --git a/geometric-harmonics.png b/geometric-harmonics.png
new file mode 100644
index 0000000..03efcdd
Binary files /dev/null and b/geometric-harmonics.png differ
diff --git a/scripts/make-version b/scripts/make-version
new file mode 100755
index 0000000..c6139e6
--- /dev/null
+++ b/scripts/make-version
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+# GIT_ID=`git rev-parse HEAD`
+GIT_ID=`git describe --always`
+GIT_NUM=`git rev-list HEAD --count`
+DATE=`date +%Y-%b-%d`
+DNUM=`date +%Y%m%d`
+
+(cat <<EOF
+__all__ = ['version']
+
+
+class version:
+    """Current version."""
+    v_short = 'DNUM.GIT_NUM'
+    v_long = 'DNUM.GIT_NUM (DATE)'
+    v_gnu = 'diffusion_maps DNUM.GIT_NUM (DATE)'
+EOF
+) | sed -e "s/GIT_ID/$GIT_ID/g" -e "s/DATE/$DATE/g" -e "s/DNUM/$DNUM/g" -e "s/GIT_NUM/$GIT_NUM/g"
+
+echo "diffusion-maps $DNUM.$GIT_NUM ($DATE)" >&2
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..72f7f88
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[flake8]
+ignore = E402, W503, E226
+
+#[easy_install]
+#find-links = ../offline
+#allow-hosts = None
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..17ac700
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+import diffusion_maps
+
+
+setup(name='diffusion-maps',
+      version=diffusion_maps.version.v_short,
+      description='Diffusion maps',
+      long_description='Library for computing diffusion maps',
+      license='MIT License',
+      author='Juan M. Bello-Rivas',
+      author_email='jmbr@superadditive.com',
+      packages=find_packages(),
+      package_dir={'diffusion_maps': 'diffusion_maps'},
+      package_data={'': ['LICENSE']},
+      test_suite='nose.collector',
+      tests_require=['nose'],
+      install_requires=['scipy', 'numpy'],
+      extras_require={
+          'plotting': ['matplotlib'],
+          'cuda': ['pycuda']
+      },
+      entry_points={
+          'console_scripts': 'diffusion-maps = diffusion_maps.command_line_interface:main'
+      })