Skip to content

Commit

Permalink
Merge pull request #35 from panagiotisanagnostou/new_features
Browse files Browse the repository at this point in the history
Algorithm execution improvement & Documentation update
  • Loading branch information
panagiotisanagnostou authored Jun 23, 2023
2 parents c59ca76 + b589572 commit 2495a66
Show file tree
Hide file tree
Showing 8 changed files with 196 additions and 27 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
author = 'Panagiotis Anagnostou'

# The full version, including alpha/beta/rc tags
release = '0.4.0'
release = '0.4.2'

# -- General configuration ---------------------------------------------------

Expand Down
7 changes: 5 additions & 2 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,16 @@ Acknowledgments
This project has received funding from the Hellenic Foundation for Research and Innovation (HFRI), under grant agreement No 1901.



Contents
-------------

.. toctree::
:maxdepth: 2
:hidden:

self
modules
examples

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()

__version__ = "0.4.1"
__version__ = "0.4.2"

setuptools.setup(
name="HiPart",
Expand Down
2 changes: 1 addition & 1 deletion src/HiPart/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from KDEpy.TreeKDE import TreeKDE
from KDEpy.FFTKDE import FFTKDE

__version__ = "0.4.1"
__version__ = "0.4.2"
__author__ = "Panagiotis Anagnostou"

TreeKDE = TreeKDE
Expand Down
13 changes: 11 additions & 2 deletions src/HiPart/__utility_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,18 @@ def execute_decomposition_method(
Parameters
----------
two_dimentions
data_matrix : numpy.ndarray
The data matrix contains all the data for the samples.
decomposition_method : str
One of 'kpca', 'pca' and 'ica' the decomposition methods supported by
this software.
two_dimentions : bool
If True the projection will be on the first two components of the 'pca'
and 'ica' methods. The 'kpca' and 'tsne' methods will be projected only
on one dimension because of their nature.
decomposition_args : dict
Arguments to use by each of the decomposition methods utilized by the
HIDIV package.
HiPart package.
Returns
-------
Expand All @@ -87,6 +90,12 @@ def execute_decomposition_method(
"""
if two_dimentions:
if decomposition_method in ["tsne"]:
raise ValueError(
": The decomposition method ("
+ decomposition_method
+ ") cannot be executed correctly for two dimentions!"
)
n_of_dimentions = 2
else:
n_of_dimentions = 1
Expand Down
125 changes: 111 additions & 14 deletions src/HiPart/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import HiPart.__utility_functions as util
import numpy as np
import statsmodels.api as sm
import warnings

from KDEpy import FFTKDE
from scipy import stats
Expand Down Expand Up @@ -61,7 +62,9 @@ class DePDDP:
visualization_utility : bool, (optional)
If (True) generate the data needed by the visualization utilities of
the package otherwise, if false the split_visualization and
interactive_visualization of the package can not be created.
interactive_visualization of the package can not be created. The
'tsne' decomposition method does not support visualization because it
affects the correct execution of the dePDDP algorithm.
**decomposition_args :
Arguments for each of the decomposition methods ("decomposition.PCA" as
"pca", "decomposition.KernelPCA" as "kpca", "decomposition.FastICA" as
Expand Down Expand Up @@ -97,7 +100,11 @@ def __init__(
self.bandwidth_scale = bandwidth_scale
self.percentile = percentile
self.min_sample_split = min_sample_split
self.visualization_utility = visualization_utility
if decomposition_method in ["tsne"]:
self.visualization_utility = False
warnings.warn("DePDDP: does not support visualization for 'tsne'.")
else:
self.visualization_utility = visualization_utility
self.decomposition_args = decomposition_args

def fit(self, X):
Expand Down Expand Up @@ -430,6 +437,24 @@ def min_sample_split(self, v):
)
self._min_sample_split = v

@property
def visualization_utility(self):
    """bool: Flag telling whether the visualization data are generated."""
    return self._visualization_utility

@visualization_utility.setter
def visualization_utility(self, v):
    """Validate and store the visualization flag.

    Parameters
    ----------
    v : bool
        Must be exactly True or False.

    Raises
    ------
    ValueError
        If `v` is not a boolean, or if `v` is True while the object's
        decomposition method is not one of 'pca', 'ica' or 'kpca'.
    """
    if v is not True and v is not False:
        raise ValueError(
            "DePDDP: visualization_utility: Should be True or False"
        )

    # Only these decomposition methods are accepted together with the
    # visualization utility; report the offending method by name.
    if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]:
        raise ValueError(
            "DePDDP: visualization_utility: the '"
            + str(self.decomposition_method)
            + "' decomposition method can't be used with the visualization"
            + " utility; use one of 'pca', 'ica' or 'kpca'."
        )
    self._visualization_utility = v

@property
def tree(self):
return self._tree
Expand Down Expand Up @@ -517,7 +542,9 @@ class IPDDP:
visualization_utility : bool, (optional)
If (True) generate the data needed by the visualization utilities of
the package otherwise, if false the split_visualization and
interactive_visualization of the package can not be created.
interactive_visualization of the package can not be created. The
'tsne' decomposition method does not support visualization because it
affects the correct execution of the iPDDP algorithm.
**decomposition_args :
Arguments for each of the decomposition methods ("decomposition.PCA" as
"pca", "decomposition.KernelPCA" as "kpca", "decomposition.FastICA" as
Expand Down Expand Up @@ -551,7 +578,11 @@ def __init__(
self.max_clusters_number = max_clusters_number
self.percentile = percentile
self.min_sample_split = min_sample_split
self.visualization_utility = visualization_utility
if decomposition_method in ["tsne"]:
self.visualization_utility = False
warnings.warn("IPDDP: does not support visualization for 'tsne'.")
else:
self.visualization_utility = visualization_utility
self.decomposition_args = decomposition_args

def fit(self, X):
Expand Down Expand Up @@ -868,6 +899,23 @@ def min_sample_split(self, v):
)
self._min_sample_split = v

@property
def visualization_utility(self):
    """bool: Flag telling whether the visualization data are generated."""
    return self._visualization_utility

@visualization_utility.setter
def visualization_utility(self, v):
    """Validate and store the visualization flag.

    Parameters
    ----------
    v : bool
        Must be exactly True or False.

    Raises
    ------
    ValueError
        If `v` is not a boolean, or if `v` is True while the object's
        decomposition method is not one of 'pca', 'ica' or 'kpca'.
    """
    if v is not True and v is not False:
        raise ValueError(
            "IPDDP: visualization_utility: Should be True or False"
        )

    # 'pca', 'ica' and 'kpca' are all accepted, so the message lists them
    # and names the method that was actually rejected.
    if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]:
        raise ValueError(
            "IPDDP: visualization_utility: the '"
            + str(self.decomposition_method)
            + "' decomposition method can't be used with the visualization"
            + " utility; use one of 'pca', 'ica' or 'kpca'."
        )
    self._visualization_utility = v

@property
def tree(self):
return self._tree
Expand Down Expand Up @@ -952,7 +1000,9 @@ class KMPDDP:
visualization_utility : bool, (optional)
If (True) generate the data needed by the visualization utilities of
the package otherwise, if false the split_visualization and
interactive_visualization of the package can not be created.
interactive_visualization of the package can not be created. The
'tsne' decomposition method does not support visualization because it
affects the correct execution of the kMeans-PDDP algorithm.
random_seed : int, (optional)
The random seed fed in the k-Means algorithm
**decomposition_args :
Expand Down Expand Up @@ -987,7 +1037,11 @@ def __init__(
self.decomposition_method = decomposition_method
self.max_clusters_number = max_clusters_number
self.min_sample_split = min_sample_split
self.visualization_utility = visualization_utility
if decomposition_method in ["tsne"]:
self.visualization_utility = False
warnings.warn("KMPDDP: does not support visualization for 'tsne'.")
else:
self.visualization_utility = visualization_utility
self.random_seed = random_seed
self.decomposition_args = decomposition_args

Expand Down Expand Up @@ -1304,6 +1358,23 @@ def random_seed(self, v):
)
self._random_seed = v

@property
def visualization_utility(self):
    """bool: Flag telling whether the visualization data are generated."""
    return self._visualization_utility

@visualization_utility.setter
def visualization_utility(self, v):
    """Validate and store the visualization flag.

    Parameters
    ----------
    v : bool
        Must be exactly True or False.

    Raises
    ------
    ValueError
        If `v` is not a boolean, or if `v` is True while the object's
        decomposition method is not one of 'pca', 'ica' or 'kpca'.
    """
    if v is not True and v is not False:
        raise ValueError(
            "KMPDDP: visualization_utility: Should be True or False"
        )

    # 'pca', 'ica' and 'kpca' are all accepted, so the message lists them
    # and names the method that was actually rejected.
    if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]:
        raise ValueError(
            "KMPDDP: visualization_utility: the '"
            + str(self.decomposition_method)
            + "' decomposition method can't be used with the visualization"
            + " utility; use one of 'pca', 'ica' or 'kpca'."
        )
    self._visualization_utility = v

@property
def tree(self):
return self._tree
Expand Down Expand Up @@ -1385,7 +1456,9 @@ class PDDP:
visualization_utility : bool, (optional)
If (True) generate the data needed by the visualization utilities of
the package otherwise, if false the split_visualization and
interactive_visualization of the package can not be created.
interactive_visualization of the package can not be created. The
'tsne' decomposition method does not support visualization because it
affects the correct execution of the PDDP algorithm.
**decomposition_args :
Arguments for each of the decomposition methods ("decomposition.PCA" as
"pca", "decomposition.KernelPCA" as "kpca", "decomposition.FastICA" as
Expand Down Expand Up @@ -1417,7 +1490,11 @@ def __init__(
self.decomposition_method = decomposition_method
self.max_clusters_number = max_clusters_number
self.min_sample_split = min_sample_split
self.visualization_utility = visualization_utility
if decomposition_method in ["tsne"]:
self.visualization_utility = False
warnings.warn("PDDP: does not support visualization for 'tsne'.")
else:
self.visualization_utility = visualization_utility
self.decomposition_args = decomposition_args

def fit(self, X):
Expand Down Expand Up @@ -1705,6 +1782,23 @@ def min_sample_split(self, v):
)
self._min_sample_split = v

@property
def visualization_utility(self):
    """bool: Flag telling whether the visualization data are generated."""
    return self._visualization_utility

@visualization_utility.setter
def visualization_utility(self, v):
    """Validate and store the visualization flag.

    Parameters
    ----------
    v : bool
        Must be exactly True or False.

    Raises
    ------
    ValueError
        If `v` is not a boolean, or if `v` is True while the object's
        decomposition method is not one of 'pca', 'ica' or 'kpca'.
    """
    if v is not True and v is not False:
        raise ValueError(
            "PDDP: visualization_utility: Should be True or False"
        )

    # 'pca', 'ica' and 'kpca' are all accepted, so the message lists them
    # and names the method that was actually rejected.
    if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]:
        raise ValueError(
            "PDDP: visualization_utility: the '"
            + str(self.decomposition_method)
            + "' decomposition method can't be used with the visualization"
            + " utility; use one of 'pca', 'ica' or 'kpca'."
        )
    self._visualization_utility = v

@property
def tree(self):
return self._tree
Expand Down Expand Up @@ -2123,19 +2217,22 @@ def output_matrix(self):

@output_matrix.setter
def output_matrix(self, v):
    """Reject any assignment to ``output_matrix``.

    The value is not stored: a setter that reports the attribute as
    non-assignable must not modify the object before raising.

    Raises
    ------
    RuntimeError
        Always.
    """
    raise RuntimeError(
        "BisectingKmeans: output_matrix: can only be generated and not to be assigned!"
    )

@property
def labels_(self):
    """numpy.ndarray: Per-sample labels, rebuilt from the tree on each access.

    The array has one entry per row of ``self.X``, initialised to one.
    Every index listed in a leaf's ``data["indices"]`` is then set to that
    leaf's ``identifier``.
    """
    labels = np.ones(np.size(self.X, 0))
    for leaf in self.tree.leaves():
        labels[leaf.data["indices"]] = leaf.identifier
    # Return the freshly built array directly: assigning it through
    # ``self.labels_`` would invoke the setter, which always raises.
    return labels

@labels_.setter
def labels_(self, v):
    """Reject any assignment to ``labels_``.

    Raises
    ------
    RuntimeError
        Always.
    """
    raise RuntimeError(
        "BisectingKmeans: labels_: can only be generated and not to be assigned!"
    )


class MDH:
Expand All @@ -2162,7 +2259,7 @@ class MDH:
not occur. [0,0.5) values are allowed.
min_sample_split : int, optional
The minimum number of points needed in a cluster for a split to occur.
random_seed : int, optional
random_state : int, optional
The random seed to be used in the algorithm's execution.
Attributes
Expand Down Expand Up @@ -2440,7 +2537,7 @@ def calculate_node_data(self, indices, key):
# 4. njev (number of jacobian/ gradient evaluations)
results, depth = util.md_sqp(initial_v_n_b, node_data, self.k)

# If the algorithm terminated successfully try appending the append the solution
# If the algorithm terminated successfully try to append the solution
if results.success:
v = results.x[:-1] / np.linalg.norm(results.x[:-1])
projection = np.dot(node_data, v).reshape(-1, 1)
Expand Down
Loading

0 comments on commit 2495a66

Please sign in to comment.