Merge pull request #35 from panagiotisanagnostou/new_features

Algorithm execution improvement & Documentation update
panagiotisanagnostou · Jun 23, 2023 · 2495a66 · 2495a66
2 parents c59ca76 + b589572
commit 2495a66
Show file tree

Hide file tree

Showing 8 changed files with 196 additions and 27 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -21,7 +21,7 @@
 author = 'Panagiotis Anagnostou'
 
 # The full version, including alpha/beta/rc tags
-release = '0.4.0'
+release = '0.4.2'
 
 # -- General configuration ---------------------------------------------------
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -71,13 +71,16 @@ Acknowledgments
 This project has received funding from the Hellenic Foundation for Research and Innovation (HFRI), under grant agreement No 1901.
 
 
+
+Contents
+-------------
+
 .. toctree::
    :maxdepth: 2
-   :hidden:
 
+   self
    modules
    examples
 
 * :ref:`genindex`
 * :ref:`modindex`
-* :ref:`search`
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 
-__version__ = "0.4.1"
+__version__ = "0.4.2"
 
 setuptools.setup(
     name="HiPart",

diff --git a/src/HiPart/__init__.py b/src/HiPart/__init__.py
@@ -37,7 +37,7 @@
 from KDEpy.TreeKDE import TreeKDE
 from KDEpy.FFTKDE import FFTKDE
 
-__version__ = "0.4.1"
+__version__ = "0.4.2"
 __author__ = "Panagiotis Anagnostou"
 
 TreeKDE = TreeKDE

diff --git a/src/HiPart/__utility_functions.py b/src/HiPart/__utility_functions.py
@@ -69,15 +69,18 @@ def execute_decomposition_method(
 
     Parameters
     ----------
-    two_dimentions
     data_matrix : numpy.ndarray
         The data matrix contains all the data for the samples.
     decomposition_method : str
         One of 'kpca', 'pca' and 'ica' the decomposition methods supported by
         this software.
+    two_dimentions : bool
+        If True, the projection uses the first two components of the 'pca'
+        and 'ica' methods. The 'kpca' and 'tsne' methods are projected onto
+        only one dimension because of their nature.
     decomposition_args : dict
         Arguments to use by each of the decomposition methods utilized by the
-        HIDIV package.
+        HiPart package.
 
     Returns
     -------
@@ -87,6 +90,12 @@ def execute_decomposition_method(
 
     """
     if two_dimentions:
+        if decomposition_method in ["tsne"]:
+            raise ValueError(
+                ": The decomposition method ("
+                + decomposition_method
+                + ") cannot be executed correctly for two dimentions!"
+            )
         n_of_dimentions = 2
     else:
         n_of_dimentions = 1

diff --git a/src/HiPart/clustering.py b/src/HiPart/clustering.py
@@ -27,6 +27,7 @@
 import HiPart.__utility_functions as util
 import numpy as np
 import statsmodels.api as sm
+import warnings
 
 from KDEpy import FFTKDE
 from scipy import stats
@@ -61,7 +62,9 @@ class DePDDP:
     visualization_utility : bool, (optional)
         If (True) generate the data needed by the visualization utilities of
         the package otherwise, if false the split_visualization and
-        interactive_visualization of the package can not be created.
+        interactive_visualization of the package can not be created. The
+        'tsne' decomposition method does not support visualization because it
+        affects the correct execution of the dePDDP algorithm.
     **decomposition_args :
         Arguments for each of the decomposition methods ("decomposition.PCA" as
         "pca", "decomposition.KernelPCA" as "kpca", "decomposition.FastICA" as
@@ -97,7 +100,11 @@ def __init__(
         self.bandwidth_scale = bandwidth_scale
         self.percentile = percentile
         self.min_sample_split = min_sample_split
-        self.visualization_utility = visualization_utility
+        if decomposition_method in ["tsne"]:
+            self.visualization_utility = False
+            warnings.warn("DePDDP: does not support visualization for 'tsne'.")
+        else:
+            self.visualization_utility = visualization_utility
         self.decomposition_args = decomposition_args
 
     def fit(self, X):
@@ -430,6 +437,24 @@ def min_sample_split(self, v):
             )
         self._min_sample_split = v
 
+    @property
+    def visualization_utility(self):
+        return self._visualization_utility
+
+    @visualization_utility.setter
+    def visualization_utility(self, v):
+        if v is not True and v is not False:
+            raise ValueError(
+                "DePDDP: visualization_utility: Should be True or False"
+            )
+
+        if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]:
+            raise ValueError(
+                "DePDDP: visualization_utility: 'tsne' method is can't be used"
+                + " with the visualization utility."
+            )
+        self._visualization_utility = v
+
     @property
     def tree(self):
         return self._tree
@@ -517,7 +542,9 @@ class IPDDP:
     visualization_utility : bool, (optional)
         If (True) generate the data needed by the visualization utilities of
         the package otherwise, if false the split_visualization and
-        interactive_visualization of the package can not be created.
+        interactive_visualization of the package can not be created. The
+        'tsne' decomposition method does not support visualization because it
+        affects the correct execution of the iPDDP algorithm.
     **decomposition_args :
         Arguments for each of the decomposition methods ("decomposition.PCA" as
         "pca", "decomposition.KernelPCA" as "kpca", "decomposition.FastICA" as
@@ -551,7 +578,11 @@ def __init__(
         self.max_clusters_number = max_clusters_number
         self.percentile = percentile
         self.min_sample_split = min_sample_split
-        self.visualization_utility = visualization_utility
+        if decomposition_method in ["tsne"]:
+            self.visualization_utility = False
+            warnings.warn("IPDDP: does not support visualization for 'tsne'.")
+        else:
+            self.visualization_utility = visualization_utility
         self.decomposition_args = decomposition_args
 
     def fit(self, X):
@@ -868,6 +899,23 @@ def min_sample_split(self, v):
             )
         self._min_sample_split = v
 
+    @property
+    def visualization_utility(self):
+        return self._visualization_utility
+
+    @visualization_utility.setter
+    def visualization_utility(self, v):
+        if v is not True and v is not False:
+            raise ValueError(
+                "IPDDP: visualization_utility: Should be True or False"
+            )
+
+        if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]:
+            raise ValueError(
+                "IPDDP: visualization_utility: Should be pca when visualization_utility is True."
+            )
+        self._visualization_utility = v
+
     @property
     def tree(self):
         return self._tree
@@ -952,7 +1000,9 @@ class KMPDDP:
     visualization_utility : bool, (optional)
         If (True) generate the data needed by the visualization utilities of
         the package otherwise, if false the split_visualization and
-        interactive_visualization of the package can not be created.
+        interactive_visualization of the package can not be created. The
+        'tsne' decomposition method does not support visualization because it
+        affects the correct execution of the kMeans-PDDP algorithm.
     random_seed : int, (optional)
         The random seed fed in the k-Means algorithm
     **decomposition_args :
@@ -987,7 +1037,11 @@ def __init__(
         self.decomposition_method = decomposition_method
         self.max_clusters_number = max_clusters_number
         self.min_sample_split = min_sample_split
-        self.visualization_utility = visualization_utility
+        if decomposition_method in ["tsne"]:
+            self.visualization_utility = False
+            warnings.warn("KMPDDP: does not support visualization for 'tsne'.")
+        else:
+            self.visualization_utility = visualization_utility
         self.random_seed = random_seed
         self.decomposition_args = decomposition_args
 
@@ -1304,6 +1358,23 @@ def random_seed(self, v):
             )
         self._random_seed = v
 
+    @property
+    def visualization_utility(self):
+        return self._visualization_utility
+
+    @visualization_utility.setter
+    def visualization_utility(self, v):
+        if v is not True and v is not False:
+            raise ValueError(
+                "KMPDDP: visualization_utility: Should be True or False"
+            )
+
+        if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]:
+            raise ValueError(
+                "KMPDDP: visualization_utility: Should be pca when visualization_utility is True"
+            )
+        self._visualization_utility = v
+
     @property
     def tree(self):
         return self._tree
@@ -1385,7 +1456,9 @@ class PDDP:
     visualization_utility : bool, (optional)
         If (True) generate the data needed by the visualization utilities of
         the package otherwise, if false the split_visualization and
-        interactive_visualization of the package can not be created.
+        interactive_visualization of the package can not be created. The
+        'tsne' decomposition method does not support visualization because it
+        affects the correct execution of the PDDP algorithm.
     **decomposition_args :
         Arguments for each of the decomposition methods ("decomposition.PCA" as
         "pca", "decomposition.KernelPCA" as "kpca", "decomposition.FastICA" as
@@ -1417,7 +1490,11 @@ def __init__(
         self.decomposition_method = decomposition_method
         self.max_clusters_number = max_clusters_number
         self.min_sample_split = min_sample_split
-        self.visualization_utility = visualization_utility
+        if decomposition_method in ["tsne"]:
+            self.visualization_utility = False
+            warnings.warn("PDDP: does not support visualization for 'tsne'.")
+        else:
+            self.visualization_utility = visualization_utility
         self.decomposition_args = decomposition_args
 
     def fit(self, X):
@@ -1705,6 +1782,23 @@ def min_sample_split(self, v):
             )
         self._min_sample_split = v
 
+    @property
+    def visualization_utility(self):
+        return self._visualization_utility
+
+    @visualization_utility.setter
+    def visualization_utility(self, v):
+        if v is not True and v is not False:
+            raise ValueError(
+                "PDDP: visualization_utility: Should be True or False"
+            )
+
+        if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]:
+            raise ValueError(
+                "PDDP: visualization_utility: Should be pca when visualization_utility is True"
+            )
+        self._visualization_utility = v
+
     @property
     def tree(self):
         return self._tree
@@ -2123,19 +2217,22 @@ def output_matrix(self):
 
     @output_matrix.setter
     def output_matrix(self, v):
-        self._output_matrix = v
+        raise RuntimeError(
+            "BisectingKmeans: output_matrix: can only be generated and not to be assigned!"
+        )
 
     @property
     def labels_(self):
         labels_ = np.ones(np.size(self.X, 0))
         for i in self.tree.leaves():
             labels_[i.data["indices"]] = i.identifier
-        self.labels_ = labels_
-        return self._labels_
+        return labels_
 
     @labels_.setter
     def labels_(self, v):
-        self._labels_ = v
+        raise RuntimeError(
+            "BisectingKmeans: labels_: can only be generated and not to be assigned!"
+        )
 
 
 class MDH:
@@ -2162,7 +2259,7 @@ class MDH:
         not occur. [0,0.5) values are allowed.
     min_sample_split : int, optional
         The minimum number of points needed in a cluster for a split to occur.
-    random_seed : int, optional
+    random_state : int, optional
         The random seed to be used in the algorithm's execution.
 
     Attributes
@@ -2440,7 +2537,7 @@ def calculate_node_data(self, indices, key):
                     #   4. njev (number of jacobian/ gradient evaluations)
                     results, depth = util.md_sqp(initial_v_n_b, node_data, self.k)
 
-                    # If the algorithm terminated successfully try appending the append the solution
+                    # If the algorithm terminated successfully try to append the solution
                     if results.success:
                         v = results.x[:-1] / np.linalg.norm(results.x[:-1])
                         projection = np.dot(node_data, v).reshape(-1, 1)