diff --git a/docs/conf.py b/docs/conf.py index 0bf0d74..e858e52 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ author = 'Panagiotis Anagnostou' # The full version, including alpha/beta/rc tags -release = '0.4.0' +release = '0.4.2' # -- General configuration --------------------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index 4339777..45ca726 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -71,13 +71,16 @@ Acknowledgments This project has received funding from the Hellenic Foundation for Research and Innovation (HFRI), under grant agreement No 1901. + +Contents +------------- + .. toctree:: :maxdepth: 2 - :hidden: + self modules examples * :ref:`genindex` * :ref:`modindex` -* :ref:`search` diff --git a/setup.py b/setup.py index dd2fb7a..eebb550 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() -__version__ = "0.4.1" +__version__ = "0.4.2" setuptools.setup( name="HiPart", diff --git a/src/HiPart/__init__.py b/src/HiPart/__init__.py index 4b75cff..174aaf8 100644 --- a/src/HiPart/__init__.py +++ b/src/HiPart/__init__.py @@ -37,7 +37,7 @@ from KDEpy.TreeKDE import TreeKDE from KDEpy.FFTKDE import FFTKDE -__version__ = "0.4.1" +__version__ = "0.4.2" __author__ = "Panagiotis Anagnostou" TreeKDE = TreeKDE diff --git a/src/HiPart/__utility_functions.py b/src/HiPart/__utility_functions.py index 702893b..e6c043d 100644 --- a/src/HiPart/__utility_functions.py +++ b/src/HiPart/__utility_functions.py @@ -69,15 +69,18 @@ def execute_decomposition_method( Parameters ---------- - two_dimentions data_matrix : numpy.ndarray The data matrix contains all the data for the samples. decomposition_method : str One of 'kpca', 'pca' and 'ica' the decomposition methods supported by this software. + two_dimentions : bool + If True the projection will be on the first two components of the 'pca' + and 'ica' methods. 
The 'kpca' and 'tsne' methods will be projected only + on one dimension because of their nature. decomposition_args : dict Arguments to use by each of the decomposition methods utilized by the - HIDIV package. + HiPart package. Returns ------- @@ -87,6 +90,12 @@ def execute_decomposition_method( """ if two_dimentions: + if decomposition_method in ["tsne"]: + raise ValueError( + ": The decomposition method (" + + decomposition_method + + ") cannot be executed correctly for two dimentions!" + ) n_of_dimentions = 2 else: n_of_dimentions = 1 diff --git a/src/HiPart/clustering.py b/src/HiPart/clustering.py index 05e5305..6ad39e8 100644 --- a/src/HiPart/clustering.py +++ b/src/HiPart/clustering.py @@ -27,6 +27,7 @@ import HiPart.__utility_functions as util import numpy as np import statsmodels.api as sm +import warnings from KDEpy import FFTKDE from scipy import stats @@ -61,7 +62,9 @@ class DePDDP: visualization_utility : bool, (optional) If (True) generate the data needed by the visualization utilities of the package otherwise, if false the split_visualization and - interactive_visualization of the package can not be created. + interactive_visualization of the package can not be created. The + 'tsne' decomposition method does not support visualization because it + affects the correct execution of the dePDDP algorithm. 
**decomposition_args : Arguments for each of the decomposition methods ("decomposition.PCA" as "pca", "decomposition.KernelPCA" as "kpca", "decomposition.FastICA" as @@ -97,7 +100,11 @@ def __init__( self.bandwidth_scale = bandwidth_scale self.percentile = percentile self.min_sample_split = min_sample_split - self.visualization_utility = visualization_utility + if decomposition_method in ["tsne"]: + self.visualization_utility = False + warnings.warn("DePDDP: does not support visualization for 'tsne'.") + else: + self.visualization_utility = visualization_utility self.decomposition_args = decomposition_args def fit(self, X): @@ -430,6 +437,24 @@ def min_sample_split(self, v): ) self._min_sample_split = v + @property + def visualization_utility(self): + return self._visualization_utility + + @visualization_utility.setter + def visualization_utility(self, v): + if v is not True and v is not False: + raise ValueError( + "DePDDP: visualization_utility: Should be True or False" + ) + + if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]: + raise ValueError( + "DePDDP: visualization_utility: 'tsne' method is can't be used" + + " with the visualization utility." + ) + self._visualization_utility = v + @property def tree(self): return self._tree @@ -517,7 +542,9 @@ class IPDDP: visualization_utility : bool, (optional) If (True) generate the data needed by the visualization utilities of the package otherwise, if false the split_visualization and - interactive_visualization of the package can not be created. + interactive_visualization of the package can not be created. The + 'tsne' decomposition method does not support visualization because it + affects the correct execution of the iPDDP algorithm. 
**decomposition_args : Arguments for each of the decomposition methods ("decomposition.PCA" as "pca", "decomposition.KernelPCA" as "kpca", "decomposition.FastICA" as @@ -551,7 +578,11 @@ def __init__( self.max_clusters_number = max_clusters_number self.percentile = percentile self.min_sample_split = min_sample_split - self.visualization_utility = visualization_utility + if decomposition_method in ["tsne"]: + self.visualization_utility = False + warnings.warn("IPDDP: does not support visualization for 'tsne'.") + else: + self.visualization_utility = visualization_utility self.decomposition_args = decomposition_args def fit(self, X): @@ -868,6 +899,23 @@ def min_sample_split(self, v): ) self._min_sample_split = v + @property + def visualization_utility(self): + return self._visualization_utility + + @visualization_utility.setter + def visualization_utility(self, v): + if v is not True and v is not False: + raise ValueError( + "IPDDP: visualization_utility: Should be True or False" + ) + + if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]: + raise ValueError( + "IPDDP: visualization_utility: Should be pca when visualization_utility is True." + ) + self._visualization_utility = v + @property def tree(self): return self._tree @@ -952,7 +1000,9 @@ class KMPDDP: visualization_utility : bool, (optional) If (True) generate the data needed by the visualization utilities of the package otherwise, if false the split_visualization and - interactive_visualization of the package can not be created. + interactive_visualization of the package can not be created. The + 'tsne' decomposition method does not support visualization because it + affects the correct execution of the kMeans-PDDP algorithm. 
random_seed : int, (optional) The random seed fed in the k-Means algorithm **decomposition_args : @@ -987,7 +1037,11 @@ def __init__( self.decomposition_method = decomposition_method self.max_clusters_number = max_clusters_number self.min_sample_split = min_sample_split - self.visualization_utility = visualization_utility + if decomposition_method in ["tsne"]: + self.visualization_utility = False + warnings.warn("KMPDDP: does not support visualization for 'tsne'.") + else: + self.visualization_utility = visualization_utility self.random_seed = random_seed self.decomposition_args = decomposition_args @@ -1304,6 +1358,23 @@ def random_seed(self, v): ) self._random_seed = v + @property + def visualization_utility(self): + return self._visualization_utility + + @visualization_utility.setter + def visualization_utility(self, v): + if v is not True and v is not False: + raise ValueError( + "KMPDDP: visualization_utility: Should be True or False" + ) + + if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]: + raise ValueError( + "KMPDDP: visualization_utility: Should be pca when visualization_utility is True" + ) + self._visualization_utility = v + @property def tree(self): return self._tree @@ -1385,7 +1456,9 @@ class PDDP: visualization_utility : bool, (optional) If (True) generate the data needed by the visualization utilities of the package otherwise, if false the split_visualization and - interactive_visualization of the package can not be created. + interactive_visualization of the package can not be created. The + 'tsne' decomposition method does not support visualization because it + affects the correct execution of the PDDP algorithm. 
**decomposition_args : Arguments for each of the decomposition methods ("decomposition.PCA" as "pca", "decomposition.KernelPCA" as "kpca", "decomposition.FastICA" as @@ -1417,7 +1490,11 @@ def __init__( self.decomposition_method = decomposition_method self.max_clusters_number = max_clusters_number self.min_sample_split = min_sample_split - self.visualization_utility = visualization_utility + if decomposition_method in ["tsne"]: + self.visualization_utility = False + warnings.warn("PDDP: does not support visualization for 'tsne'.") + else: + self.visualization_utility = visualization_utility self.decomposition_args = decomposition_args def fit(self, X): @@ -1705,6 +1782,23 @@ def min_sample_split(self, v): ) self._min_sample_split = v + @property + def visualization_utility(self): + return self._visualization_utility + + @visualization_utility.setter + def visualization_utility(self, v): + if v is not True and v is not False: + raise ValueError( + "PDDP: visualization_utility: Should be True or False" + ) + + if v is True and self.decomposition_method not in ["pca", "ica", "kpca"]: + raise ValueError( + "PDDP: visualization_utility: Should be pca when visualization_utility is True" + ) + self._visualization_utility = v + @property def tree(self): return self._tree @@ -2123,19 +2217,22 @@ def output_matrix(self): @output_matrix.setter def output_matrix(self, v): - self._output_matrix = v + raise RuntimeError( + "BisectingKmeans: output_matrix: can only be generated and not to be assigned!" + ) @property def labels_(self): labels_ = np.ones(np.size(self.X, 0)) for i in self.tree.leaves(): labels_[i.data["indices"]] = i.identifier - self.labels_ = labels_ - return self._labels_ + return labels_ @labels_.setter def labels_(self, v): - self._labels_ = v + raise RuntimeError( + "BisectingKmeans: labels_: can only be generated and not to be assigned!" + ) class MDH: @@ -2162,7 +2259,7 @@ class MDH: not occur. [0,0.5) values are allowed. 
min_sample_split : int, optional The minimum number of points needed in a cluster for a split to occur. - random_seed : int, optional + random_state : int, optional The random seed to be used in the algorithm's execution. Attributes @@ -2440,7 +2537,7 @@ def calculate_node_data(self, indices, key): # 4. njev (number of jacobian/ gradient evaluations) results, depth = util.md_sqp(initial_v_n_b, node_data, self.k) - # If the algorithm terminated successfully try appending the append the solution + # If the algorithm terminated successfully try to append the solution if results.success: v = results.x[:-1] / np.linalg.norm(results.x[:-1]) projection = np.dot(node_data, v).reshape(-1, 1) diff --git a/tests/test_package.py b/tests/test_package.py index 2b1be0a..1411409 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -109,8 +109,18 @@ def test_depddp_parameter_errors(): DePDDP(min_sample_split=-5) except Exception: success_score += 1 + try: + obj = DePDDP() + obj.output_matrix = np.array([1, 2, 3]) + except Exception: + success_score += 1 + try: + obj = DePDDP() + obj.labels_ = np.array([1, 2, 3]) + except Exception: + success_score += 1 - assert success_score == 11 + assert success_score == 13 def test_ipddp_parameter_errors(datadir): @@ -139,8 +149,18 @@ def test_ipddp_parameter_errors(datadir): IPDDP(min_sample_split=-5) except Exception: success_score += 1 + try: + obj = IPDDP() + obj.output_matrix = np.array([1, 2, 3]) + except Exception: + success_score += 1 + try: + obj = IPDDP() + obj.labels_ = np.array([1, 2, 3]) + except Exception: + success_score += 1 - assert success_score == 9 + assert success_score == 11 def test_kmpddp_parameter_errors(datadir): @@ -169,8 +189,18 @@ def test_kmpddp_parameter_errors(datadir): KMPDDP(min_sample_split=-5) except Exception: success += 1 + try: + obj = KMPDDP() + obj.output_matrix = np.array([1, 2, 3]) + except Exception: + success += 1 + try: + obj = KMPDDP() + obj.labels_ = np.array([1, 2, 3]) + except 
Exception: + success += 1 - assert success == 9 + assert success == 11 def test_pddp_parameter_errors(datadir): @@ -194,8 +224,18 @@ def test_pddp_parameter_errors(datadir): PDDP(min_sample_split=-5) except Exception: success_score += 1 + try: + obj = PDDP() + obj.output_matrix = np.array([1, 2, 3]) + except Exception: + success_score += 1 + try: + obj = PDDP() + obj.labels_ = np.array([1, 2, 3]) + except Exception: + success_score += 1 - assert success_score == 7 + assert success_score == 9 def test_bicecting_kmeans_parameter_errors(): @@ -218,8 +258,18 @@ def test_bicecting_kmeans_parameter_errors(): BisectingKmeans(min_sample_split=-5) except Exception: success_score += 1 + try: + obj = BisectingKmeans() + obj.output_matrix = np.array([1, 2, 3]) + except Exception: + success_score += 1 + try: + obj = BisectingKmeans() + obj.labels_ = np.array([1, 2, 3]) + except Exception: + success_score += 1 - assert success_score == 6 + assert success_score == 8 def test_mdh_parameter_errors(): @@ -257,8 +307,18 @@ def test_mdh_parameter_errors(): MDH(min_sample_split=-5) except Exception: success_score += 1 + try: + obj = MDH() + obj.output_matrix = np.array([1, 2, 3]) + except Exception: + success_score += 1 + try: + obj = MDH() + obj.labels_ = np.array([1, 2, 3]) + except Exception: + success_score += 1 - assert success_score == 12 + assert success_score == 14 def test_depddp_labels__return_type_and_form(datadir): diff --git a/tests/test_package/test_data.dump b/tests/test_package/test_data.dump index 94d9fab..d7a1f11 100644 Binary files a/tests/test_package/test_data.dump and b/tests/test_package/test_data.dump differ