
Add DenseClus Implementation notebook for jumpstart #60

Merged
merged 17 commits on Feb 29, 2024
Changes from 12 commits
14 changes: 9 additions & 5 deletions denseclus/DenseClus.py

@@ -593,15 +593,15 @@ def predict(self, df_new: pd.DataFrame) -> np.ndarray:
         )
         return predictions

-    def evaluate(self) -> np.array:
+    def evaluate(self, log_dbcv=False) -> np.array:
         """Evaluates the cluster and returns the cluster assigned to each row.

         This is a wrapper function for HDBSCAN. It outputs the cluster labels
         that HDBSCAN converged on.

         Parameters
         ----------
-        None : None
+        log_dbcv (bool) : Whether to log DBCV scores. Defaults to False

         Returns
         -------
@@ -612,14 +612,18 @@ def evaluate(self) -> np.array:
         clustered = labels >= 0

         if isinstance(self.hdbscan_, dict) or self.umap_combine_method == "ensemble":
-            print(f"DBCV score {self.hdbscan_['hdb_numerical'].relative_validity_}")
-            print(f"DBCV score {self.hdbscan_['hdb_categorical'].relative_validity_}")
+            if log_dbcv:
+                print(f"DBCV numerical score {self.hdbscan_['hdb_numerical'].relative_validity_}")
+                print(
+                    f"DBCV categorical score {self.hdbscan_['hdb_categorical'].relative_validity_}"
+                )
             embedding_len = self.numerical_umap_.embedding_.shape[0]
             coverage = np.sum(clustered) / embedding_len
             print(f"Coverage {coverage}")
             return labels

-        print(f"DBCV score {self.hdbscan_.relative_validity_}")
+        if log_dbcv:
+            print(f"DBCV score {self.hdbscan_.relative_validity_}")
         embedding_len = self.mapper_.embedding_.shape[0]
         coverage = np.sum(clustered) / embedding_len
         print(f"Coverage {coverage}")
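For context, the coverage metric that evaluate() prints is simply the fraction of points HDBSCAN assigned to a cluster (noise points get the label -1). A minimal sketch with hypothetical labels:

```python
import numpy as np

# Hypothetical HDBSCAN-style labels: -1 marks noise, >= 0 marks a cluster.
labels = np.array([0, 0, 1, -1, 2, -1, 1, 0, 2, 1])

# Coverage, as computed in evaluate(): clustered points / total points.
clustered = labels >= 0
coverage = np.sum(clustered) / labels.shape[0]

print(f"Coverage {coverage}")  # 8 of 10 points clustered -> 0.8
```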
2 changes: 2 additions & 0 deletions notebooks/02_TuningWithHDBSCAN.ipynb

@@ -365,6 +365,8 @@
 ],
 "source": [
  "# we will make our own scorer for DBCV\n",
+ "\n",
+ "\n",
  "def dbcv_score(X, labels):\n",
  "    return validity_index(X, labels)\n",
  "\n",
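A scorer like dbcv_score above is typically used to select HDBSCAN hyperparameters by grid search. A minimal sketch of that loop, using an illustrative stand-in for the score (the real notebook wraps hdbscan.validity.validity_index, which needs the hdbscan package; fake_dbcv_score and candidate_sizes are hypothetical names):

```python
# Illustrative stand-in for dbcv_score(X, labels): pretend cluster
# validity peaks when min_cluster_size == 15.
def fake_dbcv_score(min_cluster_size: int) -> float:
    return 1.0 - abs(min_cluster_size - 15) / 100.0

# Candidate hyperparameter values to try.
candidate_sizes = [5, 10, 15, 25, 50]

# Grid search: score each candidate, keep the best-scoring setting.
best_size = max(candidate_sizes, key=fake_dbcv_score)
print(best_size)  # -> 15
```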
2,191 changes: 2,191 additions & 0 deletions notebooks/DenseClusImplentation.ipynb
Collaborator:
FYI: I can't leave comments on specific lines because the diff is too big (and the GitHub VS Code extension doesn't support commenting on notebooks; see microsoft/vscode-pull-request-github#3462). I'll try to be descriptive in the comments.

Collaborator:
Add a comment explaining why we only keep rows where native_country == " United-States".
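The filter in question presumably looks something like the following sketch (the toy frame is hypothetical; the leading space in " United-States" is copied verbatim from the comment above, a quirk of how the values appear in the dataset):

```python
import pandas as pd

# Toy frame standing in for the notebook's dataset; note the leading
# space in " United-States", matching the raw values.
df = pd.DataFrame({
    "native_country": [" United-States", " Mexico", " United-States"],
    "age": [39, 50, 38],
})

# Restricting to a single country is the step the reviewer asks to
# document, e.g. to keep categorical cardinality manageable.
us_only = df[df["native_country"] == " United-States"]
print(len(us_only))  # -> 2
```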

Collaborator:
Add a markdown description of what/why we are doing in the "Create UMAP embeddings & Fit HdbScan for Numerical and Categorical features separately" section (e.g., "a seemingly straightforward approach may be to try clustering numerical and categorical features separately; let's use this as a baseline to compare against").

Also include a brief overview of what UMAP and HDBSCAN are; this can probably be pulled from the other notebooks.

Collaborator:
Thoughts about the baseline separate numerical/categorical cluster analysis:

  • The cluster results don't look very meaningful; could this be improved with hyperparameter optimization? That may be too much for this notebook, though, especially since this is just supposed to be a baseline and we get reasonable DenseClus results. I'm open either way here; any thoughts?
  • In the select_dtypes line, why are we dropping segment and then adding it back in the next line?
  • Can we expand the analysis to look at more than just the mean? I think other descriptive stats might help with the storytelling (but I understand the cluster quality is not good, so there isn't much of a story to tell).
  • Can we see the columns used for categorical clustering?
  • Categorical analysis points 2 and 3 seem to conflict: we are saying both that there is a small finite space where points can land and that we have a large sparse space.
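The drop-then-re-add pattern questioned in the second bullet presumably looks like this sketch (column names hypothetical): select_dtypes(include="number") picks up the segment label along with the real features, so it gets dropped from the feature matrix and then re-attached to serve as the groupby key. The third bullet's request for richer stats can be met with agg() instead of mean():

```python
import pandas as pd

# Hypothetical frame: "segment" is a numeric cluster label, so
# select_dtypes(include="number") would pull it in as a feature.
df = pd.DataFrame({
    "age": [39, 50, 38],
    "hours_per_week": [40, 13, 40],
    "workclass": ["Private", "Self-emp", "Private"],
    "segment": [0, 1, 0],
})

# The pattern the reviewer questions: drop the label from the numeric
# feature matrix, then re-attach it as the groupby key.
numeric = df.select_dtypes(include="number").drop(columns=["segment"])
numeric["segment"] = df["segment"]

# Beyond mean(): multiple descriptive stats per cluster in one call.
summary = numeric.groupby("segment").agg(["mean", "median", "std"])
```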

Collaborator:
Can we update all of the plots to have appropriate x/y axis labels (or remove the labels) instead of "None"?

momonga-ml marked 4 conversations as resolved.


4 changes: 2 additions & 2 deletions requirements-dev.txt

@@ -1,7 +1,7 @@
 black==23.11.0
 coverage==7.3.2
-mypy==1.7.0
-nbqa==1.7.0
+mypy==1.7.1
+nbqa==1.7.1
 pre-commit==3.5.0
 pylint==3.0.2
 pytest==7.4.3
3 changes: 2 additions & 1 deletion requirements.txt

@@ -3,4 +3,5 @@ numpy>=1.20.2
 hdbscan>=0.8.27
 numba>=0.51.2
 pandas>=1.2.4
-scikit_learn>=0.24.2
+scikit_learn>=0.24.2
+seaborn>=0.13.0