From c4f951a9ee7897bb70ad4ee4c443e7b0320e58da Mon Sep 17 00:00:00 2001
From: ADBond <48208438+ADBond@users.noreply.github.com>
Date: Wed, 13 Nov 2024 16:52:39 +0000
Subject: [PATCH 1/4] test clustering works even if we have an empty edges
 table

---
 tests/test_clustering.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tests/test_clustering.py b/tests/test_clustering.py
index 2cd8bb8fd..ab85132fb 100644
--- a/tests/test_clustering.py
+++ b/tests/test_clustering.py
@@ -63,3 +63,37 @@ def test_clustering(test_helpers, dialect, link_type, input_pd_tables):
 
     df_predict = linker.inference.predict()
     linker.clustering.cluster_pairwise_predictions_at_threshold(df_predict, 0.95)
+
+
+@mark_with_dialects_excluding()
+def test_clustering_no_edges(test_helpers, dialect):
+    """Smoke test: clustering must not fail when predict() yields no edges."""
+    helper = test_helpers[dialect]
+
+    # No two records share a first name or a surname, so neither blocking
+    # rule below generates any candidate pairs.
+    records = [
+        {"id": 1, "first_name": "Andy", "surname": "Bandy", "city": "London"},
+        {"id": 2, "first_name": "Andi", "surname": "Bandi", "city": "London"},
+        {"id": 3, "first_name": "Terry", "surname": "Berry", "city": "Glasgow"},
+        {"id": 4, "first_name": "Terri", "surname": "Berri", "city": "Glasgow"},
+    ]
+    compared_columns = ("first_name", "surname", "city")
+    blocking_rules = [block_on("surname"), block_on("first_name")]
+
+    settings = SettingsCreator(
+        link_type="dedupe_only",
+        comparisons=[cl.ExactMatch(col) for col in compared_columns],
+        blocking_rules_to_generate_predictions=blocking_rules,
+        unique_id_column_name="id",
+    )
+    linker = Linker(
+        helper.convert_frame(pd.DataFrame(records)),
+        settings,
+        **helper.extra_linker_args(),
+    )
+
+    # The predictions table has no rows; the test passes as long as
+    # clustering it completes without raising.
+    df_edges = linker.inference.predict()
+    linker.clustering.cluster_pairwise_predictions_at_threshold(df_edges, 0.95)

From 69676d472adf7540656f25faccbdc71f0fa8fc9a Mon Sep 17 00:00:00 2001
From: ADBond <48208438+ADBond@users.noreply.github.com>
Date: Wed, 13 Nov 2024 16:58:15 +0000
Subject: [PATCH 2/4] duckdb - columns from information_schema

this handles the case where the table has no rows, and is also faster
---
 splink/internals/duckdb/dataframe.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/splink/internals/duckdb/dataframe.py b/splink/internals/duckdb/dataframe.py
index 787946e2a..e51a8d7d3 100644
--- a/splink/internals/duckdb/dataframe.py
+++ b/splink/internals/duckdb/dataframe.py
@@ -20,9 +20,21 @@ class DuckDBDataFrame(SplinkDataFrame):
 
     @property
     def columns(self) -> list[InputColumn]:
-        d = self.as_record_dict(1)[0]
+        """Return the table's columns, read from DuckDB's catalog.
+
+        The names come from ``information_schema.columns`` rather than from
+        a sampled row, so a table with zero rows still reports its columns.
+        """
+        # ORDER BY makes the result follow the table's declared column order;
+        # without it the row order of the catalog query is unspecified.
+        sql = (
+            "SELECT column_name FROM information_schema.columns "
+            f"WHERE table_name = '{self.physical_name}' "
+            "ORDER BY ordinal_position"
+        )
+        catalog_rows = self.db_api._execute_sql_against_backend(sql).to_df()
+        col_strings = catalog_rows["column_name"].tolist()
 
-        col_strings = list(d.keys())
         return [InputColumn(c, sqlglot_dialect_str="duckdb") for c in col_strings]
 
     def validate(self):

From e7cc974ba05294ca67e6c917e94c1f2ada8e6339 Mon Sep 17 00:00:00 2001
From: ADBond <48208438+ADBond@users.noreply.github.com>
Date: Mon, 18 Nov 2024 12:03:44 +0000
Subject: [PATCH 3/4] changelog entry

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 27454c8db..c6366fb75 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Fixed
+
+- Clustering still works in DuckDB even if no edges are available ([#2510](https://github.com/moj-analytical-services/splink/pull/2510))
+
 ## [4.0.5] - 2024-11-06
 
 - Dataframes to be registered when using `compare_two_records`, to avoid problems with data typing (because the input data can have an explicit schema) ([#2493](https://github.com/moj-analytical-services/splink/pull/2493))

From d903d64bd39256235fc2a2263a2182502c711b8f Mon Sep 17 00:00:00 2001
From: ADBond <48208438+ADBond@users.noreply.github.com>
Date: Mon, 18 Nov 2024 12:04:16 +0000
Subject: [PATCH 4/4] updating some changelog info

---
 CHANGELOG.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c6366fb75..cbcb03d00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,10 +9,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- Various bugfixes for `debug_mode` ([#2481](https://github.com/moj-analytical-services/splink/pull/2481))
 - Clustering still works in DuckDB even if no edges are available ([#2510](https://github.com/moj-analytical-services/splink/pull/2510))
 
 ## [4.0.5] - 2024-11-06
 
+### Fixed
+
 - Dataframes to be registered when using `compare_two_records`, to avoid problems with data typing (because the input data can have an explicit schema) ([#2493](https://github.com/moj-analytical-services/splink/pull/2493))
 
 ## [4.0.4] - 2024-10-13
@@ -166,7 +169,12 @@ Major release - see our [blog](https://moj-analytical-services.github.io/splink/
 - Corrected path for Spark `.jar` file containing UDFs to work correctly for Spark < 3.0 ([#1622](https://github.com/moj-analytical-services/splink/pull/1622))
 - Spark UDF `damerau_levensthein` is now only registered for Spark >= 3.0, as it is not compatible with earlier versions ([#1622](https://github.com/moj-analytical-services/splink/pull/1622))
 
-[unreleased]: https://github.com/moj-analytical-services/splink/compare/4.0.0...HEAD
+[Unreleased]: https://github.com/moj-analytical-services/splink/compare/4.0.5...HEAD
+[4.0.5]: https://github.com/moj-analytical-services/splink/compare/4.0.4...4.0.5
+[4.0.4]: https://github.com/moj-analytical-services/splink/compare/4.0.3...4.0.4
+[4.0.3]: https://github.com/moj-analytical-services/splink/compare/4.0.2...4.0.3
+[4.0.2]: https://github.com/moj-analytical-services/splink/compare/4.0.1...4.0.2
+[4.0.1]: https://github.com/moj-analytical-services/splink/compare/4.0.0...4.0.1
 [4.0.0]: https://github.com/moj-analytical-services/splink/compare/3.9.15...4.0.0
 [3.9.15]: https://github.com/moj-analytical-services/splink/compare/3.9.14...3.9.15
 [3.9.14]: https://github.com/moj-analytical-services/splink/compare/3.9.13...3.9.14