Skip to content

Commit

Permalink
Merge pull request #2514 from moj-analytical-services/maint/deps
Browse files Browse the repository at this point in the history
Update lockfile + fixes for latest package versions
  • Loading branch information
ADBond authored Nov 21, 2024
2 parents 1c8ad3a + af18a92 commit 9161712
Show file tree
Hide file tree
Showing 7 changed files with 828 additions and 628 deletions.
1,417 changes: 799 additions & 618 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ sqlalchemy = ">=1.4.0"
# temporarily use binary version, to avoid issues with pg_config path
psycopg2-binary = ">=2.8.0"
igraph = ">=0.11.2"
# 2.2.2 is first version that supports numpy >= 2.0.0
# if we don't constrain this, we can end up with numpy >= 2.0.0 and pandas < 2.2.2
# particularly in pythons 3.10 and 3.11
pandas = [
{version= ">1.3.5", python = ">=3.8"},
{version=">=2.2.2", python = ">=3.10"},
]

[tool.poetry.group.linting]
[tool.poetry.group.linting.dependencies]
Expand Down
2 changes: 1 addition & 1 deletion scripts/postgres_docker/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# run from root

# add -d for detached mode (run in background)
docker-compose -f scripts/postgres/docker-compose.yaml up
docker-compose -f scripts/postgres_docker/docker-compose.yaml up
2 changes: 1 addition & 1 deletion splink/internals/dialects.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def default_date_format(self):

@property
def default_timestamp_format(self):
return "%Y-%m-%dT%H:%M:%S%Z"
return "%Y-%m-%dT%H:%M:%SZ"

def _try_parse_date_raw(self, name: str, date_format: str = None) -> str:
if date_format is None:
Expand Down
3 changes: 1 addition & 2 deletions splink/internals/spark/spark_helpers/custom_spark_dialect.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from sqlglot import exp
from sqlglot.dialects import Dialect, Spark
from sqlglot.generator import Generator as GeneratorSqlglot


def cast_as_double_edit(self, expression):
Expand All @@ -25,7 +24,7 @@ class Parser(Spark.Parser):
**Spark.Parser.FUNCTIONS,
}

class Generator(GeneratorSqlglot):
class Generator(Spark.Generator):
TYPE_MAPPING = {
**Spark.Generator.TYPE_MAPPING,
}
Expand Down
19 changes: 16 additions & 3 deletions tests/test_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,28 @@
from tests.basic_settings import get_settings_dict

df = pd.read_csv("./tests/datasets/fake_1000_from_splink_demos.csv")
__splink__dummy_frame = pd.DataFrame(["id"])
_dummy_pd_frame = pd.DataFrame(["id"])


def make_mock_execute(db_api):
# creates a mock version of linker._sql_to_splink_dataframe,
# so we can count calls
dummy_splink_df = DuckDBDataFrame("template", "__splink__dummy_frame", db_api)
dummy_table_name = "__splink__dummy_frame"
dummy_splink_df = DuckDBDataFrame("template", dummy_table_name, db_api)

def register_and_return_dummy_frame(*args, **kwargs):
        # need to make sure that the dummy frame always exists in the context
        # in which we are running tests
        # not actually interested in the frame itself, but it needs to exist in
        # the connection in case a method tries to access it
db_api._con.sql(
f"CREATE TABLE IF NOT EXISTS {dummy_table_name} AS "
f"SELECT * FROM _dummy_pd_frame"
)
return dummy_splink_df

mock_execute = create_autospec(
db_api._sql_to_splink_dataframe, return_value=dummy_splink_df
db_api._sql_to_splink_dataframe, side_effect=register_and_return_dummy_frame
)
return mock_execute

Expand Down
6 changes: 3 additions & 3 deletions tests/test_sql_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def move_l_r_test(br, expected):
res = move_l_r_table_prefix_to_column_suffix(br)
res = move_l_r_table_prefix_to_column_suffix(br, sqlglot_dialect="duckdb")
assert res.lower() == expected.lower()


Expand All @@ -20,8 +20,8 @@ def test_move_l_r_table_prefix_to_column_suffix():
expected = "first_name_l = first_name_r"
move_l_r_test(br, expected)

br = "substr(l.last_name, 1, 2) = substr(r.last_name, 1, 2)"
expected = "substr(last_name_l, 1, 2) = substr(last_name_r, 1, 2)"
br = "substring(l.last_name, 1, 2) = substring(r.last_name, 1, 2)"
expected = "substring(last_name_l, 1, 2) = substring(last_name_r, 1, 2)"
move_l_r_test(br, expected)

br = "l.name['first'] = r.name['first'] and levenshtein(l.dob, r.dob) < 2"
Expand Down

0 comments on commit 9161712

Please sign in to comment.