From 427bf944f74a303f829f6bf7015807d9617e0805 Mon Sep 17 00:00:00 2001
From: Alberto Islas
Date: Wed, 20 Nov 2024 12:45:30 -0600
Subject: [PATCH 1/7] fix(api): Added score field to V4 Search API results
Fixes: #4312
---
cl/lib/test_helpers.py | 1 +
cl/search/api_serializers.py | 54 ++++++++++++++++++++++++++----------
cl/search/api_utils.py | 2 ++
cl/tests/cases.py | 23 +++++++++++----
4 files changed, 61 insertions(+), 19 deletions(-)
diff --git a/cl/lib/test_helpers.py b/cl/lib/test_helpers.py
index 6dfd2fccea..c795f99d9e 100644
--- a/cl/lib/test_helpers.py
+++ b/cl/lib/test_helpers.py
@@ -476,6 +476,7 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime:
"timestamp": lambda x: x["result"]
.date_created.isoformat()
.replace("+00:00", "Z"),
+ "score": lambda x: {"bm25": None},
}
v4_recap_meta_keys = v4_meta_keys.copy()
diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py
index 1f9cbb7d75..52d0bd6cd4 100644
--- a/cl/search/api_serializers.py
+++ b/cl/search/api_serializers.py
@@ -435,7 +435,11 @@ class Meta:
)
-class MetaDataSerializer(serializers.Serializer):
+class ScoreDataSerializer(serializers.Serializer):
+ bm25 = serializers.FloatField(read_only=True, source="bm25_score")
+
+
+class BaseMetaDataSerializer(serializers.Serializer):
"""The metadata serializer V4 Search API."""
timestamp = TimeStampField(read_only=True, default_timezone=timezone.utc)
@@ -444,7 +448,15 @@ class MetaDataSerializer(serializers.Serializer):
)
-class RECAPMetaDataSerializer(MetaDataSerializer):
+class MainDocumentMetaDataSerializer(BaseMetaDataSerializer):
+ """The metadata serializer for main documents in the V4 Search API.
+ Includes the score field.
+ """
+
+ score = ScoreDataSerializer(source="*", read_only=True)
+
+
+class RECAPMetaDataSerializer(MainDocumentMetaDataSerializer):
"""The metadata serializer for the RECAP search type includes the
additional more_docs field.
"""
@@ -454,10 +466,10 @@ class RECAPMetaDataSerializer(MetaDataSerializer):
)
-class MetaMixin(serializers.Serializer):
- """Mixin to add nested metadata serializer."""
+class MainMetaMixin(serializers.Serializer):
+ """Mixin to add nested metadata serializer for main documents."""
- meta = MetaDataSerializer(source="*", read_only=True)
+ meta = MainDocumentMetaDataSerializer(source="*", read_only=True)
class RECAPMetaMixin(serializers.Serializer):
@@ -466,7 +478,13 @@ class RECAPMetaMixin(serializers.Serializer):
meta = RECAPMetaDataSerializer(source="*", read_only=True)
-class BaseRECAPDocumentESResultSerializer(MetaMixin, DocumentSerializer):
+class ChildMetaMixin(serializers.Serializer):
+ """Mixin to add nested metadata serializer for child documents."""
+
+ meta = BaseMetaDataSerializer(source="*", read_only=True)
+
+
+class BaseRECAPDocumentESResultSerializer(DocumentSerializer):
"""The base serializer class for RECAP_DOCUMENT search type results."""
# Fields from the RECAPDocument
@@ -505,6 +523,12 @@ class Meta:
)
+class NestedRECAPDocumentESResultSerializer(
+ BaseRECAPDocumentESResultSerializer, ChildMetaMixin
+):
+ """The serializer for nested RECAP documents with child metadata."""
+
+
class BaseDocketESResultSerializer(DocumentSerializer):
"""The serializer class for DOCKETS Search type results."""
@@ -541,25 +565,27 @@ class Meta:
)
-class RECAPDocumentESResultSerializer(BaseRECAPDocumentESResultSerializer):
+class RECAPDocumentESResultSerializer(
+ BaseRECAPDocumentESResultSerializer, MainMetaMixin
+):
"""The serializer for RECAP_DOCUMENT search type results."""
docket_id = serializers.IntegerField(read_only=True)
-class DocketESResultSerializer(MetaMixin, BaseDocketESResultSerializer):
+class DocketESResultSerializer(MainMetaMixin, BaseDocketESResultSerializer):
"""The serializer class for DOCKETS Search type results."""
class RECAPESResultSerializer(RECAPMetaMixin, BaseDocketESResultSerializer):
"""The serializer class for RECAP search type results."""
- recap_documents = BaseRECAPDocumentESResultSerializer(
+ recap_documents = NestedRECAPDocumentESResultSerializer(
many=True, read_only=True, source="child_docs"
)
-class OpinionDocumentESResultSerializer(MetaMixin, DocumentSerializer):
+class OpinionDocumentESResultSerializer(ChildMetaMixin, DocumentSerializer):
"""The serializer for OpinionDocument results."""
snippet = HighlightedField(read_only=True, source="text")
@@ -579,7 +605,7 @@ class Meta:
)
-class OpinionClusterESResultSerializer(MetaMixin, DocumentSerializer):
+class OpinionClusterESResultSerializer(MainMetaMixin, DocumentSerializer):
"""The serializer for OpinionCluster Search results."""
opinions = OpinionDocumentESResultSerializer(
@@ -609,7 +635,7 @@ class Meta:
)
-class PositionESResultSerializer(MetaMixin, DocumentSerializer):
+class PositionESResultSerializer(ChildMetaMixin, DocumentSerializer):
"""The serializer for Positions Search results."""
class Meta:
@@ -644,7 +670,7 @@ class Meta:
)
-class PersonESResultSerializer(MetaMixin, DocumentSerializer):
+class PersonESResultSerializer(MainMetaMixin, DocumentSerializer):
"""The serializer for Person Search results."""
name = HighlightedField(read_only=True)
@@ -674,7 +700,7 @@ class Meta:
)
-class OAESResultSerializer(MetaMixin, DocumentSerializer):
+class OAESResultSerializer(MainMetaMixin, DocumentSerializer):
"""The serializer for V4 Oral argument results."""
snippet = HighlightedField(read_only=True, source="text")
diff --git a/cl/search/api_utils.py b/cl/search/api_utils.py
index 122b06f944..a53487ff08 100644
--- a/cl/search/api_utils.py
+++ b/cl/search/api_utils.py
@@ -486,6 +486,8 @@ def process_results(self, results: Response) -> None:
)
)
result["child_docs"] = child_result_objects
+ # Include the ES main document score as bm25_score.
+ result["bm25_score"] = result.meta.score
if self.reverse:
# If doing backward pagination, reverse the results of the current
diff --git a/cl/tests/cases.py b/cl/tests/cases.py
index 5b0c03e374..d28ce1c4c0 100644
--- a/cl/tests/cases.py
+++ b/cl/tests/cases.py
@@ -262,11 +262,20 @@ async def _compare_field(
meta_expected_value = await sync_to_async(get_meta_expected_value)(
content_to_compare
)
- self.assertEqual(
- meta_value,
- meta_expected_value,
- f"The field '{meta_field}' does not match.",
- )
+ if meta_field == "score":
+ # Special case for the score field. Only confirm the presence of
+ # keys and avoid comparing values, as they differ in each response.
+ self.assertEqual(
+ set(meta_value.keys()),
+ set(meta_expected_value.keys()),
+ f"The keys in field '{meta_field}' do not match.",
+ )
+ else:
+ self.assertEqual(
+ meta_value,
+ meta_expected_value,
+ f"The field '{meta_field}' does not match.",
+ )
async def _test_api_fields_content(
self,
@@ -296,6 +305,10 @@ async def _test_api_fields_content(
meta_value,
) in child_value.items():
with self.subTest(meta_field=meta_field):
+ self.assertFalse(
+ meta_field == "score",
+ msg="score key should not be present in nested documents",
+ )
await self._compare_field(
meta_field,
meta_value,
From 0114e211d45f8ca677cb2ab997de838b6b3b3455 Mon Sep 17 00:00:00 2001
From: Alberto Islas
Date: Wed, 20 Nov 2024 13:57:30 -0600
Subject: [PATCH 2/7] fix(webhook): Removed score field from RECAP Search
Webhooks
---
cl/api/tasks.py | 4 ++--
cl/search/api_serializers.py | 30 ++++++++++++++++++++++--------
cl/tests/cases.py | 10 ++++++++++
3 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/cl/api/tasks.py b/cl/api/tasks.py
index 22db8820f7..ec1c5971ac 100644
--- a/cl/api/tasks.py
+++ b/cl/api/tasks.py
@@ -14,7 +14,7 @@
from cl.corpus_importer.api_serializers import DocketEntrySerializer
from cl.lib.elasticsearch_utils import merge_highlights_into_result
from cl.search.api_serializers import (
- RECAPESResultSerializer,
+ RECAPESWebhookResultSerializer,
V3OAESResultSerializer,
)
from cl.search.api_utils import ResultObject
@@ -180,7 +180,7 @@ def send_search_alert_webhook_es(
meta_hl,
result,
)
- serialized_results = RECAPESResultSerializer(
+ serialized_results = RECAPESWebhookResultSerializer(
results, many=True
).data
case _:
diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py
index 52d0bd6cd4..f27053e95d 100644
--- a/cl/search/api_serializers.py
+++ b/cl/search/api_serializers.py
@@ -462,20 +462,24 @@ class RECAPMetaDataSerializer(MainDocumentMetaDataSerializer):
"""
more_docs = serializers.BooleanField(
- read_only=True, source="child_remaining"
+ read_only=True, source="child_remaining", default=False
)
-class MainMetaMixin(serializers.Serializer):
- """Mixin to add nested metadata serializer for main documents."""
+class RECAPWebhookMetaDataSerializer(BaseMetaDataSerializer):
+ """The metadata serializer for the RECAP search Webhook that includes the
+ additional more_docs field without the score field.
+ """
- meta = MainDocumentMetaDataSerializer(source="*", read_only=True)
+ more_docs = serializers.BooleanField(
+ read_only=True, source="child_remaining", default=False
+ )
-class RECAPMetaMixin(serializers.Serializer):
- """Mixin to add nested metadata serializer for the RECAP search type."""
+class MainMetaMixin(serializers.Serializer):
+ """Mixin to add nested metadata serializer for main documents."""
- meta = RECAPMetaDataSerializer(source="*", read_only=True)
+ meta = MainDocumentMetaDataSerializer(source="*", read_only=True)
class ChildMetaMixin(serializers.Serializer):
@@ -577,12 +581,22 @@ class DocketESResultSerializer(MainMetaMixin, BaseDocketESResultSerializer):
"""The serializer class for DOCKETS Search type results."""
-class RECAPESResultSerializer(RECAPMetaMixin, BaseDocketESResultSerializer):
+class RECAPESResultSerializer(BaseDocketESResultSerializer):
"""The serializer class for RECAP search type results."""
recap_documents = NestedRECAPDocumentESResultSerializer(
many=True, read_only=True, source="child_docs"
)
+ meta = RECAPMetaDataSerializer(source="*", read_only=True)
+
+
+class RECAPESWebhookResultSerializer(BaseDocketESResultSerializer):
+ """The serializer class for RECAP search Webhook results."""
+
+ recap_documents = NestedRECAPDocumentESResultSerializer(
+ many=True, read_only=True, source="child_docs"
+ )
+ meta = RECAPWebhookMetaDataSerializer(source="*", read_only=True)
class OpinionDocumentESResultSerializer(ChildMetaMixin, DocumentSerializer):
diff --git a/cl/tests/cases.py b/cl/tests/cases.py
index d28ce1c4c0..2f0db20e88 100644
--- a/cl/tests/cases.py
+++ b/cl/tests/cases.py
@@ -657,6 +657,11 @@ def _assert_webhook_hit_hl(
if webhook["payload"]["alert"]["name"] == alert_title:
hit = webhook["payload"]["results"][0]
if child_field:
+ self.assertNotIn(
+ "score",
+ hit["recap_documents"][0]["meta"],
+ msg="score shouldn't be present on webhook nested documents",
+ )
child_field_content = hit["recap_documents"][0][field_name]
self.assertIn(
hl_expected,
@@ -665,6 +670,11 @@ def _assert_webhook_hit_hl(
% field_name,
)
else:
+ self.assertNotIn(
+ "score",
+ hit["meta"],
+ msg="score shouldn't be present on webhook main document",
+ )
parent_field_content = hit[field_name]
self.assertIn(
hl_expected,
From c37e051b52c4ee1f4fd9159b0bffce5d118e1cc3 Mon Sep 17 00:00:00 2001
From: Alberto Islas
Date: Wed, 20 Nov 2024 14:25:23 -0600
Subject: [PATCH 3/7] fix(api): Updated documentation related to score field
---
cl/api/templates/search-api-docs-vlatest.html | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/cl/api/templates/search-api-docs-vlatest.html b/cl/api/templates/search-api-docs-vlatest.html
index 3c0ae0b083..d2a65537c1 100644
--- a/cl/api/templates/search-api-docs-vlatest.html
+++ b/cl/api/templates/search-api-docs-vlatest.html
@@ -93,7 +93,10 @@ Basic Usage
"lexisCite": "",
"meta": {
"timestamp": "2024-06-22T10:26:35.320787Z",
- "date_created": "2022-06-26T23:24:18.926040Z"
+ "date_created": "2022-06-26T23:24:18.926040Z",
+ "score": {
+ "bm25": 2.1369965
+ }
},
"neutralCite": "",
"non_participating_judge_ids": [],
@@ -249,6 +252,10 @@ Special Notes
This field only displays Opinion text content.
+
+ The meta
field in main documents contains the score
field, which is currently a hash that includes the bm25
score used by Elasticsearch to rank results. Additional scores may be introduced in the future.
+
+
From 924960ca0ff265c3e06b25322f5e6f35df6f2a03 Mon Sep 17 00:00:00 2001
From: flooie <6464529+flooie@users.noreply.github.com>
Date: Thu, 21 Nov 2024 17:17:16 +0000
Subject: [PATCH 4/7] Update freelawproject dependencies
---
poetry.lock | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/poetry.lock b/poetry.lock
index 47de93c49c..d062c4b304 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2320,13 +2320,13 @@ setuptools = "*"
[[package]]
name = "juriscraper"
-version = "2.6.40"
+version = "2.6.42"
description = "An API to scrape American court websites for metadata."
optional = false
python-versions = "*"
files = [
- {file = "juriscraper-2.6.40-py27-none-any.whl", hash = "sha256:961987e618293545ea227bdf0b90af90a6fa28cafeab939b2633d253392559d8"},
- {file = "juriscraper-2.6.40.tar.gz", hash = "sha256:63a53d5345e5303ba90bd2c6939a31c1c67eba3a2b64a94a91847be502547aea"},
+ {file = "juriscraper-2.6.42-py27-none-any.whl", hash = "sha256:010d6578714f3262f16d15bee709872e0584381b93988d0d504bdb774f43b403"},
+ {file = "juriscraper-2.6.42.tar.gz", hash = "sha256:293299112201ed217a1eccb05d8cba01aa208e8ed9686d3a6b90c24e752f51fb"},
]
[package.dependencies]
From 6da6a4600d0fe4d400bd41b7be103c43357d9616 Mon Sep 17 00:00:00 2001
From: Alberto Islas
Date: Thu, 21 Nov 2024 11:18:24 -0600
Subject: [PATCH 5/7] fix(docs): Tweaked the language regarding the score field
in the Search API documentation
---
cl/api/templates/search-api-docs-vlatest.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cl/api/templates/search-api-docs-vlatest.html b/cl/api/templates/search-api-docs-vlatest.html
index d2a65537c1..19d3c716be 100644
--- a/cl/api/templates/search-api-docs-vlatest.html
+++ b/cl/api/templates/search-api-docs-vlatest.html
@@ -253,7 +253,7 @@ Special Notes
- The meta
field in main documents contains the score
field, which is currently a hash that includes the bm25
score used by Elasticsearch to rank results. Additional scores may be introduced in the future.
+
The meta
field in main documents contains the score
field, which is currently a JSON object that includes the bm25
score used by Elasticsearch to rank results. Additional scores may be introduced in the future.
From 0924905cfe5f254b5d11cbcc26c334164282b5bb Mon Sep 17 00:00:00 2001
From: grossir <14970769+grossir@users.noreply.github.com>
Date: Thu, 21 Nov 2024 21:04:45 +0000
Subject: [PATCH 6/7] Update freelawproject dependencies
---
poetry.lock | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/poetry.lock b/poetry.lock
index d062c4b304..68f09e1cfb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2320,13 +2320,13 @@ setuptools = "*"
[[package]]
name = "juriscraper"
-version = "2.6.42"
+version = "2.6.43"
description = "An API to scrape American court websites for metadata."
optional = false
python-versions = "*"
files = [
- {file = "juriscraper-2.6.42-py27-none-any.whl", hash = "sha256:010d6578714f3262f16d15bee709872e0584381b93988d0d504bdb774f43b403"},
- {file = "juriscraper-2.6.42.tar.gz", hash = "sha256:293299112201ed217a1eccb05d8cba01aa208e8ed9686d3a6b90c24e752f51fb"},
+ {file = "juriscraper-2.6.43-py27-none-any.whl", hash = "sha256:c2765e5f0a6563fe4842bf72b13aec2b6feb873dc2350523ff6b5102bdf1f757"},
+ {file = "juriscraper-2.6.43.tar.gz", hash = "sha256:99029ab83cbe99673e4598c8e9b30df9e3d21ef98bd78baef9907ab53ad96e10"},
]
[package.dependencies]
From de6ab4b03986eee0edce000a492b361d7523430b Mon Sep 17 00:00:00 2001
From: Alberto Islas
Date: Thu, 21 Nov 2024 16:11:37 -0600
Subject: [PATCH 7/7] fix(search): Fixes o-es-active flag on court_homepage
---
cl/opinion_page/views.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/cl/opinion_page/views.py b/cl/opinion_page/views.py
index 95cbec270d..0eaf0addb9 100644
--- a/cl/opinion_page/views.py
+++ b/cl/opinion_page/views.py
@@ -151,11 +151,11 @@ async def court_homepage(request: HttpRequest, pk: str) -> HttpResponse:
mutable_GET = request.GET.copy()
- es_flag_for_oa = await sync_to_async(waffle.flag_is_active)(
- request, "oa-es-active"
+ es_flag_for_o = await sync_to_async(waffle.flag_is_active)(
+ request, "o-es-active"
)
- if not es_flag_for_oa:
+ if not es_flag_for_o:
# Do solr search
response = await sync_to_async(do_search)(
mutable_GET,