freelawproject · grossir · Nov 21, 2024 · Nov 21, 2024 · Nov 21, 2024
diff --git a/juriscraper/opinions/united_states/state/pa.py b/juriscraper/opinions/united_states/state/pa.py
@@ -17,19 +17,20 @@ class Site(OpinionSiteLinear):
     court = "Supreme"
     base_url = "https://www.pacourts.us/api/opinion?"
     document_url = "https://www.pacourts.us/assets/opinions/{}/out/{}"
-    days_interval = 20
+    days_interval = 1
     api_dt_format = "%Y-%m-%dT00:00:00-05:00"
     first_opinion_date = datetime(1998, 4, 27)
     judge_key = "AuthorCode"
+    regional_cite_regex = re.compile(r"\d{1,3} A\.3d \d+")
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.court_id = self.__module__
         self.regex = re.compile(r"(.*)(?:[,-]?\s+Nos?\.)(.*)")
         self.status = "Published"
 
-        now = datetime.now() + timedelta(days=1)
-        start = now - timedelta(days=7)
+        now = datetime.now()
+        start = now - timedelta(days=1)
         self.params = {
             "startDate": start.strftime(self.api_dt_format),
             "endDate": now.strftime(self.api_dt_format),
@@ -50,9 +51,13 @@ def _process_html(self) -> None:
         json_response = self.html
 
         for cluster in json_response["Items"]:
-            title = cluster["Caption"]
             disposition_date = cluster["DispositionDate"].split("T")[0]
+            title = cluster["Caption"]
             name, docket = self.parse_case_title(title)
+            # A.3d cites seem to exist only for pasuperct
+            cite = ""
+            if cite_match := self.regional_cite_regex.search(title):
+                cite = cite_match.group(0)
 
             for op in cluster["Postings"]:
                 per_curiam = False
@@ -75,9 +80,18 @@ def _process_html(self) -> None:
                         "judge": author_str,
                         "status": status,
                         "per_curiam": per_curiam,
+                        "citation": cite,
                     }
                 )
 
+        if not self.test_mode_enabled() and json_response.get("HasNext"):
+            next_page = json_response["PageNumber"] + 1
+            logger.info("Paginating to page %s", next_page)
+            self.params["pageNumber"] = next_page
+            self.url = f"{self.base_url}{urlencode(self.params)}"
+            self.html = self._download()
+            self._process_html()
+
     def parse_case_title(self, title: str) -> Tuple[str, str]:
         """Separates case_name and docket_number from case string
 

diff --git a/juriscraper/opinions/united_states/state/pacommwct.py b/juriscraper/opinions/united_states/state/pacommwct.py
@@ -19,12 +19,16 @@
 from urllib.parse import urlencode
 
 from juriscraper.opinions.united_states.state import pasuperct
+from juriscraper.OpinionSite import OpinionSite
 
 
 class Site(pasuperct.Site):
     court = "Commonwealth"
-    days_interval = 30
     first_opinion_date = datetime(1998, 8, 17)
+    # Reset extract_from_text to the base OpinionSite implementation so
+    # the pasuperct.Site override is not inherited; this also avoids the
+    # example requirement of tests.local.test_ScraperExtractFromTextTest
+    extract_from_text = OpinionSite.extract_from_text
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

diff --git a/juriscraper/opinions/united_states/state/pasuperct.py b/juriscraper/opinions/united_states/state/pasuperct.py
@@ -9,15 +9,14 @@
 
 import re
 from datetime import datetime
-from typing import Dict
+from typing import Dict, Optional
 from urllib.parse import urlencode
 
 from juriscraper.opinions.united_states.state import pa
 
 
 class Site(pa.Site):
     court = "Superior"
-    days_interval = 20
     first_opinion_date = datetime(1998, 2, 15)
     judge_key = "AuthorName"
 
@@ -61,3 +60,16 @@ def clean_judge(self, author_str: str) -> str:
                 " by ", " "
             )
         return author_str
+
+    def extract_from_text(self, scraped_text: str) -> Optional[Dict]:
+        """Look for a "PA Super" neutral citation near the top of the text
+
+        Only the first 200 characters of `scraped_text` are searched; not
+        every scraped opinion carries a neutral citation there.
+
+        :param scraped_text: text extracted from the opinion document
+        :return: {"Citation": {...}} holding the "volume", "reporter",
+            "page" and "type" keys when a citation is found; None otherwise
+        """
+        pattern = r"(?P<volume>\d{4}) (?P<reporter>PA Super) (?P<page>\d+)"
+        found = re.search(pattern, scraped_text[:200])
+        if found is None:
+            return None
+
+        # 8 is the type code for a neutral citation
+        citation = {**found.groupdict(), "type": 8}
+        return {"Citation": citation}
diff --git a/tests/examples/opinions/united_states/pa_example.compare.json b/tests/examples/opinions/united_states/pa_example.compare.json
@@ -8,6 +8,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "108 MAP 2023",
     "judges": "Dougherty, Kevin M.",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -20,6 +21,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "108 MAP 2023",
     "judges": "Brobson, P. Kevin",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -32,6 +34,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "108 MAP 2023",
     "judges": "Wecht, David N.",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -44,6 +47,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "212 WAL 2024",
     "judges": "",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   }

diff --git a/tests/examples/opinions/united_states/pacommwct_example.compare.json b/tests/examples/opinions/united_states/pacommwct_example.compare.json
@@ -8,6 +8,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "526 C.D. 2023",
     "judges": "Wallace",
+    "citations": "",
     "case_name_shorts": "PPB",
     "per_curiam": false
   },
@@ -20,6 +21,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "168 M.D. 2023",
     "judges": "Wallace",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -32,6 +34,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "337 M.D. 2023",
     "judges": "Covey",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -44,6 +47,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "379 M.D. 2024",
     "judges": "Leadbetter",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -56,6 +60,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "374 C.D. 2023",
     "judges": "Leavitt",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -68,6 +73,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "716 C.D. 2023",
     "judges": "Covey",
+    "citations": "",
     "case_name_shorts": "UCBR",
     "per_curiam": false
   },
@@ -80,6 +86,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "651 C.D. 2023",
     "judges": "Wallace",
+    "citations": "",
     "case_name_shorts": "S.E.N.",
     "per_curiam": false
   },
@@ -92,6 +99,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "469 C.D. 2023",
     "judges": "Dumas",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -104,6 +112,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "804 C.D. 2023",
     "judges": "Wojcik",
+    "citations": "",
     "case_name_shorts": "PPB",
     "per_curiam": false
   },
@@ -116,6 +125,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "229 C.D. 2022",
     "judges": "Ceisler",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -128,6 +138,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "824 C.D. 2023",
     "judges": "Leavitt",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -140,6 +151,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "264 & 1012 C.D. 2022",
     "judges": "Leadbetter",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -152,6 +164,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "390 C.D. 2023",
     "judges": "McCullough",
+    "citations": "",
     "case_name_shorts": "DHS",
     "per_curiam": false
   },
@@ -164,6 +177,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "506 C.D. 2023",
     "judges": "Covey",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -176,6 +190,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "791 C.D. 2023",
     "judges": "Covey",
+    "citations": "",
     "case_name_shorts": "PPB",
     "per_curiam": false
   },
@@ -188,6 +203,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "164 M.D. 2023",
     "judges": "Covey",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -200,6 +216,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "44 C.D. 2023",
     "judges": "McCullough. Dumas",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -212,6 +229,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "35, 371 & 388 C.D. 2023",
     "judges": "Wallace",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -224,6 +242,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "369 M.D. 2023",
     "judges": "Ceisler",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   },
@@ -236,6 +255,7 @@
     "date_filed_is_approximate": false,
     "docket_numbers": "33 M.D. 2024",
     "judges": "Cohn Jubelirer. McCullough",
+    "citations": "",
     "case_name_shorts": "",
     "per_curiam": false
   }