deprecate python3.8, autoformat
purarue committed Nov 16, 2024
1 parent e8acd9d commit 95863d1
Showing 8 changed files with 87 additions and 62 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -56,7 +56,7 @@ This was extracted out of [my HPI](https://github.com/purarue/HPI/tree/4bb1f174b

## Installation

-Requires `python3.8+`
+Requires `python3.9+`

To install with pip, run:

3 changes: 2 additions & 1 deletion google_takeout_parser/models.py
@@ -168,7 +168,8 @@ def key(self) -> int:
class PlayStoreAppInstall(BaseEvent):
    title: str
    lastUpdateTime: datetime  # timestamp for when the installation event occurred
-    firstInstallationTime: datetime  # timetamp for when you first installed the app on the given device
+    # timestamp for when you first installed the app on the given device
+    firstInstallationTime: datetime
    deviceName: Optional[str]
    deviceCarrier: Optional[str]
    deviceManufacturer: Optional[str]
16 changes: 8 additions & 8 deletions google_takeout_parser/parse_csv.py
@@ -11,13 +11,13 @@

def _parse_youtube_comment_row(row: Dict[str, Any]) -> Res[CSVYoutubeComment]:
    try:
-        comment_id = row['Comment ID']
-        channel_id = row['Channel ID']
-        created_at = row['Comment Create Timestamp']
-        price = row['Price']
-        parent_comment_id = row['Parent Comment ID']
-        video_id = row['Video ID']
-        textJSON = row['Comment Text']
+        comment_id = row["Comment ID"]
+        channel_id = row["Channel ID"]
+        created_at = row["Comment Create Timestamp"]
+        price = row["Price"]
+        parent_comment_id = row["Parent Comment ID"]
+        video_id = row["Video ID"]
+        textJSON = row["Comment Text"]
    except KeyError as e:
        return e
    return CSVYoutubeComment(
@@ -117,7 +117,7 @@ def _validate_content(content: Union[str, Dict[Any, Any]]) -> Res[List[Dict[str,
            if i != len(split) - 1:
                js = js + json_end
            # we get \n as a result of csv parser... but json parser can't handle them!
-            js = js.replace('\n', '\\n')
+            js = js.replace("\n", "\\n")
            segments.append(json.loads(js))
        return segments
    # old format
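The `\n` replacement in the hunk above is worth a note: the `csv` module hands back field values containing literal newlines, and `json.loads` rejects unescaped control characters inside strings by default. A minimal standalone illustration of the failure and the fix (not code from this repo):

```python
import json

# a payload shaped like what the csv module hands back: a literal
# newline embedded in what should be a JSON string
raw = '{"text": "line one\nline two"}'

try:
    json.loads(raw)
except json.JSONDecodeError:
    pass  # rejected: bare control characters are invalid inside JSON strings

# escaping the newline first, as the hunk above does, makes it parseable
fixed = raw.replace("\n", "\\n")
assert json.loads(fixed) == {"text": "line one\nline two"}
```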
22 changes: 16 additions & 6 deletions google_takeout_parser/parse_json.py
@@ -126,11 +126,19 @@ def _parse_app_installs(p: Path) -> Iterator[Res[PlayStoreAppInstall]]:
        try:
            yield PlayStoreAppInstall(
                title=japp["install"]["doc"]["title"],
-                deviceName=japp.get("install", {}).get("deviceAttribute", {}).get("deviceDisplayName"),
-                deviceCarrier=japp.get("install", {}).get("deviceAttribute", {}).get("carrier"),
-                deviceManufacturer=japp.get("install", {}).get("deviceAttribute", {}).get("manufacturer"),
+                deviceName=japp.get("install", {})
+                .get("deviceAttribute", {})
+                .get("deviceDisplayName"),
+                deviceCarrier=japp.get("install", {})
+                .get("deviceAttribute", {})
+                .get("carrier"),
+                deviceManufacturer=japp.get("install", {})
+                .get("deviceAttribute", {})
+                .get("manufacturer"),
                lastUpdateTime=parse_json_utc_date(japp["install"]["lastUpdateTime"]),
-                firstInstallationTime=parse_json_utc_date(japp['install']['firstInstallationTime']),
+                firstInstallationTime=parse_json_utc_date(
+                    japp["install"]["firstInstallationTime"]
+                ),
            )
        except Exception as e:
            yield e
@@ -213,7 +221,9 @@ def _parse_semantic_location_history(p: Path) -> Iterator[Res[PlaceVisit]]:
                continue
            location = CandidateLocation.from_dict(location_json)
            placeId = location.placeId
-            assert placeId is not None, location_json  # this is always present for the actual location
+            assert (
+                placeId is not None
+            ), location_json  # this is always present for the actual location
            duration = placeVisit["duration"]
            yield PlaceVisit(
                name=location.name,
@@ -266,7 +276,7 @@ def _parse_chrome_history(p: Path) -> Iterator[Res[ChromeHistory]]:
                # and there's likely lots of items that aren't https
                url=item["url"],
                dt=time_naive.replace(tzinfo=timezone.utc),
-                pageTransition=item.get("page_transition")
+                pageTransition=item.get("page_transition"),
            )
        except Exception as e:
            yield e
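The chained `.get(..., {})` calls reformatted in the `_parse_app_installs` hunk above are a null-safe lookup: each level substitutes an empty dict when a key is missing, so incomplete records yield `None` instead of raising `KeyError`. A standalone sketch of the same pattern, using a hypothetical helper name (`device_attr` is not part of this codebase):

```python
from typing import Any, Dict, Optional

def device_attr(japp: Dict[str, Any], key: str) -> Optional[str]:
    # each .get(..., {}) swaps in an empty dict when a level is missing,
    # so the whole chain degrades to None rather than raising KeyError
    return japp.get("install", {}).get("deviceAttribute", {}).get(key)

assert device_attr({}, "carrier") is None
record = {"install": {"deviceAttribute": {"carrier": "Verizon"}}}
assert device_attr(record, "carrier") == "Verizon"
```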
10 changes: 7 additions & 3 deletions google_takeout_parser/path_dispatch.py
@@ -262,7 +262,10 @@ def _warn_if_no_activity(self) -> None:
        )

    @staticmethod
-    def _match_handler(relative_path: str, handler: Iterable[Tuple[Pattern[str], Optional[HandlerFunction]]]) -> HandlerMatch:
+    def _match_handler(
+        relative_path: str,
+        handler: Iterable[Tuple[Pattern[str], Optional[HandlerFunction]]],
+    ) -> HandlerMatch:
        """
        Match one of the handler regexes to a function which parses the file
        """
@@ -310,11 +313,12 @@ def iter_relative_paths() -> Iterator[str]:
        # many of them will get rejected by the regexes in handlers anyway

        takeout_dir_walk: Callable[..., Iterator[Tuple[Path, List[str], List[str]]]]
-        if hasattr(takeout_dir, 'walk'):
+        if hasattr(takeout_dir, "walk"):
            # this codepath is used from python 3.12 that has Path.walk
            # , or other implementations that support it (e.g. zipfile wrappers)
            takeout_dir_walk = takeout_dir.walk
        else:
+
            def takeout_dir_walk() -> Iterator[Tuple[Path, List[str], List[str]]]:
                for root, dirs, files in os.walk(takeout_dir):
                    yield Path(root), dirs, files
@@ -326,7 +330,7 @@ def takeout_dir_walk() -> Iterator[Tuple[Path, List[str], List[str]]]:
            # compute relative path of parent dir once, this saves a lot of time when takeout has tens of thousands of files
            root_relative = root.relative_to(takeout_dir)
            for f in files:
-                if f[0] == '.':
+                if f[0] == ".":
                    continue
                yield os.path.join(root_relative, f)

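The `hasattr(takeout_dir, "walk")` branch above is feature detection: `Path.walk` was added in Python 3.12, so older interpreters get an `os.walk`-backed shim yielding the same `(Path, dirs, files)` triples. A condensed standalone sketch of that fallback (`walk_compat` is a hypothetical name, not from this codebase):

```python
import os
from pathlib import Path
from typing import Iterator, List, Tuple

def walk_compat(base: Path) -> Iterator[Tuple[Path, List[str], List[str]]]:
    if hasattr(base, "walk"):
        # Path.walk exists on Python 3.12+ (and on wrapper types that
        # implement it, e.g. zipfile-backed paths)
        yield from base.walk()
    else:
        # older interpreters: normalize os.walk's str roots back to Path
        for root, dirs, files in os.walk(base):
            yield Path(root), dirs, files

for root, dirs, files in walk_compat(Path(".")):
    break  # same loop body works on any supported version
```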
4 changes: 2 additions & 2 deletions setup.cfg
@@ -13,11 +13,11 @@ classifiers =
    Programming Language :: Python
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10
    Programming Language :: Python :: 3.11
    Programming Language :: Python :: 3.12
+    Programming Language :: Python :: 3.13
keywords = google data parsing

[options]
@@ -31,7 +31,7 @@ install_requires =
    lxml>=4.6.0
    platformdirs>=2.3.0
    pytz>=2021.3
-python_requires = >=3.8
+python_requires = >=3.9
include_package_data = True

[options.packages.find]
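For context on the `python_requires` bump: pip reads this field at install time and refuses to install the package on older interpreters, falling back to the last release that still allowed them. A hypothetical runtime equivalent of the same floor, shown only for illustration; the package itself does not do this:

```python
import sys

# hypothetical guard mirroring python_requires = >=3.9; in practice pip
# enforces this constraint at install time, not at import time
if sys.version_info < (3, 9):
    raise RuntimeError("google_takeout_parser requires Python 3.9+")
```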
20 changes: 16 additions & 4 deletions tests/test_csv.py
@@ -38,7 +38,10 @@ def test_parse_youtube_comment_buffer_old() -> None:
videoId="WtOskFeLmr4",
contentJSON='{"takeoutSegments":[{"text":"coalowl the legend"}]}',
)
assert reconstruct_comment_content(res0.contentJSON, format="text") == "coalowl the legend"
assert (
reconstruct_comment_content(res0.contentJSON, format="text")
== "coalowl the legend"
)

assert not isinstance(res1, Exception)
assert res1 == CSVYoutubeComment(
@@ -50,7 +53,10 @@ def test_parse_youtube_comment_buffer_old() -> None:
videoId="jH39c5-y6kg",
contentJSON='{"takeoutSegments":[{"text":"Ah, this is the reason why Ive never seen concurrent write failures myself, python\'s default timeout value is 5s, so it just waits in a busy loop if I have \'concurrent writers\'"}]}',
)
assert reconstruct_comment_content(res1.contentJSON, format="text") == "Ah, this is the reason why Ive never seen concurrent write failures myself, python\'s default timeout value is 5s, so it just waits in a busy loop if I have \'concurrent writers\'"
assert (
reconstruct_comment_content(res1.contentJSON, format="text")
== "Ah, this is the reason why Ive never seen concurrent write failures myself, python's default timeout value is 5s, so it just waits in a busy loop if I have 'concurrent writers'"
)


def test_parse_youtube_comment_buffer_new() -> None:
@@ -79,7 +85,10 @@ def test_parse_youtube_comment_buffer_new() -> None:
videoId="rWVAzS6duAs",
contentJSON='{"text":"> I am about to get buried in the concrete"},{"text":"\n"},{"text":"the most normal Veritasium video!"}',
)
assert reconstruct_comment_content(res0.contentJSON, format="text") == "> I am about to get buried in the concrete\nthe most normal Veritasium video!"
assert (
reconstruct_comment_content(res0.contentJSON, format="text")
== "> I am about to get buried in the concrete\nthe most normal Veritasium video!"
)

assert not isinstance(res1, Exception)
assert res1 == CSVYoutubeComment(
@@ -91,7 +100,10 @@ def test_parse_youtube_comment_buffer_new() -> None:
videoId="ZuvK-oe647c",
contentJSON='{"text":"Great illustration of Bell inequality!"}',
)
assert reconstruct_comment_content(res1.contentJSON, format="text") == "Great illustration of Bell inequality!"
assert (
reconstruct_comment_content(res1.contentJSON, format="text")
== "Great illustration of Bell inequality!"
)


def test_parse_youtube_live_chat_buffer() -> None:
72 changes: 35 additions & 37 deletions tests/test_json.py
@@ -171,7 +171,7 @@ def test_chrome_history(tmp_path_f: Path) -> None:
            dt=datetime.datetime(
                2021, 4, 2, 23, 4, 50, 134513, tzinfo=datetime.timezone.utc
            ),
-            pageTransition="LINK"
+            pageTransition="LINK",
        ),
    ]

@@ -266,7 +266,7 @@ def test_semantic_location_history(tmp_path_f: Path) -> None:
                address=None,
                locationConfidence=None,
                placeId=None,
-                semanticType='TYPE_WORK',
+                semanticType="TYPE_WORK",
                sourceInfoDeviceTag=None,
            ),
        ],
@@ -320,63 +320,61 @@ def test_semantic_location_history_2024(tmp_path_f: Path) -> None:
"activitySegment": {
"startLocation": {
"latitudeE7": 555555555,
"longitudeE7": -1066666666
"longitudeE7": -1066666666,
},
"endLocation": {
"latitudeE7": 555555567,
"longitudeE7": -1066666678
"longitudeE7": -1066666678,
},
"duration": {
"startTimestamp": "2017-12-11T01:20:06.106Z",
"endTimestamp": "2017-12-11T01:40:06.106Z"
"endTimestamp": "2017-12-11T01:40:06.106Z",
},
"distance": 13071,
"activityType": "IN_PASSENGER_VEHICLE",
"confidence": "MEDIUM",
"activities": [{
"activityType": "IN_PASSENGER_VEHICLE",
"probability": 85.514968640442
}, {
"activityType": "MOTORCYCLING",
"probability": 8.858836042221917
}, {
"activityType": "WALKING",
"probability": 4.7803567526550035
}],
"activities": [
{
"activityType": "IN_PASSENGER_VEHICLE",
"probability": 85.514968640442,
},
{
"activityType": "MOTORCYCLING",
"probability": 8.858836042221917,
},
{"activityType": "WALKING", "probability": 4.7803567526550035},
],
"waypointPath": {
"waypoints": [{
"latE7": 123456789,
"lngE7": 1210000000
}, {
"latE7": 123456089,
"lngE7": 1210000200
}, {
"latE7": 123456289,
"lngE7": 1210000500
}],
"source": "INFERRED"
"waypoints": [
{"latE7": 123456789, "lngE7": 1210000000},
{"latE7": 123456089, "lngE7": 1210000200},
{"latE7": 123456289, "lngE7": 1210000500},
],
"source": "INFERRED",
},
"simplifiedRawPath": {
"points": [{
"latE7": 123456489,
"lngE7": 1210000240,
"accuracyMeters": 10,
"timestamp": "2017-12-11T01:35:04Z"
}]
"points": [
{
"latE7": 123456489,
"lngE7": 1210000240,
"accuracyMeters": 10,
"timestamp": "2017-12-11T01:35:04Z",
}
]
},
"editConfirmationStatus": "NOT_CONFIRMED",
"parkingEvent": {
"location": {
"latitudeE7": 123456289,
"longitudeE7": 1210000500,
"accuracyMetres": 163
"accuracyMetres": 163,
},
"method": "END_OF_ACTIVITY_SEGMENT",
"locationSource": "UNKNOWN",
"timestamp": "2017-12-11T01:40:06Z"
}
"timestamp": "2017-12-11T01:40:06Z",
},
}
}
},
]
}
fp = tmp_path_f / "file"
@@ -424,7 +422,7 @@ def test_semantic_location_history_2024(tmp_path_f: Path) -> None:
                address=None,
                locationConfidence=None,
                placeId=None,
-                semanticType='TYPE_WORK',
+                semanticType="TYPE_WORK",
                sourceInfoDeviceTag=None,
            ),
        ],
