Skip to content

Commit

Permalink
fix: replace plaintext formatter with jsonl
Browse files Browse the repository at this point in the history
  • Loading branch information
alexmaras committed Feb 16, 2024
1 parent 6d589cb commit 526dd61
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 26 deletions.
33 changes: 33 additions & 0 deletions target_s3/formats/format_jsonl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from datetime import datetime

from bson import ObjectId
from simplejson import JSONEncoder, dumps

from target_s3.formats.format_base import FormatBase


class JsonSerialize(JSONEncoder):
    """JSON encoder that additionally handles ``ObjectId`` and ``datetime``.

    ``ObjectId`` values are emitted as their ``str()`` form and ``datetime``
    values as ISO 8601 strings (``datetime.isoformat()``). Any other type
    that reaches ``default`` is rejected with ``TypeError``.
    """

    def default(self, obj: object) -> object:
        """Return a JSON-serializable stand-in for *obj*.

        The encoder calls this hook only for objects it cannot serialize
        natively.

        Raises:
            TypeError: if *obj* is neither an ``ObjectId`` nor a ``datetime``.
        """
        if isinstance(obj, ObjectId):
            return str(obj)
        if isinstance(obj, datetime):
            return obj.isoformat()
        raise TypeError(f"Type {type(obj)} not serializable")


class FormatJsonl(FormatBase):
    """Formatter that emits records as JSON Lines.

    Each record is serialized to its own JSON document and the documents are
    joined with a newline before being handed to ``FormatBase._write``.
    """

    def __init__(self, config, context) -> None:
        """Initialise the base formatter with the ``"jsonl"`` identifier.

        NOTE(review): the third argument is presumably the output format /
        file extension used by ``FormatBase`` -- confirm against the base
        class.
        """
        super().__init__(config, context, "jsonl")

    def _prepare_records(self):
        """Delegate to the base implementation; no JSONL-specific prep."""
        return super()._prepare_records()

    def _write(self) -> None:
        """Serialize ``self.records`` one JSON document per line and write.

        Each record is encoded with ``JsonSerialize`` so that ``ObjectId``
        and ``datetime`` values are serialized instead of raising
        ``TypeError`` from the default encoder.
        """
        lines = (dumps(record, cls=JsonSerialize) for record in self.records)
        return super()._write("\n".join(lines))

    def run(self) -> None:
        """Run the base pipeline on the records held in ``self.context``."""
        return super().run(self.context["records"])
22 changes: 0 additions & 22 deletions target_s3/formats/format_plaintext.py

This file was deleted.

4 changes: 2 additions & 2 deletions target_s3/sinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
from target_s3.formats.format_parquet import FormatParquet
from target_s3.formats.format_csv import FormatCsv
from target_s3.formats.format_json import FormatJson
from target_s3.formats.format_plaintext import FormatPlaintext
from target_s3.formats.format_jsonl import FormatJsonl


LOGGER = logging.getLogger("target-s3")
FORMAT_TYPE = {"parquet": FormatParquet, "csv": FormatCsv, "json": FormatJson, "plaintext": FormatPlaintext}
FORMAT_TYPE = {"parquet": FormatParquet, "csv": FormatCsv, "json": FormatJson, "jsonl": FormatJsonl}


class s3Sink(BatchSink):
Expand Down
4 changes: 2 additions & 2 deletions target_s3/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Targets3(Target):
allowed_values=[
"parquet",
"json",
"plaintext",
"jsonl",
], # TODO: configure this from class
),
th.Property(
Expand Down Expand Up @@ -222,4 +222,4 @@ def deserialize_json(self, line: str) -> dict:


if __name__ == "__main__":
Targets3.cli()
Targets3.cli()

0 comments on commit 526dd61

Please sign in to comment.