Skip to content

Commit

Permalink
load by filename rather than dataset name (#5)
Browse files: browse the repository at this point in the history
  • Loading branch information
tomcarter23 authored Jan 4, 2024
1 parent 55feb67 commit b15f26c
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/synthesized_datasets/_datasets.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,8 +62,8 @@ def load_spark(self, spark: _typing.Optional[_ps.SparkSession] = None) -> _ps.Da
spark = _ps.SparkSession.builder.getOrCreate()

spark.sparkContext.addFile(self.url)
- _, ext = _os.path.splitext(self.url)
- df = spark.read.csv(_SparkFiles.get("".join([self.name, ext])), header=True, inferSchema=True)
+ _, filename = _os.path.split(self.url)
+ df = spark.read.csv(_SparkFiles.get(filename), header=True, inferSchema=True)
df.name = self.name
return df

Expand Down

0 comments on commit b15f26c

Please sign in to comment.