Skip to content

Commit

Permalink
clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
steveberardi committed Jul 24, 2024
1 parent 3a0068c commit 88188fe
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 102 deletions.
52 changes: 0 additions & 52 deletions scripts/bigsky_stars.py

This file was deleted.

78 changes: 45 additions & 33 deletions src/starplot/data/bigsky.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,54 @@
import sys
import os
import requests

from starplot.data import DATA_PATH, DataFiles
import pandas as pd

from starplot.data import DATA_PATH, DataFiles, utils


BIG_SKY_VERSION = "0.1.0"

BIG_SKY_URL = f"https://github.com/steveberardi/bigsky/releases/download/v{BIG_SKY_VERSION}/bigsky.stars.csv.gz"
BIG_SKY_FILENAME = "bigsky.stars.csv.gz"

BIG_SKY_URL = f"https://github.com/steveberardi/bigsky/releases/download/v{BIG_SKY_VERSION}/{BIG_SKY_FILENAME}"

DOWNLOADED_PATH = DATA_PATH / "bigsky.stars.csv.gz"
DOWNLOADED_PATH = DATA_PATH / BIG_SKY_FILENAME

DIGITS = 4

# TODO : refactor this to make it re-usable for different filenames
# TODO : delete the SCRIPT for this in scripts/
BIG_SKY_ASSETS = {
DataFiles.BIG_SKY: "bigsky.stars.csv.gz",
DataFiles.BIG_SKY_MAG11: "bigsky.stars.mag11.csv.gz",
}

def download():
with open(DOWNLOADED_PATH, "wb") as f:
print("Downloading Big Sky Catalog...")

response = requests.get(BIG_SKY_URL, stream=True)
total_size = response.headers.get("content-length")
def url(filename: str, version: str):
    """Build the GitHub release download URL for a Big Sky asset.

    Args:
        filename: Name of the release asset (e.g. a ``.csv.gz`` file).
        version: Release version, without the leading ``v`` (it is added here).

    Returns:
        The full download URL as a string.
    """
    release_asset_url = f"https://github.com/steveberardi/bigsky/releases/download/v{version}/{filename}"
    return release_asset_url

if total_size is None:
f.write(response.content)
return

bytes_written = 0
total_size = int(total_size)
for chunk in response.iter_content(chunk_size=4096):
bytes_written += len(chunk)
f.write(chunk)
progress = int(25 * bytes_written / total_size)
sys.stdout.write("\r[%s%s]" % ("=" * progress, " " * (25 - progress)))
sys.stdout.flush()

print("Download complete!")

def download(
    filename: str = BIG_SKY_FILENAME,
    version: str = BIG_SKY_VERSION,
    download_path: str = None,
    digits: int = 4,
    destination_path: str = None,
):
    """Download a Big Sky release asset and convert it to Parquet.

    Args:
        filename: Name of the release asset to fetch.
        version: Big Sky release version used to build the download URL.
        download_path: Where to save the downloaded file. Defaults to
            ``DATA_PATH / filename``.
        digits: Forwarded to ``to_parquet`` (rounding precision used there).
        destination_path: Where to write the Parquet file. When omitted, it is
            looked up in ``BIG_SKY_ASSETS`` by ``filename`` and falls back to
            ``DataFiles.BIG_SKY`` for unregistered filenames.
    """
    download_path = download_path or str(DATA_PATH / filename)

    if destination_path is None:
        # Each asset must be converted into its own Parquet file; writing every
        # asset to DataFiles.BIG_SKY would clobber the full catalog and leave
        # the requested one missing.
        destination_path = next(
            (path for path, asset in BIG_SKY_ASSETS.items() if asset == filename),
            DataFiles.BIG_SKY,
        )

    utils.download(
        url(filename, version),
        download_path,
        "Big Sky Star Catalog",
    )
    to_parquet(
        download_path,
        destination_path,
        digits,
    )

def to_parquet():
import pandas as pd

def to_parquet(source_path: str, destination_path: str, digits: int = DIGITS):
print("Preparing Big Sky Catalog for Starplot...")

df = pd.read_csv(
DOWNLOADED_PATH,
source_path,
header=0,
names=[
"tyc_id",
Expand All @@ -63,7 +66,7 @@ def to_parquet():
)

df["ra_hours"] = df.apply(
lambda row: round(row.ra_degrees_j2000 / 15, DIGITS), axis=1
lambda row: round(row.ra_degrees_j2000 / 15, digits), axis=1
)

df = df.assign(epoch_year=2000)
Expand All @@ -76,8 +79,17 @@ def to_parquet():
}
)

df.to_parquet(DataFiles.BIG_SKY, compression="gzip")
df.to_parquet(destination_path, compression="gzip")


def load(path):
    """Load a Big Sky catalog from Parquet, downloading it first if missing.

    Args:
        path: Location of the catalog's Parquet file. It is also the key used
            to find the matching release asset in ``BIG_SKY_ASSETS``.

    Returns:
        The catalog as a DataFrame indexed by ``tyc_id``.

    Raises:
        ValueError: If the file does not exist and ``path`` has no entry in
            ``BIG_SKY_ASSETS``, so there is nothing to download.
    """
    if not exists(path):
        filename = BIG_SKY_ASSETS.get(path)
        if filename is None:
            # Fail here with a clear message rather than passing None on to
            # download(), where it would break path/URL construction.
            raise ValueError(f"No Big Sky asset is registered for {path!r}.")
        download(filename=filename)

    return pd.read_parquet(path).set_index("tyc_id")


def exists() -> bool:
return os.path.isfile(DataFiles.BIG_SKY)
def exists(path) -> bool:
    """Return True when ``path`` points to an existing regular file."""
    is_regular_file = os.path.isfile(path)
    return is_regular_file
21 changes: 4 additions & 17 deletions src/starplot/data/stars.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,32 +124,19 @@ class StarCatalog(str, Enum):
"""Big Sky Catalog ~ 2.5M stars"""


def load_hipparcos():
return read_parquet(DataFiles.HIPPARCOS)


def load_bigsky_mag11():
df = read_parquet(DataFiles.BIG_SKY_MAG11)

return df.set_index("tyc_id")


def load_bigsky():
if not bigsky.exists():
bigsky.download()
bigsky.to_parquet()

df = read_parquet(DataFiles.BIG_SKY)

return df.set_index("tyc_id")
return bigsky.load(DataFiles.BIG_SKY)


def load(catalog: StarCatalog = StarCatalog.HIPPARCOS):
if catalog == StarCatalog.HIPPARCOS:
return load_hipparcos()
return read_parquet(DataFiles.HIPPARCOS)
elif catalog == StarCatalog.BIG_SKY_MAG11:
return load_bigsky_mag11()
return bigsky.load(DataFiles.BIG_SKY_MAG11)
elif catalog == StarCatalog.BIG_SKY:
return load_bigsky()
return bigsky.load(DataFiles.BIG_SKY)
else:
raise ValueError("Unrecognized star catalog.")

0 comments on commit 88188fe

Please sign in to comment.