Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create records from data frames #78

Merged
merged 30 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
57d0f2d
Handle loading CSV and TSV files
lazappi Nov 12, 2024
a06ad13
Add from_df() method to Registry
lazappi Nov 12, 2024
9513d17
Modify Record printing to avoid API calls
lazappi Nov 13, 2024
4d2351f
Recursively create records in Registry$from_df()
lazappi Nov 13, 2024
d27dd95
Add temporary record classes with saving
lazappi Nov 14, 2024
433e278
Overwrite data after saving temporary record
lazappi Nov 14, 2024
ccba121
Create a default instance with connect(slug=NULL)
lazappi Nov 14, 2024
d0bfe5a
Adjust check_requires to output warnings
lazappi Nov 14, 2024
ca4eca9
Attempt to load Python lamin in connect()
lazappi Nov 14, 2024
3deebfd
Add reading for Parquet files
lazappi Nov 14, 2024
cc8cd0a
Document and pass checks
lazappi Nov 14, 2024
2bddcfe
Add get_temporary_record_class() to Registry
lazappi Nov 14, 2024
d8a841d
Remove new file loaders
lazappi Nov 14, 2024
b9b9315
Remove importing reading functions
lazappi Nov 14, 2024
6dc941d
Style package
lazappi Nov 14, 2024
134cc68
Remove broken docs link
lazappi Nov 14, 2024
39f8832
Store user settings in option
lazappi Nov 15, 2024
55e7222
Add delete() method to Record
lazappi Nov 15, 2024
c2aa3d3
Update architecture vignette
lazappi Nov 15, 2024
d079cb3
Update development vignette
lazappi Nov 15, 2024
e6a2792
Roxygenise
lazappi Nov 15, 2024
62540e6
Add test for Artifact$from_df()
lazappi Nov 15, 2024
8d7a5e2
Fix incorrect function call in test
lazappi Nov 15, 2024
7072330
Merge branch 'main' into add-dataframe-artifacts
rcannood Nov 15, 2024
ad09309
Update CHANGELOG
lazappi Nov 18, 2024
0c27537
Error in API$delete_record() if null access token
lazappi Nov 18, 2024
fea16e1
Move importing lamindb to create_instance
lazappi Nov 18, 2024
3dbf7bc
Make Python lamin getter and function not field
lazappi Nov 18, 2024
25ce8fd
Add features to README
lazappi Nov 18, 2024
81274cb
Pass checks and style
lazappi Nov 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions R/Artifact.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,16 @@ ArtifactRecord <- R6::R6Class( # nolint object_name_linter
inherit = Record,
public = list(
#' @description
#' Load the artifact into memory. The artifact is cached first and the
#' resulting file is then read with the loader selected by `get_file_loader()`
#' from the artifact's `suffix` value.
#'
#' @return The artifact
load = function() {
  # Path of the cached file, as returned by the cache() method
  file_path <- self$cache()

  # Select the loader from the file suffix rather than from the accessor
  suffix <- private$get_value("suffix")
  file_loader <- get_file_loader(suffix)

  file_loader(file_path)
},
#' @description
#' Cache the artifact to the local filesystem. This currently only supports
Expand Down
28 changes: 22 additions & 6 deletions R/Record.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,28 @@ Record <- R6::R6Class( # nolint object_name_linter
"key"
)

record_fields <- private$.api$get_record(
module_name = private$.registry$module$name,
registry_name = private$.registry$name,
id_or_uid = private$.data[["uid"]],
include_foreign_keys = TRUE
)
expected_fields <- private$.registry$get_fields() |>
discard(~ is.null(.x$column_name)) |>
map_chr("column_name")

# Collect the value of every expected field by accessing it on the record
record_fields <- map(names(expected_fields), function(.field) {
  value <- tryCatch(
    self[[.field]],
    error = function(err) {
      # Errors mentioning a 404 status give NULL; anything else is re-raised
      if (!grepl("status code 404", conditionMessage(err))) {
        # cli exports cli_abort(), not abort(). The message is interpolated
        # so that any braces it contains are not parsed as cli markup.
        cli::cli_abort("{conditionMessage(err)}")
      }
      NULL
    }
  )

  # Related records are represented by their ID
  if (inherits(value, "Record")) {
    value <- value$id
  }

  value
}) |>
  setNames(expected_fields)

# Get the important fields that are in the record
important_fields <- intersect(important_fields, names(record_fields))
Expand Down
95 changes: 95 additions & 0 deletions R/Registry.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,29 @@
# Bind entries as rows
list_rbind()
},
#' @description
#' Create a record from a data frame. This is only supported for the
#' Artifact registry and is delegated to the Python `lamindb` module through
#' `reticulate`.
#'
#' @param dataframe The `data.frame` to create a record from
#' @param key Passed on as `key` to the Python `Artifact.from_df()`
#' @param description Passed on as `description` to the Python
#'   `Artifact.from_df()`
#' @param run Passed on as `run` to the Python `Artifact.from_df()`
#'
#' @return The result of converting the Python record with
#'   `create_record_from_python()`
from_df = function(dataframe, key = NULL, description = NULL, run = NULL) {
  if (private$.registry_name != "artifact") {
    cli::cli_abort(
      paste(
        "Creating records from data frames is only supported",
        "for the Artifact registry"
      )
    )
  }

  # Reject invalid input before any Python session is started
  if (!is.data.frame(dataframe)) {
    cli::cli_abort("{.arg dataframe} must be a data frame")
  }

  check_requires("Creating records from data frames", "reticulate")

  py_lamin <- reticulate::import("lamindb")

  instance_settings <- private$.instance$get_settings()

  # Turn off auto-connect through the lamin CLI before connecting explicitly.
  # The exit status is checked so that a failed call is reported, not ignored.
  status <- system2("lamin", c("settings", "set", "auto-connect", "false"))
  if (!identical(status, 0L)) {
    cli::cli_warn(
      paste(
        "Could not set the lamin {.code auto-connect} setting",
        "(exit status {status})"
      )
    )
  }

  py_lamin$connect(
    paste0(instance_settings$owner, "/", instance_settings$name)
  )

  py_record <- py_lamin$Artifact$from_df(
    dataframe,
    key = key,
    description = description,
    run = run
  )

  create_record_from_python(py_record, private$.instance)
},
#' @description
#' Get the fields in the registry.
#'
Expand Down Expand Up @@ -346,3 +369,75 @@
}
)
)

#' Create a record from a Python record
#'
#' Builds a temporary record from a Python object. Objects that are Django
#' managers give `NULL`. The module and registry are derived from the first
#' Python class name, field values are read from the Python object (fields
#' that fail to be read become `NULL`) and nested records are converted
#' recursively.
#'
#' @param py_record The Python object to convert
#' @param instance The instance used to look up the module and registry
#'
#' @return A temporary record object, or `NULL` for Django managers
#' @noRd
create_record_from_python <- function(py_record, instance) {
  python_classes <- class(py_record)

  # Skip related fields for now
  is_manager <- "django.db.models.manager.Manager" %in% python_classes
  if (is_manager) {
    return(NULL)
  }

  # The first class is split on dots: the first part is the module and the
  # third part is the registry
  class_parts <- strsplit(python_classes[1], "\\.")[[1]]
  module_name <- if (class_parts[1] == "lnschema_core") {
    "core"
  } else {
    class_parts[1]
  }
  registry_name <- tolower(class_parts[3])

  registry <- instance$get_module(module_name)$get_registry(registry_name)
  field_names <- registry$get_field_names()

  # Read one field from the Python record, converting nested records
  read_field <- function(.field) {
    field_value <- tryCatch(
      py_record[[.field]],
      error = function(err) NULL
    )

    if (inherits(field_value, "lnschema_core.models.Record")) {
      create_record_from_python(field_value, instance)
    } else {
      field_value
    }
  }

  record_list <- setNames(map(field_names, read_field), field_names)

  base_class <- registry$get_record_class()
  temporary_class <- create_temporary_record_class(base_class)

  # Warnings raised while constructing the temporary record are suppressed
  suppressWarnings(temporary_class$new(base_class, py_record, record_list))
}

#' Create a temporary record class
#'
#' Generates an R6 class named `Temporary<classname>` that inherits from
#' `record_class` and keeps a reference to a Python record so that it can be
#' saved later.
#'
#' @param record_class The R6 class generator to inherit from
#'
#' @return An R6 class generator
#' @noRd
create_temporary_record_class <- function(record_class) {
  R6::R6Class(
    paste0("Temporary", record_class$classname),
    cloneable = FALSE,
    inherit = record_class,
    public = list(
      # Store the record class and the Python record, then initialise the
      # parent class with the record data
      initialize = function(record_class, py_record, data) {
        private$.record_class <- record_class
        private$.py_record <- py_record

        super$initialize(data)
      },
      # Save the Python record, then get the record from the registry by UID
      save = function() {
        private$.py_record$save()
        private$.registry$get(self$uid)
      },
      # Print a temporary record: a banner followed by the parent output.
      # `style` is a logical, whether the output is styled using ANSI codes.
      print = function(style = TRUE) {
        cat("!!! TEMPORARY RECORD !!!")
        cat("\n\n")
        # Forward `style`, which was previously accepted but ignored.
        # NOTE(review): assumes the parent print() has a `style` argument,
        # as the documented parameter suggests -- confirm.
        super$print(style = style)
      }
    ),
    private = list(
      .record_class = NULL,
      .py_record = NULL
    )
  )
}
48 changes: 48 additions & 0 deletions R/file_handlers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#' Get file loader
#'
#' Get the correct file loader function based on a file suffix
#'
#' @param suffix String giving a file suffix, including the leading dot
#'   (".h5ad", ".csv" or ".tsv")
#'
#' @return Function that can be used to load the file. An error is raised if
#'   `suffix` is not a single string or is not a supported suffix.
#' @noRd
get_file_loader <- function(suffix) {
  # switch() selects by position when given a number, which would silently
  # return a loader, so only a single string is accepted
  if (!is.character(suffix) || length(suffix) != 1) {
    cli::cli_abort("{.arg suffix} must be a single string")
  }

  switch(suffix,
    ".h5ad" = load_h5ad,
    ".csv" = load_csv,
    ".tsv" = load_tsv,
    cli::cli_abort("Loading files with suffix {suffix} is not supported")
  )
}

#' Read an H5AD file into memory
#'
#' @param file Path to the H5AD file to read
#'
#' @return The object returned by `anndata::read_h5ad()`
#' @noRd
load_h5ad <- function(file) {
  # Run the requirements check for the anndata package before using it
  check_requires("Loading AnnData objects", "anndata")

  adata <- anndata::read_h5ad(file)
  adata
}

#' Load a CSV file
#'
#' @param file Path to the file to load
#'
#' @return A `data.frame`
#' @noRd
load_csv <- function(file) {
  # Namespaced so that the call does not rely on utils being imported
  utils::read.csv(file)
}

#' Load a TSV file
#'
#' @param file Path to the file to load
#'
#' @return A `data.frame`
#' @noRd
load_tsv <- function(file) {
  # Namespaced so that the call does not rely on utils being imported
  utils::read.delim(file)
}
Loading