docs: update docs; update broken links; update style

immunomind · Mar 18, 2024
commit 26f3ecd (1 parent: b9c0ee4)

Showing 10 changed files with 68 additions and 64 deletions.
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -2,9 +2,10 @@
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 fill_vec <- function(read_vec, read_indices) {
-  .Call(`_immunarch_fill_vec`, read_vec, read_indices)
+    .Call(`_immunarch_fill_vec`, read_vec, read_indices)
 }
 
 fill_reads <- function(new_reads, new_counts) {
-  .Call(`_immunarch_fill_reads`, new_reads, new_counts)
+    .Call(`_immunarch_fill_reads`, new_reads, new_counts)
 }
+
diff --git a/R/explore.R b/R/explore.R
@@ -145,5 +145,3 @@ repExplore <- function(.data, .method = c("volume", "count", "len", "clones"), .
 
   res
 }
-
-rep.ex <- repExplore
diff --git a/R/io.R b/R/io.R
@@ -20,7 +20,7 @@ if (getRversion() >= "2.15.1") {
 #' @importFrom jsonlite read_json
 #' @importFrom stringr str_split str_detect str_replace_all str_trim
 #' @importFrom methods as
-#' @importFrom dplyr contains first select_ group_by_at one_of
+#' @importFrom dplyr contains first select_ group_by_at one_of row_number
 #' @importFrom utils read.table
 #' @importFrom data.table setDF
 #'

diff --git a/R/overlap.R b/R/overlap.R
@@ -196,12 +196,14 @@ overlap_coef <- function(.x, .y) {
   UseMethod("overlap_coef")
 }
 
+#' @export
 overlap_coef.default <- function(.x, .y) {
   .x <- collect(.x, n = Inf)
   .y <- collect(.y, n = Inf)
   nrow(dplyr::intersect(.x, .y)) / min(nrow(.x), nrow(.y))
 }
 
+#' @export
 overlap_coef.character <- function(.x, .y) {
   length(dplyr::intersect(.x, .y)) / min(length(.x), length(.y))
 }
@@ -211,13 +213,15 @@ jaccard_index <- function(.x, .y) {
   UseMethod("jaccard_index")
 }
 
+#' @export
 jaccard_index.default <- function(.x, .y) {
   .x <- collect(.x, n = Inf)
   .y <- collect(.y, n = Inf)
   intersection <- nrow(dplyr::intersect(.x, .y))
   intersection / (nrow(.x) + nrow(.y) - intersection)
 }
 
+#' @export
 jaccard_index.character <- function(.x, .y) {
   intersection <- length(dplyr::intersect(.x, .y))
   intersection / (length(.x) + length(.y) - intersection)
@@ -227,13 +231,15 @@ tversky_index <- function(.x, .y, .a = .5, .b = .5) {
   UseMethod("tversky_index")
 }
 
+#' @export
 tversky_index.default <- function(.x, .y, .a = .5, .b = .5) {
   .x <- collect(.x, n = Inf)
   .y <- collect(.y, n = Inf)
   intersection <- nrow(dplyr::intersect(.x, .y))
   intersection / (.a * nrow(dplyr::setdiff(.x, .y)) + .b * nrow(dplyr::setdiff(.y, .x)) + intersection)
 }
 
+#' @export
 tversky_index.character <- function(.x, .y, .a = .5, .b = .5) {
   intersection <- length(dplyr::intersect(.x, .y))
   intersection / (.a * length(dplyr::setdiff(.x, .y)) + .b * length(dplyr::setdiff(.y, .x)) + intersection)
@@ -243,6 +249,7 @@ cosine_sim <- function(.x, .y, .quant) {
   UseMethod("cosine_sim")
 }
 
+#' @export
 cosine_sim.default <- function(.x, .y, .quant) {
   .x <- collect(.x, n = Inf)
   .y <- collect(.y, n = Inf)
@@ -258,6 +265,7 @@ cosine_sim.default <- function(.x, .y, .quant) {
   sum(first_col * second_col) / (sqrt(sum(first_col * first_col)) * sqrt(sum(second_col * second_col)))
 }
 
+#' @export
 cosine_sim.numeric <- function(.x, .y, .quant) {
   df <- rbind(.x, .y)
   sum(.x * .y) / (sqrt(rowSums(df^2))[1] * sqrt(rowSums(df^2))[2])[[1]]

diff --git a/vignettes/v1_introduction.Rmd b/vignettes/v1_introduction.Rmd
@@ -1,6 +1,6 @@
 ---
 title: "Introduction to `immunarch`"
-author: '<b>ImmunoMind</b> – improving design of T-cell therapies using multi-omics and AI. Research and biopharma partnerships, more details: <a href="https://immunomind.io">immunomind.io</a>'
+author: '<b>ImmunoMind</b> – improving design of T-cell therapies using multi-omics and AI. Research and biopharma partnerships, more details: <a href="https://immunomind.com/">immunomind.com</a>'
 date: "support@immunomind.io"
 output:
   html_document:

diff --git a/vignettes/v2_data.Rmd b/vignettes/v2_data.Rmd
@@ -1,6 +1,6 @@
 ---
 title: "Data loading"
-author: '<b>ImmunoMind</b> – improving design of T-cell therapies using multi-omics and AI. Research and biopharma partnerships, more details: <a href="https://immunomind.io">immunomind.io</a>'
+author: '<b>ImmunoMind</b> – improving design of T-cell therapies using multi-omics and AI. Research and biopharma partnerships, more details: <a href="https://immunomind.com">immunomind.com</a>'
 date: "support@immunomind.io"
 output:
   html_document:
@@ -50,7 +50,7 @@ The package provides several IO functions:
 
 - `"immunarch"` - current software tool, in case you forgot :)
 
-- `"immunoseq"` - https://www.immunoseq.com
+- `"immunoseq"` - https://www.adaptivebiotech.com/adaptive-immunosequencing/
 
 - `"mitcr"` - https://github.com/milaboratory/mitcr
 
@@ -90,7 +90,7 @@ You can load the data from a single file, a list of repertoire file paths, or fr
 If you have your files, you should just specify a path to your file or to a folder with files. Then load data using `repLoad`:
 
 ```{r, eval=F}
-#path argument is a path to the folder with your file or files including the metadata file.
+# path argument is a path to the folder with your file or files including the metadata file.
 immdata <- repLoad(path)
 ```
 
@@ -105,14 +105,14 @@ You can obtain working directory with `getwd()` command
 You could also download all files to the `'example'` folder in your working directory and load all of them by passing folder name to repLoad function in quotation marks:
 
 ```{r, eval=F}
-immdata <- repLoad('example')
+immdata <- repLoad("example")
 ```
 
 The example data is already downloaded with `immunarch` package. You can load all sample files using the following command:
 
 ```{r, eval=F}
-#path to the folder with example data
-file_path <- paste0(system.file(package="immunarch"), "/extdata/io/")
+# path to the folder with example data
+file_path <- paste0(system.file(package = "immunarch"), "/extdata/io/")
 immdata <- repLoad(file_path)
 ```
 
@@ -133,7 +133,7 @@ Otherwise `repLoad` will create a dummy metadata file with only sample names.
 
 ```{r, eval=F}
 # To load the whole folder with every file in it type:
-file_path <- paste0(system.file(package="immunarch"), "/extdata/io/")
+file_path <- paste0(system.file(package = "immunarch"), "/extdata/io/")
 immdata <- repLoad(file_path)
 print(names(immdata))
 
@@ -149,7 +149,6 @@ print(names(immdata))
 # > immdata <- repLoad("path/to/your/folder/")
 # > names(immdata)
 # [1] "data" "meta"
-
 ```
 
 Dummy metadata data frame looks like this:
@@ -183,7 +182,7 @@ con <- DBI::dbConnect(MonetDBLite::MonetDBLite(), embedded = dbdir)
 
 # Write each repertoire to MonetDB. Each table has corresponding name from the DATA
 for (i in seq_along(DATA)) {
-  DBI::dbWriteTable(con, names(DATA)[i], DATA[[i]], overwrite=TRUE)
+  DBI::dbWriteTable(con, names(DATA)[i], DATA[[i]], overwrite = TRUE)
 }
 
 # Create a source in the temporary directory with MonetDB
@@ -240,7 +239,7 @@ nrow(outofframes(immdata$data[[1]]))
 ## Get subset of clonotypes with a specific V gene
 It is simple to subset data frame according to labels in the specified index. In this example the resulting data frame contains only records with 'TRBV10-1' V gene:
 ```{r}
-filter(immdata$data[[1]], V.name == 'TRBV10-1')
+filter(immdata$data[[1]], V.name == "TRBV10-1")
 ```
 
 # Downsampling

diff --git a/vignettes/web_only/load_mixcr.Rmd b/vignettes/web_only/load_mixcr.Rmd
@@ -40,13 +40,13 @@ MiXCR supports the following formats of sequencing data: fasta, fastq, fastq.gz,
 You can choose to use the `analyze amplicon` method to process in one go:
 
 ```{r, eval=F}
-> mixcr analyze amplicon --species hs \
-        --starting-material dna \
-        --5-end v-primers \
-        --3-end j-primers \
-        --adapters adapters-present \
-        --receptor-type IGH \
-        input_R1.fastq input_R2.fastq analysis
+mixcr analyze amplicon --species hs \
+      --starting-material dna \
+      --5-end v-primers \
+      --3-end j-primers \
+      --adapters adapters-present \
+      --receptor-type IGH \
+      input_R1.fastq input_R2.fastq analysis
 ```
 
 or execute each step `align`, `assemble`, and `exportClones` individually.

diff --git a/vignettes/web_only/repFilter_v3.Rmd b/vignettes/web_only/repFilter_v3.Rmd
@@ -75,9 +75,9 @@ data(immdata)
 Look at meta data from `immdata` datasets:
 
 ```{r}
-#look at the metadata
+# look at the metadata
 immdata$meta
-#look at samples name in data
+# look at samples name in data
 names(immdata$data)
 ```
 
@@ -150,15 +150,15 @@ You can also use **multiple conditions**. In this case, the function returns val
 Filter for samples where *Age* is between *15* and *23* and *Lane* is *B* :
 
 ```{r example-2.4}
-repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include('B')))$meta
-names(repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include('B')))$data)
+repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include("B")))$meta
+names(repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include("B")))$data)
 ```
 
 Filter for samples where *Age* is between *15* and *23* and *Lane* is *A* or *B*:
 
 ```{r example-2.5}
-repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include('A', 'B')))$meta
-names(repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include('A', 'B')))$data)
+repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include("A", "B")))$meta
+names(repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include("A", "B")))$data)
 ```
 
 ### Method `by.repertoire` (short alias is `by.rep`)
@@ -219,18 +219,17 @@ In method `by.clonotype` or `by.cl`, there is an extra argument `.match`. The `.
 Filter out all clonotypes within samples with V gene 'TRBV1' or 'TRGV11'
 
 ```{r example-5.1, results = 'hide'}
-repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match="exact")
+repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match = "exact")
 ```
 
 Filter out all clonotypes within samples where V gene name contains substrings 'TRBV1' or 'TRGV11'
 ```{r example-5.2, results = 'hide'}
-repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match="substring")
-
+repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match = "substring")
 ```
 
 Filter out all clonotypes within samples where V gene name starts with 'TRBV1' or 'TRGV11'
 ```{r example-5.3, results = 'hide'}
-repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match="startswith")
+repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match = "startswith")
 ```
 
 ## Using `repFilter` function for single-cell repertoire data analysis
@@ -244,9 +243,9 @@ data(scdata)
 Look at meta data from `scdata` datasets:
 
 ```{r scadata}
-#look at the metadata
+# look at the metadata
 scdata$meta
-#look at samples name in data
+# look at samples name in data
 names(scdata$data)
 ```
 
@@ -271,34 +270,33 @@ vis(repExplore(scdata_cl$data, .method = "volume"))
 Compare J gene usage between three clusters:
 
 ```{r geneUsage,  warning=F}
-sc_active <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Activ')))$data, "hs.trbj", .norm = T)
+sc_active <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Activ")))$data, "hs.trbj", .norm = T)
 p1 <- vis(sc_active)
 p1
 
-sc_memory <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Memory')))$data, "hs.trbj", .norm = T)
+sc_memory <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Memory")))$data, "hs.trbj", .norm = T)
 p2 <- vis(sc_memory)
 p2
 
-sc_naive <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Naive')))$data, "hs.trbj", .norm = T)
+sc_naive <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Naive")))$data, "hs.trbj", .norm = T)
 p3 <- vis(sc_naive)
 p3
-
 ```
 
 Compare gene usage of IGHJ4 between three clusters:
 ```{r IGHV3, warning=F}
-scdata_active <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Activ')))
-scdata_active <- repFilter(scdata_active, .method = "by.cl", .query = list(J.name = include('IGHJ4')), .match = 'substring')
+scdata_active <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Activ")))
+scdata_active <- repFilter(scdata_active, .method = "by.cl", .query = list(J.name = include("IGHJ4")), .match = "substring")
 sc_active <- geneUsage(scdata_active$data, "hs.trbj", .norm = T)
 p1 <- vis(sc_active)
 
-scdata_memory <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Memory')))
-scdata_memory <- repFilter(scdata_memory, .method = "by.cl", .query = list(J.name = include('IGHJ4')), .match = 'substring')
+scdata_memory <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Memory")))
+scdata_memory <- repFilter(scdata_memory, .method = "by.cl", .query = list(J.name = include("IGHJ4")), .match = "substring")
 sc_memory <- geneUsage(scdata_memory$data, "hs.trbj", .norm = T)
 p2 <- vis(sc_memory)
 
-scdata_naive <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Naive')))
-scdata_naive <- repFilter(scdata_naive, .method = "by.cl", .query = list(J.name = include('IGHJ4')), .match = 'substring')
+scdata_naive <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Naive")))
+scdata_naive <- repFilter(scdata_naive, .method = "by.cl", .query = list(J.name = include("IGHJ4")), .match = "substring")
 sc_naive <- geneUsage(scdata_naive$data, "hs.trbj", .norm = T)
 p3 <- vis(sc_naive)
 
@@ -307,31 +305,31 @@ p1 + p2 + p3
 
 Look at the coding clonotypes in each cluster. Note that there aren't any noncoding clonotypes in the datasets:
 ```{r ORF, warning=F}
-scdata_active <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Activ')))
+scdata_active <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Activ")))
 scdata_active <- repFilter(scdata_active, .method = "by.clonotype", .query = list(CDR3.aa = exclude("partial", "out_of_frame")))
 exp_vol <- repExplore(scdata_active$data, .method = "volume")
 p1 <- vis(exp_vol)
 
-exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Activ')))$data, .method = "volume")
+exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Activ")))$data, .method = "volume")
 p2 <- vis(exp_vol)
-p1+p2
+p1 + p2
 
 
-scdata_memory <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Memory')))
+scdata_memory <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Memory")))
 scdata_memory <- repFilter(scdata_memory, .method = "by.clonotype", .query = list(CDR3.aa = exclude("partial", "out_of_frame")))
 exp_vol <- repExplore(scdata_memory$data, .method = "volume")
 p1 <- vis(exp_vol)
 
-exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Memory')))$data, .method = "volume")
+exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Memory")))$data, .method = "volume")
 p2 <- vis(exp_vol)
-p1+p2
+p1 + p2
 
-scdata_naive <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Naive')))
+scdata_naive <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Naive")))
 scdata_naive <- repFilter(scdata_naive, .method = "by.clonotype", .query = list(CDR3.aa = exclude("partial", "out_of_frame")))
 exp_vol <- repExplore(scdata_naive$data, .method = "volume")
 p1 <- vis(exp_vol)
 
-exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Naive')))$data, .method = "volume")
+exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Naive")))$data, .method = "volume")
 p2 <- vis(exp_vol)
-p1+p2
+p1 + p2
 ```