Skip to content

Commit

Permalink
docs: update docs; update broken links; update style
Browse files Browse the repository at this point in the history
  • Loading branch information
vadimnazarov committed Mar 18, 2024
1 parent b9c0ee4 commit 26f3ecd
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 64 deletions.
5 changes: 3 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

# Passes `read_vec` and `read_indices` unchanged to the native routine
# registered as `_immunarch_fill_vec` through `.Call`; the value of the last
# `.Call` is the function's result.
# NOTE(review): the `.Call` line appears twice. This looks like the before and
# after sides of a whitespace-only change rather than two intended calls --
# confirm against the real R/RcppExports.R.
fill_vec <- function(read_vec, read_indices) {
.Call(`_immunarch_fill_vec`, read_vec, read_indices)
.Call(`_immunarch_fill_vec`, read_vec, read_indices)
}

# Passes `new_reads` and `new_counts` unchanged to the native routine
# registered as `_immunarch_fill_reads` through `.Call`; the value of the last
# `.Call` is the function's result.
# NOTE(review): the `.Call` line appears twice. This looks like the before and
# after sides of a whitespace-only change rather than two intended calls --
# confirm against the real R/RcppExports.R.
fill_reads <- function(new_reads, new_counts) {
.Call(`_immunarch_fill_reads`, new_reads, new_counts)
.Call(`_immunarch_fill_reads`, new_reads, new_counts)
}

2 changes: 0 additions & 2 deletions R/explore.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,5 +145,3 @@ repExplore <- function(.data, .method = c("volume", "count", "len", "clones"), .

res
}

rep.ex <- repExplore
2 changes: 1 addition & 1 deletion R/io.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ if (getRversion() >= "2.15.1") {
#' @importFrom jsonlite read_json
#' @importFrom stringr str_split str_detect str_replace_all str_trim
#' @importFrom methods as
#' @importFrom dplyr contains first select_ group_by_at one_of
#' @importFrom dplyr contains first select_ group_by_at one_of row_number
#' @importFrom utils read.table
#' @importFrom data.table setDF
#'
Expand Down
8 changes: 8 additions & 0 deletions R/overlap.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,14 @@ overlap_coef <- function(.x, .y) {
UseMethod("overlap_coef")
}

#' @export
overlap_coef.default <- function(.x, .y) {
  # Pull both inputs in full (n = Inf) before any rows are counted.
  lhs <- collect(.x, n = Inf)
  rhs <- collect(.y, n = Inf)

  # Rows present in both inputs, divided by the row count of the smaller input.
  shared_rows <- nrow(dplyr::intersect(lhs, rhs))
  smaller_size <- min(nrow(lhs), nrow(rhs))
  shared_rows / smaller_size
}

#' @export
overlap_coef.character <- function(.x, .y) {
  # Number of values found in both vectors, divided by the length of the
  # shorter vector.
  shared_values <- dplyr::intersect(.x, .y)
  shorter_length <- min(length(.x), length(.y))
  length(shared_values) / shorter_length
}
Expand All @@ -211,13 +213,15 @@ jaccard_index <- function(.x, .y) {
UseMethod("jaccard_index")
}

#' @export
jaccard_index.default <- function(.x, .y) {
  # Pull both inputs in full (n = Inf) before any rows are counted.
  lhs <- collect(.x, n = Inf)
  rhs <- collect(.y, n = Inf)

  n_shared <- nrow(dplyr::intersect(lhs, rhs))
  # Combined row count of both inputs with the shared rows subtracted once.
  n_combined <- nrow(lhs) + nrow(rhs) - n_shared
  n_shared / n_combined
}

#' @export
jaccard_index.character <- function(.x, .y) {
intersection <- length(dplyr::intersect(.x, .y))
intersection / (length(.x) + length(.y) - intersection)
Expand All @@ -227,13 +231,15 @@ tversky_index <- function(.x, .y, .a = .5, .b = .5) {
UseMethod("tversky_index")
}

#' @export
tversky_index.default <- function(.x, .y, .a = .5, .b = .5) {
  # Pull both inputs in full (n = Inf) before any rows are counted.
  lhs <- collect(.x, n = Inf)
  rhs <- collect(.y, n = Inf)

  n_shared <- nrow(dplyr::intersect(lhs, rhs))
  n_only_lhs <- nrow(dplyr::setdiff(lhs, rhs))
  n_only_rhs <- nrow(dplyr::setdiff(rhs, lhs))

  # `.a` weights the rows found only in `.x`, `.b` the rows found only in `.y`.
  n_shared / (.a * n_only_lhs + .b * n_only_rhs + n_shared)
}

#' @export
tversky_index.character <- function(.x, .y, .a = .5, .b = .5) {
intersection <- length(dplyr::intersect(.x, .y))
intersection / (.a * length(dplyr::setdiff(.x, .y)) + .b * length(dplyr::setdiff(.y, .x)) + intersection)
Expand All @@ -243,6 +249,7 @@ cosine_sim <- function(.x, .y, .quant) {
UseMethod("cosine_sim")
}

#' @export
cosine_sim.default <- function(.x, .y, .quant) {
.x <- collect(.x, n = Inf)
.y <- collect(.y, n = Inf)
Expand All @@ -258,6 +265,7 @@ cosine_sim.default <- function(.x, .y, .quant) {
sum(first_col * second_col) / (sqrt(sum(first_col * first_col)) * sqrt(sum(second_col * second_col)))
}

#' @export
cosine_sim.numeric <- function(.x, .y, .quant) {
df <- rbind(.x, .y)
sum(.x * .y) / (sqrt(rowSums(df^2))[1] * sqrt(rowSums(df^2))[2])[[1]]
Expand Down
2 changes: 1 addition & 1 deletion vignettes/v1_introduction.Rmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: "Introduction to `immunarch`"
author: '<b>ImmunoMind</b> – improving design of T-cell therapies using multi-omics and AI. Research and biopharma partnerships, more details: <a href="https://immunomind.io">immunomind.io</a>'
author: '<b>ImmunoMind</b> – improving design of T-cell therapies using multi-omics and AI. Research and biopharma partnerships, more details: <a href="https://immunomind.com/">immunomind.com</a>'
date: "support@immunomind.io"
output:
html_document:
Expand Down
19 changes: 9 additions & 10 deletions vignettes/v2_data.Rmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: "Data loading"
author: '<b>ImmunoMind</b> – improving design of T-cell therapies using multi-omics and AI. Research and biopharma partnerships, more details: <a href="https://immunomind.io">immunomind.io</a>'
author: '<b>ImmunoMind</b> – improving design of T-cell therapies using multi-omics and AI. Research and biopharma partnerships, more details: <a href="https://immunomind.com">immunomind.com</a>'
date: "support@immunomind.io"
output:
html_document:
Expand Down Expand Up @@ -50,7 +50,7 @@ The package provides several IO functions:

- `"immunarch"` - current software tool, in case you forgot :)

- `"immunoseq"` - https://www.immunoseq.com
- `"immunoseq"` - https://www.adaptivebiotech.com/adaptive-immunosequencing/

- `"mitcr"` - https://github.com/milaboratory/mitcr

Expand Down Expand Up @@ -90,7 +90,7 @@ You can load the data from a single file, a list of repertoire file paths, or fr
If you have your own files, specify the path to a file or to a folder containing the files, then load the data using `repLoad`:

```{r, eval=F}
#path argument is a path to the folder with your file or files including the metadata file.
# path argument is a path to the folder with your file or files including the metadata file.
immdata <- repLoad(path)
```

Expand All @@ -105,14 +105,14 @@ You can obtain working directory with `getwd()` command
You can also download all the files into the `'example'` folder in your working directory and load all of them by passing the folder name, in quotation marks, to the `repLoad` function:

```{r, eval=F}
immdata <- repLoad('example')
immdata <- repLoad("example")
```

The example data is already included with the `immunarch` package. You can load all sample files using the following command:

```{r, eval=F}
#path to the folder with example data
file_path <- paste0(system.file(package="immunarch"), "/extdata/io/")
# path to the folder with example data
file_path <- paste0(system.file(package = "immunarch"), "/extdata/io/")
immdata <- repLoad(file_path)
```

Expand All @@ -133,7 +133,7 @@ Otherwise `repLoad` will create a dummy metadata file with only sample names.

```{r, eval=F}
# To load the whole folder with every file in it type:
file_path <- paste0(system.file(package="immunarch"), "/extdata/io/")
file_path <- paste0(system.file(package = "immunarch"), "/extdata/io/")
immdata <- repLoad(file_path)
print(names(immdata))
Expand All @@ -149,7 +149,6 @@ print(names(immdata))
# > immdata <- repLoad("path/to/your/folder/")
# > names(immdata)
# [1] "data" "meta"
```

The dummy metadata data frame looks like this:
Expand Down Expand Up @@ -183,7 +182,7 @@ con <- DBI::dbConnect(MonetDBLite::MonetDBLite(), embedded = dbdir)
# Write each repertoire to MonetDB. Each table has corresponding name from the DATA
for (i in seq_along(DATA)) {
DBI::dbWriteTable(con, names(DATA)[i], DATA[[i]], overwrite=TRUE)
DBI::dbWriteTable(con, names(DATA)[i], DATA[[i]], overwrite = TRUE)
}
# Create a source in the temporary directory with MonetDB
Expand Down Expand Up @@ -240,7 +239,7 @@ nrow(outofframes(immdata$data[[1]]))
## Get subset of clonotypes with a specific V gene
It is simple to subset data frame according to labels in the specified index. In this example the resulting data frame contains only records with 'TRBV10-1' V gene:
```{r}
filter(immdata$data[[1]], V.name == 'TRBV10-1')
filter(immdata$data[[1]], V.name == "TRBV10-1")
```

# Downsampling
Expand Down
14 changes: 7 additions & 7 deletions vignettes/web_only/load_mixcr.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ MiXCR supports the following formats of sequencing data: fasta, fastq, fastq.gz,
You can choose to use the `analyze amplicon` method to process in one go:

```{r, eval=F}
> mixcr analyze amplicon --species hs \
--starting-material dna \
--5-end v-primers \
--3-end j-primers \
--adapters adapters-present \
--receptor-type IGH \
input_R1.fastq input_R2.fastq analysis
mixcr analyze amplicon --species hs \
--starting-material dna \
--5-end v-primers \
--3-end j-primers \
--adapters adapters-present \
--receptor-type IGH \
input_R1.fastq input_R2.fastq analysis
```

or execute each step `align`, `assemble`, and `exportClones` individually.
Expand Down
60 changes: 29 additions & 31 deletions vignettes/web_only/repFilter_v3.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ data(immdata)
Look at the metadata from the `immdata` dataset:

```{r}
#look at the metadata
# look at the metadata
immdata$meta
#look at samples name in data
# look at samples name in data
names(immdata$data)
```

Expand Down Expand Up @@ -150,15 +150,15 @@ You can also use **multiple conditions**. In this case, the function returns val
Filter for samples where *Age* is between *15* and *23* and *Lane* is *B*:

```{r example-2.4}
repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include('B')))$meta
names(repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include('B')))$data)
repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include("B")))$meta
names(repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include("B")))$data)
```

Filter for samples where *Age* is between *15* and *23* and *Lane* is *A* or *B*:

```{r example-2.5}
repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include('A', 'B')))$meta
names(repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include('A', 'B')))$data)
repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include("A", "B")))$meta
names(repFilter(immdata, .method = "by.meta", .query = list(Age = interval(15, 23), Lane = include("A", "B")))$data)
```

### Method `by.repertoire` (short alias is `by.rep`)
Expand Down Expand Up @@ -219,18 +219,17 @@ In method `by.clonotype` or `by.cl`, there is an extra argument `.match`. The `.
Filter out all clonotypes within samples with V gene 'TRBV1' or 'TRGV11'

```{r example-5.1, results = 'hide'}
repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match="exact")
repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match = "exact")
```

Filter out all clonotypes within samples where V gene name contains substrings 'TRBV1' or 'TRGV11'
```{r example-5.2, results = 'hide'}
repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match="substring")
repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match = "substring")
```

Filter out all clonotypes within samples where V gene name starts with 'TRBV1' or 'TRGV11'
```{r example-5.3, results = 'hide'}
repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match="startswith")
repFilter(immdata, .method = "by.clonotype", .query = list(V.name = exclude("TRBV1", "TRGV11")), .match = "startswith")
```

## Using `repFilter` function for single-cell repertoire data analysis
Expand All @@ -244,9 +243,9 @@ data(scdata)
Look at the metadata from the `scdata` dataset:

```{r scadata}
#look at the metadata
# look at the metadata
scdata$meta
#look at samples name in data
# look at samples name in data
names(scdata$data)
```

Expand All @@ -271,34 +270,33 @@ vis(repExplore(scdata_cl$data, .method = "volume"))
Compare J gene usage between three clusters:

```{r geneUsage, warning=F}
sc_active <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Activ')))$data, "hs.trbj", .norm = T)
sc_active <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Activ")))$data, "hs.trbj", .norm = T)
p1 <- vis(sc_active)
p1
sc_memory <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Memory')))$data, "hs.trbj", .norm = T)
sc_memory <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Memory")))$data, "hs.trbj", .norm = T)
p2 <- vis(sc_memory)
p2
sc_naive <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Naive')))$data, "hs.trbj", .norm = T)
sc_naive <- geneUsage(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Naive")))$data, "hs.trbj", .norm = T)
p3 <- vis(sc_naive)
p3
```

Compare gene usage of IGHJ4 between three clusters:
```{r IGHV3, warning=F}
scdata_active <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Activ')))
scdata_active <- repFilter(scdata_active, .method = "by.cl", .query = list(J.name = include('IGHJ4')), .match = 'substring')
scdata_active <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Activ")))
scdata_active <- repFilter(scdata_active, .method = "by.cl", .query = list(J.name = include("IGHJ4")), .match = "substring")
sc_active <- geneUsage(scdata_active$data, "hs.trbj", .norm = T)
p1 <- vis(sc_active)
scdata_memory <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Memory')))
scdata_memory <- repFilter(scdata_memory, .method = "by.cl", .query = list(J.name = include('IGHJ4')), .match = 'substring')
scdata_memory <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Memory")))
scdata_memory <- repFilter(scdata_memory, .method = "by.cl", .query = list(J.name = include("IGHJ4")), .match = "substring")
sc_memory <- geneUsage(scdata_memory$data, "hs.trbj", .norm = T)
p2 <- vis(sc_memory)
scdata_naive <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Naive')))
scdata_naive <- repFilter(scdata_naive, .method = "by.cl", .query = list(J.name = include('IGHJ4')), .match = 'substring')
scdata_naive <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Naive")))
scdata_naive <- repFilter(scdata_naive, .method = "by.cl", .query = list(J.name = include("IGHJ4")), .match = "substring")
sc_naive <- geneUsage(scdata_naive$data, "hs.trbj", .norm = T)
p3 <- vis(sc_naive)
Expand All @@ -307,31 +305,31 @@ p1 + p2 + p3

Look at the coding clonotypes in each cluster. Note that there aren't any noncoding clonotypes in the datasets:
```{r ORF, warning=F}
scdata_active <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Activ')))
scdata_active <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Activ")))
scdata_active <- repFilter(scdata_active, .method = "by.clonotype", .query = list(CDR3.aa = exclude("partial", "out_of_frame")))
exp_vol <- repExplore(scdata_active$data, .method = "volume")
p1 <- vis(exp_vol)
exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Activ')))$data, .method = "volume")
exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Activ")))$data, .method = "volume")
p2 <- vis(exp_vol)
p1+p2
p1 + p2
scdata_memory <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Memory')))
scdata_memory <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Memory")))
scdata_memory <- repFilter(scdata_memory, .method = "by.clonotype", .query = list(CDR3.aa = exclude("partial", "out_of_frame")))
exp_vol <- repExplore(scdata_memory$data, .method = "volume")
p1 <- vis(exp_vol)
exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Memory')))$data, .method = "volume")
exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Memory")))$data, .method = "volume")
p2 <- vis(exp_vol)
p1+p2
p1 + p2
scdata_naive <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Naive')))
scdata_naive <- repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Naive")))
scdata_naive <- repFilter(scdata_naive, .method = "by.clonotype", .query = list(CDR3.aa = exclude("partial", "out_of_frame")))
exp_vol <- repExplore(scdata_naive$data, .method = "volume")
p1 <- vis(exp_vol)
exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include('Naive')))$data, .method = "volume")
exp_vol <- repExplore(repFilter(scdata_cl, .method = "by.meta", .query = list(Cluster = include("Naive")))$data, .method = "volume")
p2 <- vis(exp_vol)
p1+p2
p1 + p2
```
Loading

0 comments on commit 26f3ecd

Please sign in to comment.