diff --git a/dataset_processing/notebooks/CuiHacohen2023.Rmd b/dataset_processing/notebooks/CuiHacohen2023.Rmd new file mode 100644 index 0000000..27e7b63 --- /dev/null +++ b/dataset_processing/notebooks/CuiHacohen2023.Rmd @@ -0,0 +1,36 @@ +--- +title: "R Notebook" +output: html_notebook +--- +```{r} +library(Seurat) +``` + + +```{r} +data = Seurat::ReadMtx('/scratch/peidli/scPerturb/CuiHacohen2023/cytokine-samples17-matrix.mtx', '/scratch/peidli/scPerturb/CuiHacohen2023/cytokine-samples17-barcodes.tsv', '/scratch/peidli/scPerturb/CuiHacohen2023/cytokine-samples17-features.tsv', strip.suffix=T) +tags = Seurat::ReadMtx('/scratch/peidli/scPerturb/CuiHacohen2023/cytokine-hashtags17-matrix.mtx', '/scratch/peidli/scPerturb/CuiHacohen2023/cytokine-hashtags17-barcodes.tsv', '/scratch/peidli/scPerturb/CuiHacohen2023/cytokine-hashtags17-features.tsv', feature.column=1) +``` +```{r} +joint.bcs <- intersect(colnames(data), colnames(tags)) +print(length(joint.bcs)) +data <- data[, joint.bcs] +tags <- tags[, joint.bcs] +tags <- tags[rowSums(tags) > 10000,] # Remove tags with <10k hashtag counts +rowSums(tags) +``` + +```{r} +# Create Seurat object and demultiplex +seurat_object <- Seurat::CreateSeuratObject(counts = data) +seurat_object[["HTO"]] <- Seurat::CreateAssayObject(counts = tags) +``` + + +```{r} +seurat_object <- Seurat::NormalizeData(seurat_object, assay = "HTO", normalization.method = "CLR") +seurat_object <- Seurat::MULTIseqDemux(seurat_object, assay = "HTO") +``` + + + diff --git a/dataset_processing/notebooks/CuiHacohen2023.ipynb b/dataset_processing/notebooks/CuiHacohen2023.ipynb new file mode 100644 index 0000000..c1bd42d --- /dev/null +++ b/dataset_processing/notebooks/CuiHacohen2023.ipynb @@ -0,0 +1,577 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "33f8503a-5cf0-4868-bbd9-96517a1824c7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[31mERROR: pip's 
dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "pyopenssl 23.0.0 requires cryptography<40,>=38.0.0, but you have cryptography 41.0.7 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install mygene statannotations scrublet scanpy scvelo decoupler matplotlib_venn goatools gseapy scperturb biomart PyComplexHeatmap statsmodels omnipath git+https://github.com/saezlab/pypath.git --quiet" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cc825646-f94b-4535-87e3-64f4a7f4b2cb", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_50/1400239515.py:17: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n", + " from IPython.core.display import display, HTML\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import subprocess\n", + "import os\n", + "import sys\n", + "import matplotlib.backends.backend_pdf\n", + "import scanpy as sc\n", + "import matplotlib.pyplot as pl\n", + "import anndata as ad\n", + "import pandas as pd\n", + "import numpy as np\n", + "import seaborn as sns\n", + "\n", + "from pathlib import Path\n", + "\n", + "# Jupyter stuff\n", + "from tqdm.notebook import tqdm\n", + "from IPython.display import clear_output\n", + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))\n", + "\n", + "%matplotlib inline\n", + "\n", + "# Custom functions\n", + "sys.path.insert(1, '../')\n", + "from utils import *\n", + "\n", + "# scperturb package\n", + "sys.path.insert(1, '../package/src/')\n", + "from scperturb import *\n", + "\n", + "from pathlib import Path\n", + "figure_path = Path('../figures/')" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 3, + "id": "20257eca-28f2-4656-803c-430d8ee7a08d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "TEMPDIR = Path('/scratch/peidli/scPerturb/')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "986f8de6-1fe1-4cf9-8f87-9ba522328ba0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ids = [f'0{x}' if x < 10 else str(x) for x in range(1, 46) if x != 12] # sample 12 is missing" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d113ccc5-0e0c-4513-b17b-d29a3a28a1cf", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "files = [TEMPDIR / \"CuiHacohen2023/samples20_temp_annotated.h5ad\", TEMPDIR / \"CuiHacohen2023/samples21_temp_annotated.h5ad\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b2cc2f51-63fa-4af7-8675-c4aafedc7fc2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6a3a5afaa8b24ff1b015f323deb09b91", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/2 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
perturbationbatchbio_replicatesamplencountsngenesncounts_tagshashtag_IDnpertsperturbation_typediseasecancertissue_typecelltypeorganism
AAACCCAAGCCAGACA-20PBS7.0rep8cytokine-samples2012511030071cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
AAACCCAAGCCTCAAT-20IL-17A7.0rep1cytokine-samples2024519438811cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
AAACCCAAGTTTCAGC-20IL-17C7.0rep1cytokine-samples201039368331cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
AAACCCACACCAAATC-20PBS7.0rep8cytokine-samples2014411922971cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
AAACCCAGTACTAACC-20PBS7.0rep8cytokine-samples2013410123271cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
................................................
TTTGTTGAGGGCCTCT-21PBS7.0rep7cytokine-samples211059540861cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
TTTGTTGCAATCCTTT-21C3a7.0rep2cytokine-samples2150530636011cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
TTTGTTGCAATTCGTG-21C3a7.0rep2cytokine-samples211149628311cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
TTTGTTGCAGAGGCTA-21nan7.0NaNcytokine-samples211569463550cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
TTTGTTGCAGTCTGGC-21IL-17D7.0rep2cytokine-samples2139761348116231cytokineshealthyFalseprimarymixed cells from draining lymph nodesmouse
\n", + "

15515 rows × 15 columns

\n", + "" + ], + "text/plain": [ + " perturbation batch bio_replicate sample \\\n", + "AAACCCAAGCCAGACA-20 PBS 7.0 rep8 cytokine-samples20 \n", + "AAACCCAAGCCTCAAT-20 IL-17A 7.0 rep1 cytokine-samples20 \n", + "AAACCCAAGTTTCAGC-20 IL-17C 7.0 rep1 cytokine-samples20 \n", + "AAACCCACACCAAATC-20 PBS 7.0 rep8 cytokine-samples20 \n", + "AAACCCAGTACTAACC-20 PBS 7.0 rep8 cytokine-samples20 \n", + "... ... ... ... ... \n", + "TTTGTTGAGGGCCTCT-21 PBS 7.0 rep7 cytokine-samples21 \n", + "TTTGTTGCAATCCTTT-21 C3a 7.0 rep2 cytokine-samples21 \n", + "TTTGTTGCAATTCGTG-21 C3a 7.0 rep2 cytokine-samples21 \n", + "TTTGTTGCAGAGGCTA-21 nan 7.0 NaN cytokine-samples21 \n", + "TTTGTTGCAGTCTGGC-21 IL-17D 7.0 rep2 cytokine-samples21 \n", + "\n", + " ncounts ngenes ncounts_tags hashtag_ID nperts \\\n", + "AAACCCAAGCCAGACA-20 125 110 300 7 1 \n", + "AAACCCAAGCCTCAAT-20 245 194 388 1 1 \n", + "AAACCCAAGTTTCAGC-20 103 93 683 3 1 \n", + "AAACCCACACCAAATC-20 144 119 229 7 1 \n", + "AAACCCAGTACTAACC-20 134 101 232 7 1 \n", + "... ... ... ... ... ... \n", + "TTTGTTGAGGGCCTCT-21 105 95 408 6 1 \n", + "TTTGTTGCAATCCTTT-21 505 306 360 1 1 \n", + "TTTGTTGCAATTCGTG-21 114 96 283 1 1 \n", + "TTTGTTGCAGAGGCTA-21 156 94 635 5 0 \n", + "TTTGTTGCAGTCTGGC-21 3976 1348 1162 3 1 \n", + "\n", + " perturbation_type disease cancer tissue_type \\\n", + "AAACCCAAGCCAGACA-20 cytokines healthy False primary \n", + "AAACCCAAGCCTCAAT-20 cytokines healthy False primary \n", + "AAACCCAAGTTTCAGC-20 cytokines healthy False primary \n", + "AAACCCACACCAAATC-20 cytokines healthy False primary \n", + "AAACCCAGTACTAACC-20 cytokines healthy False primary \n", + "... ... ... ... ... 
\n", + "TTTGTTGAGGGCCTCT-21 cytokines healthy False primary \n", + "TTTGTTGCAATCCTTT-21 cytokines healthy False primary \n", + "TTTGTTGCAATTCGTG-21 cytokines healthy False primary \n", + "TTTGTTGCAGAGGCTA-21 cytokines healthy False primary \n", + "TTTGTTGCAGTCTGGC-21 cytokines healthy False primary \n", + "\n", + " celltype organism \n", + "AAACCCAAGCCAGACA-20 mixed cells from draining lymph nodes mouse \n", + "AAACCCAAGCCTCAAT-20 mixed cells from draining lymph nodes mouse \n", + "AAACCCAAGTTTCAGC-20 mixed cells from draining lymph nodes mouse \n", + "AAACCCACACCAAATC-20 mixed cells from draining lymph nodes mouse \n", + "AAACCCAGTACTAACC-20 mixed cells from draining lymph nodes mouse \n", + "... ... ... \n", + "TTTGTTGAGGGCCTCT-21 mixed cells from draining lymph nodes mouse \n", + "TTTGTTGCAATCCTTT-21 mixed cells from draining lymph nodes mouse \n", + "TTTGTTGCAATTCGTG-21 mixed cells from draining lymph nodes mouse \n", + "TTTGTTGCAGAGGCTA-21 mixed cells from draining lymph nodes mouse \n", + "TTTGTTGCAGTCTGGC-21 mixed cells from draining lymph nodes mouse \n", + "\n", + "[15515 rows x 15 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata.obs" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "aa411d67-e5e6-42ec-a227-dd07ac0de238", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "adata = sc.read(TEMPDIR / \"CuiHacohen2023.h5ad\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d0e488d-1fe2-4f14-bb04-239814fb23bf", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dataset_processing/notebooks/CuiHacohen2023.nb.html b/dataset_processing/notebooks/CuiHacohen2023.nb.html new file mode 100644 index 0000000..e2a04e0 --- /dev/null +++ b/dataset_processing/notebooks/CuiHacohen2023.nb.html @@ -0,0 +1,1855 @@ + + + + + + + + + + + + + +R Notebook + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
setwd("/scratch/peidli/scPerturb/CuiHacohen2023/")
+
+ + +
Warning: The working directory was changed to /scratch/peidli/scPerturb/CuiHacohen2023 inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
+ + + + + + + + +
# Create Seurat object and demultiplex
+seurat_object <- Seurat::CreateSeuratObject(counts = data)
+seurat_object[["HTO"]] <- Seurat::CreateAssayObject(counts = tags)
+ + + + + + +
seurat_object <- Seurat::NormalizeData(seurat_object, assay = "HTO", normalization.method = "CLR")
+seurat_object <- Seurat::MULTIseqDemux(seurat_object, assay = "HTO")
+ + + + + +
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpgYGB7cn0KbGlicmFyeShTZXVyYXQpCmBgYAoKCmBgYHtyfQpkYXRhID0gU2V1cmF0OjpSZWFkTXR4KCcvc2NyYXRjaC9wZWlkbGkvc2NQZXJ0dXJiL0N1aUhhY29oZW4yMDIzL2N5dG9raW5lLXNhbXBsZXMxNy1tYXRyaXgubXR4JywgJy9zY3JhdGNoL3BlaWRsaS9zY1BlcnR1cmIvQ3VpSGFjb2hlbjIwMjMvY3l0b2tpbmUtc2FtcGxlczE3LWJhcmNvZGVzLnRzdicsICcvc2NyYXRjaC9wZWlkbGkvc2NQZXJ0dXJiL0N1aUhhY29oZW4yMDIzL2N5dG9raW5lLXNhbXBsZXMxNy1mZWF0dXJlcy50c3YnLCBzdHJpcC5zdWZmaXg9VCkKdGFncyA9IFNldXJhdDo6UmVhZE10eCgnL3NjcmF0Y2gvcGVpZGxpL3NjUGVydHVyYi9DdWlIYWNvaGVuMjAyMy9jeXRva2luZS1oYXNodGFnczE3LW1hdHJpeC5tdHgnLCAnL3NjcmF0Y2gvcGVpZGxpL3NjUGVydHVyYi9DdWlIYWNvaGVuMjAyMy9jeXRva2luZS1oYXNodGFnczE3LWJhcmNvZGVzLnRzdicsICcvc2NyYXRjaC9wZWlkbGkvc2NQZXJ0dXJiL0N1aUhhY29oZW4yMDIzL2N5dG9raW5lLWhhc2h0YWdzMTctZmVhdHVyZXMudHN2JywgZmVhdHVyZS5jb2x1bW49MSkKYGBgCmBgYHtyfQpqb2ludC5iY3MgPC0gaW50ZXJzZWN0KGNvbG5hbWVzKGRhdGEpLCBjb2xuYW1lcyh0YWdzKSkKcHJpbnQobGVuZ3RoKGpvaW50LmJjcykpCmRhdGEgPC0gZGF0YVssIGpvaW50LmJjc10KdGFncyA8LSB0YWdzWywgam9pbnQuYmNzXQp0YWdzIDwtIHRhZ3Nbcm93U3Vtcyh0YWdzKSA+IDEwMDAwLF0gIyBSZW1vdmUgdGFncyB3aXRoIDwxMGsgaGFzaHRhZyBjb3VudHMKcm93U3Vtcyh0YWdzKQpgYGAKCmBgYHtyfQojIENyZWF0ZSBTZXVyYXQgb2JqZWN0IGFuZCBkZW11bHRpcGxleApzZXVyYXRfb2JqZWN0IDwtIFNldXJhdDo6Q3JlYXRlU2V1cmF0T2JqZWN0KGNvdW50cyA9IGRhdGEpCnNldXJhdF9vYmplY3RbWyJIVE8iXV0gPC0gU2V1cmF0OjpDcmVhdGVBc3NheU9iamVjdChjb3VudHMgPSB0YWdzKQpgYGAKCgpgYGB7cn0Kc2V1cmF0X29iamVjdCA8LSBTZXVyYXQ6Ok5vcm1hbGl6ZURhdGEoc2V1cmF0X29iamVjdCwgYXNzYXkgPSAiSFRPIiwgbm9ybWFsaXphdGlvbi5tZXRob2QgPSAiQ0xSIikKc2V1cmF0X29iamVjdCA8LSBTZXVyYXQ6Ok1VTFRJc2VxRGVtdXgoc2V1cmF0X29iamVjdCwgYXNzYXkgPSAiSFRPIikKYGBgCgoKCg==
+ + + +
+ + + + + + + + + + + + + + + + diff --git a/dataset_processing/snakemake/Snakefile b/dataset_processing/snakemake/Snakefile index b371632..4b13d42 100644 --- a/dataset_processing/snakemake/Snakefile +++ b/dataset_processing/snakemake/Snakefile @@ -27,7 +27,8 @@ include: "subworkflows/SunshineHein2023/Snakefile" include: "subworkflows/WesselsSatija2023/Snakefile" include: "subworkflows/LiangWang2023/Snakefile" include: "subworkflows/LotfollahiTheis2023/Snakefile" -include: "subworkflows/KowalskiSatijaPreprint/Snakefile" +include: "subworkflows/CuiHacohen2023/Snakefile" +# include: "subworkflows/KowalskiSatijaPreprint/Snakefile" ### RULES ### rule all: @@ -43,12 +44,13 @@ rule all: rules.WesselsSatija2023.output, rules.LiangWang2023.output, rules.LotfollahiTheis2023.output, - rules.KowalskiSatijaPreprint.output, + rules.CuiHacohen2023.output, # dysfunct # WIP rules.McFalineTrapnell2023.output, # screen 2 too big. OOF??? # rules.WuBassett2023.output, # obtained from original authors, maybe a copy is still left on my charité mac? - + # rules.KowalskiSatijaPreprint.output, # Can't install PASTA package required to open the rds files... who comes up with these stupid ideas? 
+ # deprecated # rules.UrsuBoehm2022.output, # does not have a control diff --git a/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023.py b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023.py new file mode 100644 index 0000000..c9713d0 --- /dev/null +++ b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023.py @@ -0,0 +1,46 @@ +import pandas as pd +import scanpy as sc +import sys + +from tqdm import tqdm +from pathlib import Path + +# Custom functions +sys.path.insert(1, '../../') +from utils import annotate_qc, assert_annotations + +TEMPDIR = Path(snakemake.config['TEMPDIR']) + +# merge +adatas = [] +for f in tqdm(snakemake.input): + adata = sc.read(f) + adatas.append(adata) +adata = sc.concat(adatas, axis=0) + +# Obs +adata.obs.rename({ + 'nCount_RNA': 'ncounts', + 'nFeature_RNA': 'ngenes', + 'nCount_HTO': 'ncounts_tags', + 'filename_prefix': 'sample', + 'processing_batch': 'batch', + 'biological_replicate_number': 'bio_replicate', + 'cytokine': 'perturbation' +}, axis=1, inplace=True) +adata.obs.drop(['nFeature_HTO'], axis=1, inplace=True) +adata.obs.perturbation = adata.obs.perturbation.astype(str) +adata.obs['perturbation'][pd.isna(adata.obs['perturbation'])] = 'control' +adata.obs = adata.obs[['perturbation', 'batch', 'bio_replicate', 'sample', 'ncounts', 'ngenes', 'ncounts_tags', 'hashtag_ID']] +adata.obs['nperts'] = [1-p.count('control') if type(p)==str else 0 for p in adata.obs.perturbation] +adata.obs['perturbation_type'] = 'cytokines' +adata.obs['disease'] = "healthy" +adata.obs['cancer'] = False +adata.obs['tissue_type']="primary" +adata.obs["celltype"] = 'mixed cells from draining lymph nodes' +adata.obs['organism'] = 'mouse' +annotate_qc(adata, species='mouse') +assert_annotations(adata) + +adata.write(snakemake.output[0], compression='gzip') +print('Done.') diff --git a/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023_demux.R 
b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023_demux.R
new file mode 100644
index 0000000..a1f3058
--- /dev/null
+++ b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023_demux.R
@@ -0,0 +1,49 @@
+# Demultiplex one hashtagged library with Seurat's MULTIseqDemux.
+#
+# Run by Snakemake through the `script:` directive, which provides the
+# `snakemake` object. Inputs are the matrix/barcodes/features triplets of
+# the RNA library (`sample_*`) and of the hashtag library (`tags_*`); the
+# metadata table of the demultiplexed Seurat object is written as CSV to the
+# `demux` output.
+library('Seurat')
+
+# Load the data
+data <- Seurat::ReadMtx(
+  mtx = snakemake@input[['sample_mtx']],
+  cells = snakemake@input[['sample_barcodes']],
+  features = snakemake@input[['sample_features']],
+  strip.suffix = TRUE
+)
+tags <- Seurat::ReadMtx(
+  mtx = snakemake@input[['tags_mtx']],
+  cells = snakemake@input[['tags_barcodes']],
+  features = snakemake@input[['tags_features']],
+  feature.column = 1
+)
+
+# Subset RNA and HTO counts by joint cell barcodes
+joint.bcs <- intersect(colnames(data), colnames(tags))
+print(length(joint.bcs))
+if (length(joint.bcs) == 0) {
+  stop("No cell barcodes are shared between the RNA and hashtag matrices.", call. = FALSE)
+}
+# drop = FALSE keeps both objects two-dimensional even if a single barcode or
+# hashtag is left; the default would collapse them to plain vectors.
+data <- data[, joint.bcs, drop = FALSE]
+tags <- tags[, joint.bcs, drop = FALSE]
+tag.totals <- rowSums(tags)
+print(tag.totals)
+tags <- tags[tag.totals > 10000, , drop = FALSE] # Keep only tags with >10k hashtag counts
+if (nrow(tags) == 0) {
+  stop("No hashtag has more than 10000 counts; nothing to demultiplex.", call. = FALSE)
+}
+
+# Create Seurat object and demultiplex
+seurat_object <- Seurat::CreateSeuratObject(counts = data)
+seurat_object[["HTO"]] <- Seurat::CreateAssayObject(counts = tags)
+seurat_object <- Seurat::NormalizeData(seurat_object, assay = "HTO", normalization.method = "CLR")
+seurat_object <- Seurat::MULTIseqDemux(seurat_object, assay = "HTO")
+print(head(seurat_object[[]]))
+
+# Save the demultiplexed metadata
+write.csv(seurat_object[[]], snakemake@output[['demux']])
\ No newline at end of file
diff --git a/dataset_processing/snakemake/subworkflows/CuiHacohen2023/Snakefile b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/Snakefile
new file mode 100644
index 0000000..4454142
--- /dev/null
+++ b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/Snakefile
@@ -0,0 +1,128 @@
+"""
+Author: Stefan Peidli
+Date:
23.11.2023
+Run: snakemake
+"""
+
+from pathlib import Path
+configfile: "../../configuration/config.yaml"
+
+### PATHS ###
+DATADIR = Path(config['DOWNDIR']) # place to store data
+TEMPDIR = Path(config['TEMPDIR']) # place to store temporary files (huge files)
+
+### SAMPLES ###
+ids = [f'0{x}' if x < 10 else str(x) for x in range(1, 46) if x != 12] # sample 12 is missing
+
+# ### RULES ###
+rule CuiHacohen2023_download:
+    output:
+        temp(expand(TEMPDIR / 'CuiHacohen2023/cytokine-{library}{x}-{mode}',
+            library=['samples', 'hashtags'], x=ids,
+            mode=['barcodes.tsv', 'features.tsv', 'matrix.mtx'])),
+        temp(TEMPDIR / 'CuiHacohen2023/GSE202186_map-scRNAseq-cytokines-dictionary.xlsx')  # input of CuiHacohen2023_annotate; must be declared here so the DAG can resolve it
+    resources:
+        partititon='short',  # NOTE(review): looks like a typo of 'partition' - confirm which key the cluster profile reads
+        time='01:00:00',
+        mem_mb=8000,
+        disk_mb=8000
+    shell:
+        """
+        cd {TEMPDIR}/CuiHacohen2023
+        rm -rf *
+        wget --recursive --no-parent -nd -R "index.html*" ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE202nnn/GSE202186/suppl/
+        tar -xvf GSE202186_RAW.tar
+        rm GSE202186_RAW.tar
+        gunzip *.gz
+
+        for file in GSM*.tsv GSM*.mtx; do
+            new_name=$(echo "$file" | sed 's/^GSM[0-9]*_//')
+            mv "$file" "$new_name"
+        done
+        """
+
+rule CuiHacohen2023_demux:
+    input:
+        sample_mtx = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-matrix.mtx',
+        sample_barcodes = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-barcodes.tsv',
+        sample_features = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-features.tsv',
+        tags_mtx = TEMPDIR / 'CuiHacohen2023/cytokine-hashtags{x}-matrix.mtx',
+        tags_barcodes = TEMPDIR / 'CuiHacohen2023/cytokine-hashtags{x}-barcodes.tsv',
+        tags_features = TEMPDIR / 'CuiHacohen2023/cytokine-hashtags{x}-features.tsv',
+    output:
+        demux = TEMPDIR / 'CuiHacohen2023/samples{x}-demux.csv',
+    conda: 'r_env'
+    resources:
+        partititon='short',  # NOTE(review): same suspected 'partition' typo as in the download rule
+        time='04:00:00',
+        mem_mb=16000,
+        disk_mb=16000
+    script: 'CuiHacohen2023_demux.R'
+
+rule CuiHacohen2023_annotate:
+    input:
+        sample_mtx = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-matrix.mtx',
+        sample_barcodes = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-barcodes.tsv',
+        sample_features = TEMPDIR /
'CuiHacohen2023/cytokine-samples{x}-features.tsv', + demux = TEMPDIR / 'CuiHacohen2023/samples{x}-demux.csv', + sample_mapping = TEMPDIR / 'CuiHacohen2023/GSE202186_map-scRNAseq-cytokines-dictionary.xlsx' + output: + temp(TEMPDIR / 'CuiHacohen2023/samples{x}_temp_annotated.h5ad') + resources: + partititon='medium', + time='08:00:00', + mem_mb=32000, + disk_mb=32000 + run: + import pandas as pd + import scanpy as sc + from scipy.sparse import csr_matrix + from scipy.io import mmread + + idx = wildcards.x + sample_mapping = pd.read_excel(input.sample_mapping) + + # read + X = csr_matrix(mmread(input.sample_mtx)).T + obs = pd.read_csv(input.sample_barcodes, sep='\t', index_col=0, names=['cell_barcode']) + var = pd.read_csv(input.sample_features, sep='\t', index_col=1, names=['ensembl_id', 'gene_symbol', 'feature_type']) + + # build + adata = sc.AnnData(X, obs, var) + adata.var_names_make_unique() + adata.obs_names = [x.replace('-1', '') for x in adata.obs_names] + + # add demultiplexing + demux = pd.read_csv(input.demux, index_col=0) + demux = demux[~demux.MULTI_ID.isin(['Negative', 'Doublet', 'unmapped'])].copy() + demux['hashtag_ID'] = [x[4] for x in demux.MULTI_ID] + demux.drop(['orig.ident', 'MULTI_classification', 'MULTI_ID'], axis=1, inplace=True) + obs_ = pd.merge(adata.obs, demux, left_index=True, right_index=True, how='inner') + adata = adata[obs_.index].copy() + adata.obs=obs_ + + # annotate + sample_mapping.hashtag_ID = sample_mapping.hashtag_ID.astype(str) + adata.obs['filename_prefix'] = f'cytokine-samples{idx}' + adata.obs['cell_barcode'] = adata.obs.index + adata.obs = pd.merge(adata.obs, sample_mapping, + left_on=['filename_prefix', 'hashtag_ID'], + right_on=['filename_prefix', 'hashtag_ID'], + how='left' + ).set_index('cell_barcode') + adata.obs_names = [f'{x}-{idx}' for x in adata.obs_names] + + # write + adata.write_h5ad(output[0]) + +rule CuiHacohen2023: + input: + expand(TEMPDIR / 'CuiHacohen2023/samples{x}_temp_annotated.h5ad', x=ids) + output: + 
DATADIR / 'CuiHacohen2023.h5ad' + resources: + partititon='short', + time='04:00:00', + mem_mb=64000, + disk_mb=64000 + script: 'CuiHacohen2023.py' diff --git a/environments/r_env.yaml b/environments/r_env.yaml index d8f67c6..c9c1616 100644 --- a/environments/r_env.yaml +++ b/environments/r_env.yaml @@ -11,3 +11,4 @@ dependencies: - conda-forge::r-seurat - conda-forge::r-irkernel - conda-forge::r-optparse +# You have to manually install satijalab/PASTA for the data from Kowalski et al. diff --git a/website/datavzrd/scperturb_dataset_info_datavzrd.csv b/website/datavzrd/scperturb_dataset_info_datavzrd.csv index 65f6980..3401c71 100644 --- a/website/datavzrd/scperturb_dataset_info_datavzrd.csv +++ b/website/datavzrd/scperturb_dataset_info_datavzrd.csv @@ -2,7 +2,7 @@ Full index,Download Link,Publication index,Dataset index,Title,doi_url,First Aut AdamsonWeissman2016_GSM2406675_10X001,Download,AdamsonWeissman2016,GSM2406675_10X001,A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response,https://doi.org/10.1016/j.cell.2016.11.048,Britt Adamson,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,Chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,n,01.10.2019,2019 AdamsonWeissman2016_GSM2406677_10X005,Download,AdamsonWeissman2016,GSM2406677_10X005,A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response,https://doi.org/10.1016/j.cell.2016.11.048,Britt Adamson,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,Chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,y,01.10.2019,2019 AdamsonWeissman2016_GSM2406681_10X010,Download,AdamsonWeissman2016,GSM2406681_10X010,A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response,https://doi.org/10.1016/j.cell.2016.11.048,Britt Adamson,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,Chronic myeloid 
leukemia,lymphoblasts,cell_line,y,n,n,y,01.10.2019,2019 -AissaBenevolenskaya2021,Download,AissaBenevolenskaya2021,,"Single-cell transcriptional changes associated with drug tolerance and response to combination therapies in cancer",https://doi.org/10.1038/s41467-021-21884-z,Alexandre Aissa,Homo sapiens; Mus musculus,RNA,7041849,h5ad,Drop-Seq,"cell lines, patient tissue, mouse xenografts",drugs,Lung adenocarcinoma,Lung adenocarcinoma,cell_line,y,no,n,y,01.11.2020,2020 +AissaBenevolenskaya2021,Download,AissaBenevolenskaya2021,,Single-cell transcriptional changes associated with drug tolerance and response to combination therapies in cancer,https://doi.org/10.1038/s41467-021-21884-z,Alexandre Aissa,Homo sapiens; Mus musculus,RNA,7041849,h5ad,Drop-Seq,"cell lines, patient tissue, mouse xenografts",drugs,Lung adenocarcinoma,Lung adenocarcinoma,cell_line,y,no,n,y,01.11.2020,2020 ChangYe2021,Download,ChangYe2021,,Identifying transcriptional programs underlying cancer drug response with TraCe-seq,https://doi.org/10.1038/s41587-021-01005-3,Matthew Chang,Homo sapiens,RNA,7041849,h5ad,clonal tagging (TraCe-seq),"lung and breast cancer cell lines PC9, MCF-10A, MDA-MB-231, NCI-H358, and NCI-H1373",drugs,lung adenocarcinoma,Lung adenocarcinoma,cell_line,y,n,n,n,01.08.2021,2021 DatlingerBock2017,Download,DatlingerBock2017,,Pooled CRISPR screening with single-cell transcriptome readout,https://doi.org/10.1038/nmeth.4177,Paul Datlinger,Homo sapiens; Mus musculus,RNA,7041849,h5ad,CROP-seq,Jurkat cells (immune cancer),"CRISPR-cas9, TCR stim",acute T cell leukemia,T cells,cell_line,y,n,n,y,01.12.2016,2016 DatlingerBock2021,Download,DatlingerBock2021,,Ultra-high-throughput single-cell RNA sequencing and perturbation screening with combinatorial fluidic indexing,https://doi.org/10.1038/s41592-021-01153-z,Paul Datlinger,Homo sapiens,RNA,7041849,h5ad,scifi-RNA-seq (derived from CROP-seq),Jurkat,"CRISPR-cas9, TCR stim",acute T cell leukemia,T cells,cell_line,y,n,n,y,01.12.2019,2019 @@ 
-12,9 +12,9 @@ FrangiehIzar2021_protein,Download,FrangiehIzar2021,protein,Multimodal pooled Per GasperiniShendure2019_atscale,Download,GasperiniShendure2019,atscale,A Genome-wide Framework for Mapping Gene Regulation via Cellular Genetic Screens,https://doi.org/10.1016/j.cell.2018.11.029,Molly Gasperini,Homo sapiens,RNA,7041849,h5ad,CROP-seq,K562,CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,y,01.06.2018,2018 GasperiniShendure2019_highMOI,Download,GasperiniShendure2019,highMOI,A Genome-wide Framework for Mapping Gene Regulation via Cellular Genetic Screens,https://doi.org/10.1016/j.cell.2018.11.029,Molly Gasperini,Homo sapiens,RNA,7041849,h5ad,CROP-seq,K562,CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,y,01.06.2018,2018 GasperiniShendure2019_lowMOI,Download,GasperiniShendure2019,lowMOI,A Genome-wide Framework for Mapping Gene Regulation via Cellular Genetic Screens,https://doi.org/10.1016/j.cell.2018.11.029,Molly Gasperini,Homo sapiens,RNA,7041849,h5ad,CROP-seq,K562,CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,y,01.06.2018,2018 -GehringPachter2019,Download,GehringPachter2019,,Highly multiplexed single-cell RNA-seq by DNA oligonucleotide tagging of cellular proteins,https://doi.org/10.1038/s41587-019-0372-z,Jase Gehring,Mus musculus,RNA,7041849,h5ad,"96-plex scRNA-seq oligo barcodes",mouse neural stem cells (human only for species mixing experiment),drugs,healthy,neural stem cells,primary,n,n,y,y,01.01.2020,2020 -Liscovitch-BrauerSanjana2021_K562_1,Download,Liscovitch-BrauerSanjana2021,K562_1,"Profiling the genetic determinants of chromatin accessibility with scalable single-cell CRISPR screens",https://doi.org/10.1038/s41587-021-00902-x,"Liscovitch-Brauer, N.",Homo sapiens,ATAC,7058382,zip,CRISPR–sciATAC,Myeloid Leukemia,CRISPR-cas9,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.04.2021,2021 -Liscovitch-BrauerSanjana2021_K562_2,Download,Liscovitch-BrauerSanjana2021,K562_2,"Profiling the 
genetic determinants of chromatin accessibility with scalable single-cell CRISPR screens",https://doi.org/10.1038/s41587-021-00902-x,"Liscovitch-Brauer, N.",Homo sapiens,ATAC,7058382,zip,CRISPR–sciATAC,Myeloid Leukemia,CRISPR-cas9,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.04.2021,2021 +GehringPachter2019,Download,GehringPachter2019,,Highly multiplexed single-cell RNA-seq by DNA oligonucleotide tagging of cellular proteins,https://doi.org/10.1038/s41587-019-0372-z,Jase Gehring,Mus musculus,RNA,7041849,h5ad,96-plex scRNA-seq oligo barcodes,mouse neural stem cells (human only for species mixing experiment),drugs,healthy,neural stem cells,primary,n,n,y,y,01.01.2020,2020 +Liscovitch-BrauerSanjana2021_K562_1,Download,Liscovitch-BrauerSanjana2021,K562_1,Profiling the genetic determinants of chromatin accessibility with scalable single-cell CRISPR screens,https://doi.org/10.1038/s41587-021-00902-x,"Liscovitch-Brauer, N.",Homo sapiens,ATAC,7058382,zip,CRISPR–sciATAC,Myeloid Leukemia,CRISPR-cas9,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.04.2021,2021 +Liscovitch-BrauerSanjana2021_K562_2,Download,Liscovitch-BrauerSanjana2021,K562_2,Profiling the genetic determinants of chromatin accessibility with scalable single-cell CRISPR screens,https://doi.org/10.1038/s41587-021-00902-x,"Liscovitch-Brauer, N.",Homo sapiens,ATAC,7058382,zip,CRISPR–sciATAC,Myeloid Leukemia,CRISPR-cas9,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.04.2021,2021 McFarlandTsherniak2020,Download,McFarlandTsherniak2020,,Multiplexed single-cell transcriptional response profiling to define cancer vulnerabilities and therapeutic mechanism of action,https://doi.org/10.1038/s41467-020-17440-w,James McFarland,Homo sapiens,RNA,7041849,h5ad,MIX-seq (pooled scRNA-seq),200 different cell lines,"drugs, CRISPR-cas9",NA,NA,cell_line,y,partially,n,n,01.12.2019,2019 MimitouSmibert2021,Download,MimitouSmibert2021,,"Scalable, multimodal profiling of chromatin 
accessibility, gene expression and protein levels in single cells",https://doi.org/10.1038/s41587-021-00927-2,Eleni P. Mimitou,Homo sapiens,ATAC + protein,7041849,h5ad,ASAP-seq,Purified CD4 T cells,CRISPR-cas9,healthy,T cells,primary,n,n,n,n,01.05.2019,2019 NormanWeissman2019_filtered,Download,NormanWeissman2019,filtered,Exploring genetic interaction manifolds constructed from rich single-cell phenotypes,https://doi.org/10.1126/science.aax4438,Thomas Norman,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRa,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,y,01.03.2019,2019 @@ -22,9 +22,9 @@ PapalexiSatija2021_eccite_arrayed_protein,Download,PapalexiSatija2021,eccite_arr PapalexiSatija2021_eccite_arrayed_RNA,Download,PapalexiSatija2021,eccite_arrayed_RNA,Characterizing the molecular regulation of inhibitory immune checkpoints with multimodal single-cell screens,https://doi.org/10.1038/s41588-021-00778-2,"Papalexi, E.",Homo sapiens THP-1,RNA + protein (RNA),7041849,h5ad,ECCITE-seq,THP-1 (monocytic leukemia ),CRISPR-cas9,acute monocytic leukemia,monocytes,cell_line,y,n,n,n,01.03.2021,2021 PapalexiSatija2021_eccite_protein,Download,PapalexiSatija2021,eccite_protein,Characterizing the molecular regulation of inhibitory immune checkpoints with multimodal single-cell screens,https://doi.org/10.1038/s41588-021-00778-2,"Papalexi, E.",Homo sapiens THP-1,RNA + protein (protein),7041849,h5ad,ECCITE-seq,THP-1 (monocytic leukemia ),CRISPR-cas9,acute monocytic leukemia,monocytes,cell_line,y,n,n,n,01.03.2021,2021 PapalexiSatija2021_eccite_RNA,Download,PapalexiSatija2021,eccite_RNA,Characterizing the molecular regulation of inhibitory immune checkpoints with multimodal single-cell screens,https://doi.org/10.1038/s41588-021-00778-2,"Papalexi, E.",Homo sapiens THP-1,RNA + protein (RNA),7041849,h5ad,ECCITE-seq,THP-1 (monocytic leukemia ),CRISPR-cas9,acute monocytic leukemia,monocytes,cell_line,y,n,n,n,01.03.2021,2021 
-PierceGreenleaf2021_K562,Download,PierceGreenleaf2021,K562,"High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer",https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,K562 (leukemia),CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.11.2020,2020 -PierceGreenleaf2021_MCF7,Download,PierceGreenleaf2021,MCF7,"High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer",https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,MCF7 (breast cancer),CRISPRi,breast adenocarcinoma,mammary epithelial cells,cell_line,y,n,n,n,01.11.2020,2020 -PierceGreenleaf2021_GM12878,Download,PierceGreenleaf2021,GM12878,"High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer",https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,GM12878 (lymphoblastoid cells),CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.11.2020,2020 +PierceGreenleaf2021_K562,Download,PierceGreenleaf2021,K562,High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer,https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,K562 (leukemia),CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.11.2020,2020 +PierceGreenleaf2021_MCF7,Download,PierceGreenleaf2021,MCF7,High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer,https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,MCF7 (breast cancer),CRISPRi,breast adenocarcinoma,mammary epithelial cells,cell_line,y,n,n,n,01.11.2020,2020 
+PierceGreenleaf2021_GM12878,Download,PierceGreenleaf2021,GM12878,High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer,https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,GM12878 (lymphoblastoid cells),CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.11.2020,2020 ReplogleWeissman2022_K562_essential,Download,ReplogleWeissman2022,K562_essential,Mapping information-rich genotype-phenotype landscapes with genome-scale Perturb-seq,https://doi.org/10.1016/j.cell.2022.05.013,Joseph Replogle,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,n,01.10.2022,2022 ReplogleWeissman2022_K562_gwps,Download,ReplogleWeissman2022,K562_gwps,Mapping information-rich genotype-phenotype landscapes with genome-scale Perturb-seq,https://doi.org/10.1016/j.cell.2022.05.013,Joseph Replogle,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,n,01.10.2022,2022 ReplogleWeissman2022_rpe1,Download,ReplogleWeissman2022,rpe1,Mapping information-rich genotype-phenotype landscapes with genome-scale Perturb-seq,https://doi.org/10.1016/j.cell.2022.05.013,Joseph Replogle,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,,CRISPRi,chronic myeloid leukemia,epithelial,cell_line,n,n,n,n,01.10.2022,2022 @@ -49,10 +49,11 @@ JoungZhang2023_combinatorial,Download,JoungZhang2023,combinatorial,A transcripti YaoCleary2023,Download,YaoCleary2023,,Scalable genetic screening for regulatory circuits using compressed Perturb-seq,https://doi.org/10.1101/2023.01.23.525200,Douglas Yao,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,THP-1 (monocytic leukemia ),"CRISPR-cas9, CRISPRi",,monocytes,cell_line,y,,,,,2023 QinTape2023_scRNAseq,Download,QinTape2023,scRNAseq,A Single-cell Perturbation Landscape of Colonic Stem Cell Polarisation,https://doi.org/10.1101/2023.02.15.528008,Xiao 
Qin,Mus musculus,RNA + protein (RNA),7041849,h5ad,CPA-Perturb-seq,colonic organoids,"drugs, genotype and co-cultures",,colonic epithelial cells,organoid,y,,,,,2023 SantinhaPlatt2023,Download,SantinhaPlatt2023,,Transcriptional linkage analysis with in vivo AAV-Perturb-seq,https://doi.org/10.1038/s41586-023-06570-y,Antonio J. Santinha,Mus musculus,RNA,7041849,h5ad,adeno-associated virus (AAV)-mediated direct in vivo single-cell CRISPR screening,brain tissue,CRISPR-cas9,healthy,neurons,primary,y,,,,,2023 -LaraAstiasoHuntly2023_leukemia,Download,LaraAstiasoHuntly2023,leukemia,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,"https://doi.org/10.1038/s41588-023-01471-2",David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,leukemia,bone marrow,primary,y,n,n,n,,2023 -LaraAstiasoHuntly2023_invivo,Download,LaraAstiasoHuntly2023,invivo,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,"https://doi.org/10.1038/s41588-023-01471-2",David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,healthy,bone marrow,primary,n,y,n,n,,2023 -LaraAstiasoHuntly2023_exvivo,Download,LaraAstiasoHuntly2023,exvivo,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,"https://doi.org/10.1038/s41588-023-01471-2",David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,healthy,bone marrow,primary,n,y,n,n,,2023 +LaraAstiasoHuntly2023_leukemia,Download,LaraAstiasoHuntly2023,leukemia,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,https://doi.org/10.1038/s41588-023-01471-2,David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,leukemia,bone 
marrow,primary,y,n,n,n,,2023 +LaraAstiasoHuntly2023_invivo,Download,LaraAstiasoHuntly2023,invivo,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,https://doi.org/10.1038/s41588-023-01471-2,David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,healthy,bone marrow,primary,n,y,n,n,,2023 +LaraAstiasoHuntly2023_exvivo,Download,LaraAstiasoHuntly2023,exvivo,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,https://doi.org/10.1038/s41588-023-01471-2,David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,healthy,bone marrow,primary,n,y,n,n,,2023 SunshineHein2023,Download,SunshineHein2023,,Systematic functional interrogation of SARS-CoV-2 host factors using Perturb-seq,https://doi.org/10.1038/s41467-023-41788-4,Sara Sunshine,Homo sapiens,RNA,7041849,h5ad,Perturb-seq with CRISPRi,Calu-3,CRISPRi,lung adenocarcinoma and SARS-CoV-2,lung epithelial cells,cell_line,y,n,n,y,,2023 WesselsSatija2023,Download,WesselsSatija2023,,Efficient combinatorial targeting of RNA transcripts in single cells with Cas13 RNA Perturb-seq ,https://doi.org/10.1038/s41592-022-01705-x,Hans-Hermann Wessels,Homo sapiens; Mus musculus,RNA,7041849,h5ad,CaRPool-seq,"AML cell lines (HEK293FT, NIH/3T3, or THP1 cells)",CRISPR-cas13,AML,myeloid cells,cell_line,y,n,n,y,,2023 LiangWang2023,Download,LiangWang2023,,In-organoid single-cell CRISPR screening reveals determinants of hepatocyte differentiation and maturation,https://doi.org/10.1186/s13059-023-03084-8,Junbo Liang,Mus musculus,RNA,7041849,h5ad,CROP-seq,liver organoids,"CRISPR-cas9, growth conditions",healthy,hepatocytes,organoid,n,n,n,y,,2023 -LotfollahiTheis2023,Download,LotfollahiTheis2023,,Predicting cellular responses to complex perturbations in high-throughput screens,https://doi.org/10.15252/msb.202211517,Mohammad 
Lotfollahi,Homo sapiens,RNA,7041849,h5ad,sciplex,A549,drugs,lung adenocarcinoma,lung epithelial cells,cell_line,y,n,n,y,,2023 \ No newline at end of file +LotfollahiTheis2023,Download,LotfollahiTheis2023,,Predicting cellular responses to complex perturbations in high-throughput screens,https://doi.org/10.15252/msb.202211517,Mohammad Lotfollahi,Homo sapiens,RNA,7041849,h5ad,sciplex,A549,drugs,lung adenocarcinoma,lung epithelial cells,cell_line,y,n,n,y,,2023 +CuiHacohen2023,Download,CuiHacohen2023,,Dictionary of immune responses to cytokines at single-cell resolution,https://doi.org/10.1038/s41586-023-06816-9,Ang Cui,Mus musculus,RNA,7041849,h5ad,Custom,draining lymph nodes,cytokines,healthy,mixed,primary,n,n,n,n,,2023 \ No newline at end of file diff --git a/website/datavzrd/scperturb_dataset_info_datavzrd_annotated.csv b/website/datavzrd/scperturb_dataset_info_datavzrd_annotated.csv index 9ddec1f..b7381d9 100644 --- a/website/datavzrd/scperturb_dataset_info_datavzrd_annotated.csv +++ b/website/datavzrd/scperturb_dataset_info_datavzrd_annotated.csv @@ -4,6 +4,7 @@ AdamsonWeissman2016_GSM2406677_10X005,Download,AdamsonWeissman2016,GSM2406677_10 AdamsonWeissman2016_GSM2406681_10X010,Download,AdamsonWeissman2016,GSM2406681_10X010,A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response,https://doi.org/10.1016/j.cell.2016.11.048,Britt Adamson,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,Chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,y,01.10.2019,2019,65337.0,113.0,3690.0,15355.0,2.0,554.0,1.0,6010.0 AissaBenevolenskaya2021,Download,AissaBenevolenskaya2021,,Single-cell transcriptional changes associated with drug tolerance and response to combination therapies in cancer,https://doi.org/10.1038/s41467-021-21884-z,Alexandre Aissa,Homo sapiens; Mus musculus,RNA,7041849,h5ad,Drop-Seq,"cell lines, patient tissue, mouse xenografts",drugs,Lung adenocarcinoma,Lung 
adenocarcinoma,cell_line,y,no,n,y,01.11.2020,2020,119071.0,3.0,49.0,75.0,1.0,29874.0,29340.0,29983.0 ChangYe2021,Download,ChangYe2021,,Identifying transcriptional programs underlying cancer drug response with TraCe-seq,https://doi.org/10.1038/s41587-021-01005-3,Matthew Chang,Homo sapiens,RNA,7041849,h5ad,clonal tagging (TraCe-seq),"lung and breast cancer cell lines PC9, MCF-10A, MDA-MB-231, NCI-H358, and NCI-H1373",drugs,lung adenocarcinoma,Lung adenocarcinoma,cell_line,y,n,n,n,01.08.2021,2021,42277.0,3.0,5278.0,18961.0,1.0,7657.0,5920.0,21043.0 +CuiHacohen2023,Download,CuiHacohen2023,,Dictionary of immune responses to cytokines at single-cell resolution,https://doi.org/10.1038/s41586-023-06816-9,Ang Cui,Mus musculus,RNA,7041849,h5ad,Custom,draining lymph nodes,cytokines,healthy,mixed,primary,n,n,n,n,,2023,539717.0,87.0,119.0,144.0,1.0,681.5,202.0,349669.0 DatlingerBock2017,Download,DatlingerBock2017,,Pooled CRISPR screening with single-cell transcriptome readout,https://doi.org/10.1038/nmeth.4177,Paul Datlinger,Homo sapiens; Mus musculus,RNA,7041849,h5ad,CROP-seq,Jurkat cells (immune cancer),"CRISPR-cas9, TCR stim",acute T cell leukemia,T cells,cell_line,y,n,n,y,01.12.2016,2016,5905.0,96.0,2713.0,6711.0,4.0,49.0,3.0,1320.0 DatlingerBock2021,Download,DatlingerBock2021,,Ultra-high-throughput single-cell RNA sequencing and perturbation screening with combinatorial fluidic indexing,https://doi.org/10.1038/s41592-021-01153-z,Paul Datlinger,Homo sapiens,RNA,7041849,h5ad,scifi-RNA-seq (derived from CROP-seq),Jurkat,"CRISPR-cas9, TCR stim",acute T cell leukemia,T cells,cell_line,y,n,n,y,01.12.2019,2019,39194.0,40.0,179.0,460.0,2.0,809.0,213.0,4497.0 DixitRegev2016,Download,DixitRegev2016,,Perturb-Seq: Dissecting Molecular Circuits with Scalable Single-Cell RNA Profiling of Pooled Genetic Screens,https://doi.org/10.1016/j.cell.2016.11.038,Atray Dixit,Homo sapiens; Mus musculus,RNA,7041849,h5ad,Perturb-seq,K562,CRISPR-cas9,myelogenous leukemia,lymphoblasts,,y,partially (TF 
knockout screen 7 and 13 dpi),n,y,01.11.2016,2016,51898.0,1728.0,3080.0,13974.0,4.0,1.0,1.0,4224.0