+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
setwd("/scratch/peidli/scPerturb/CuiHacohen2023/")
+
+
+
+
Warning: The working directory was changed to /scratch/peidli/scPerturb/CuiHacohen2023 inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
+
+
+
+
+
+
+
+
+
# Create Seurat object and demultiplex
+seurat_object <- Seurat::CreateSeuratObject(counts = data)
+seurat_object[["HTO"]] <- Seurat::CreateAssayObject(counts = tags)
+
+
+
+
+
+
+
seurat_object <- Seurat::NormalizeData(seurat_object, assay = "HTO", normalization.method = "CLR")
+seurat_object <- Seurat::MULTIseqDemux(seurat_object, assay = "HTO")
+
+
+
+
+
+
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpgYGB7cn0KbGlicmFyeShTZXVyYXQpCmBgYAoKCmBgYHtyfQpkYXRhID0gU2V1cmF0OjpSZWFkTXR4KCcvc2NyYXRjaC9wZWlkbGkvc2NQZXJ0dXJiL0N1aUhhY29oZW4yMDIzL2N5dG9raW5lLXNhbXBsZXMxNy1tYXRyaXgubXR4JywgJy9zY3JhdGNoL3BlaWRsaS9zY1BlcnR1cmIvQ3VpSGFjb2hlbjIwMjMvY3l0b2tpbmUtc2FtcGxlczE3LWJhcmNvZGVzLnRzdicsICcvc2NyYXRjaC9wZWlkbGkvc2NQZXJ0dXJiL0N1aUhhY29oZW4yMDIzL2N5dG9raW5lLXNhbXBsZXMxNy1mZWF0dXJlcy50c3YnLCBzdHJpcC5zdWZmaXg9VCkKdGFncyA9IFNldXJhdDo6UmVhZE10eCgnL3NjcmF0Y2gvcGVpZGxpL3NjUGVydHVyYi9DdWlIYWNvaGVuMjAyMy9jeXRva2luZS1oYXNodGFnczE3LW1hdHJpeC5tdHgnLCAnL3NjcmF0Y2gvcGVpZGxpL3NjUGVydHVyYi9DdWlIYWNvaGVuMjAyMy9jeXRva2luZS1oYXNodGFnczE3LWJhcmNvZGVzLnRzdicsICcvc2NyYXRjaC9wZWlkbGkvc2NQZXJ0dXJiL0N1aUhhY29oZW4yMDIzL2N5dG9raW5lLWhhc2h0YWdzMTctZmVhdHVyZXMudHN2JywgZmVhdHVyZS5jb2x1bW49MSkKYGBgCmBgYHtyfQpqb2ludC5iY3MgPC0gaW50ZXJzZWN0KGNvbG5hbWVzKGRhdGEpLCBjb2xuYW1lcyh0YWdzKSkKcHJpbnQobGVuZ3RoKGpvaW50LmJjcykpCmRhdGEgPC0gZGF0YVssIGpvaW50LmJjc10KdGFncyA8LSB0YWdzWywgam9pbnQuYmNzXQp0YWdzIDwtIHRhZ3Nbcm93U3Vtcyh0YWdzKSA+IDEwMDAwLF0gIyBSZW1vdmUgdGFncyB3aXRoIDwxMGsgaGFzaHRhZyBjb3VudHMKcm93U3Vtcyh0YWdzKQpgYGAKCmBgYHtyfQojIENyZWF0ZSBTZXVyYXQgb2JqZWN0IGFuZCBkZW11bHRpcGxleApzZXVyYXRfb2JqZWN0IDwtIFNldXJhdDo6Q3JlYXRlU2V1cmF0T2JqZWN0KGNvdW50cyA9IGRhdGEpCnNldXJhdF9vYmplY3RbWyJIVE8iXV0gPC0gU2V1cmF0OjpDcmVhdGVBc3NheU9iamVjdChjb3VudHMgPSB0YWdzKQpgYGAKCgpgYGB7cn0Kc2V1cmF0X29iamVjdCA8LSBTZXVyYXQ6Ok5vcm1hbGl6ZURhdGEoc2V1cmF0X29iamVjdCwgYXNzYXkgPSAiSFRPIiwgbm9ybWFsaXphdGlvbi5tZXRob2QgPSAiQ0xSIikKc2V1cmF0X29iamVjdCA8LSBTZXVyYXQ6Ok1VTFRJc2VxRGVtdXgoc2V1cmF0X29iamVjdCwgYXNzYXkgPSAiSFRPIikKYGBgCgoKCg==
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dataset_processing/snakemake/Snakefile b/dataset_processing/snakemake/Snakefile
index b371632..4b13d42 100644
--- a/dataset_processing/snakemake/Snakefile
+++ b/dataset_processing/snakemake/Snakefile
@@ -27,7 +27,8 @@ include: "subworkflows/SunshineHein2023/Snakefile"
include: "subworkflows/WesselsSatija2023/Snakefile"
include: "subworkflows/LiangWang2023/Snakefile"
include: "subworkflows/LotfollahiTheis2023/Snakefile"
-include: "subworkflows/KowalskiSatijaPreprint/Snakefile"
+include: "subworkflows/CuiHacohen2023/Snakefile"
+# include: "subworkflows/KowalskiSatijaPreprint/Snakefile"
### RULES ###
rule all:
@@ -43,12 +44,13 @@ rule all:
rules.WesselsSatija2023.output,
rules.LiangWang2023.output,
rules.LotfollahiTheis2023.output,
- rules.KowalskiSatijaPreprint.output,
+ rules.CuiHacohen2023.output,
# dysfunct
# WIP rules.McFalineTrapnell2023.output, # screen 2 too big. OOF???
# rules.WuBassett2023.output, # obtained from original authors, maybe a copy is still left on my charité mac?
-
+ # rules.KowalskiSatijaPreprint.output, # disabled: the PASTA package required to open the rds files could not be installed
+
# deprecated
# rules.UrsuBoehm2022.output, # does not have a control
diff --git a/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023.py b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023.py
new file mode 100644
index 0000000..c9713d0
--- /dev/null
+++ b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023.py
@@ -0,0 +1,46 @@
+import pandas as pd
+import scanpy as sc
+import sys
+
+from tqdm import tqdm
+from pathlib import Path
+
+# Custom functions
+sys.path.insert(1, '../../')
+from utils import annotate_qc, assert_annotations
+
+TEMPDIR = Path(snakemake.config['TEMPDIR'])
+
+# merge
+adatas = []
+for f in tqdm(snakemake.input):
+ adata = sc.read(f)
+ adatas.append(adata)
+adata = sc.concat(adatas, axis=0)
+
+# Obs
+adata.obs.rename({
+ 'nCount_RNA': 'ncounts',
+ 'nFeature_RNA': 'ngenes',
+ 'nCount_HTO': 'ncounts_tags',
+ 'filename_prefix': 'sample',
+ 'processing_batch': 'batch',
+ 'biological_replicate_number': 'bio_replicate',
+ 'cytokine': 'perturbation'
+}, axis=1, inplace=True)
+adata.obs.drop(['nFeature_HTO'], axis=1, inplace=True)
+adata.obs.perturbation = adata.obs.perturbation.astype(str)
+adata.obs['perturbation'][pd.isna(adata.obs['perturbation'])] = 'control'
+adata.obs = adata.obs[['perturbation', 'batch', 'bio_replicate', 'sample', 'ncounts', 'ngenes', 'ncounts_tags', 'hashtag_ID']]
+adata.obs['nperts'] = [1-p.count('control') if type(p)==str else 0 for p in adata.obs.perturbation]
+adata.obs['perturbation_type'] = 'cytokines'
+adata.obs['disease'] = "healthy"
+adata.obs['cancer'] = False
+adata.obs['tissue_type']="primary"
+adata.obs["celltype"] = 'mixed cells from draining lymph nodes'
+adata.obs['organism'] = 'mouse'
+annotate_qc(adata, species='mouse')
+assert_annotations(adata)
+
+adata.write(snakemake.output[0], compression='gzip')
+print('Done.')
diff --git a/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023_demux.R b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023_demux.R
new file mode 100644
index 0000000..a1f3058
--- /dev/null
+++ b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/CuiHacohen2023_demux.R
@@ -0,0 +1,25 @@
+library('Seurat')
+
+# Load the RNA and hashtag (HTO) count matrices named by the Snakemake rule (TRUE spelled out: T can be reassigned)
+data <- Seurat::ReadMtx(snakemake@input[['sample_mtx']], snakemake@input[['sample_barcodes']], snakemake@input[['sample_features']], strip.suffix = TRUE)
+tags <- Seurat::ReadMtx(snakemake@input[['tags_mtx']], snakemake@input[['tags_barcodes']], snakemake@input[['tags_features']], feature.column = 1)
+# data = Seurat::ReadMtx('cytokine-samples17-matrix.mtx', 'cytokine-samples17-barcodes.tsv', 'cytokine-samples17-features.tsv', strip.suffix=T)
+# tags = Seurat::ReadMtx('cytokine-hashtags17-matrix.mtx', 'cytokine-hashtags17-barcodes.tsv', 'cytokine-hashtags17-features.tsv', feature.column=1)
+
+# Subset RNA and HTO counts by joint cell barcodes (drop = FALSE keeps matrices even for a single row/column)
+joint.bcs <- intersect(colnames(data), colnames(tags))
+print(length(joint.bcs))
+data <- data[, joint.bcs, drop = FALSE]
+tags <- tags[, joint.bcs, drop = FALSE]
+print(rowSums(tags))
+tags <- tags[rowSums(tags) > 10000, , drop = FALSE] # Keep only tags with more than 10k hashtag counts
+
+# Create Seurat object, CLR-normalize the HTO assay and demultiplex with MULTIseqDemux
+seurat_object <- Seurat::CreateSeuratObject(counts = data)
+seurat_object[["HTO"]] <- Seurat::CreateAssayObject(counts = tags)
+seurat_object <- Seurat::NormalizeData(seurat_object, assay = "HTO", normalization.method = "CLR")
+seurat_object <- Seurat::MULTIseqDemux(seurat_object, assay = "HTO")
+print(head(seurat_object[[]]))
+
+# Save the per-cell metadata (including the demultiplexing calls) as CSV
+write.csv(seurat_object[[]], snakemake@output[['demux']])
\ No newline at end of file
diff --git a/dataset_processing/snakemake/subworkflows/CuiHacohen2023/Snakefile b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/Snakefile
new file mode 100644
index 0000000..4454142
--- /dev/null
+++ b/dataset_processing/snakemake/subworkflows/CuiHacohen2023/Snakefile
@@ -0,0 +1,128 @@
+"""
+Author: Stefan Peidli
+Date: 23.11.2023
+Run: snakemake
+"""
+
+from pathlib import Path
+configfile: "../../configuration/config.yaml"
+
+### PATHS ###
+DATADIR = Path(config['DOWNDIR']) # place to store data
+TEMPDIR = Path(config['TEMPDIR']) # place to store temporary files (huge files)
+
+### SAMPLES ###
+ids = [f'0{x}' if x < 10 else str(x) for x in range(1, 46) if x != 12] # sample 12 is missing
+
+# ### RULES ###
+rule CuiHacohen2023_download:
+    output:
+        temp(expand(TEMPDIR / 'CuiHacohen2023/cytokine-{library}{x}-{mode}',
+            library=['samples', 'hashtags'], x=ids,
+            mode=['barcodes.tsv', 'features.tsv', 'matrix.mtx'])),
+        TEMPDIR / 'CuiHacohen2023/GSE202186_map-scRNAseq-cytokines-dictionary.xlsx'  # mapping table required as input by CuiHacohen2023_annotate
+    resources:
+        partititon='short',  # NOTE(review): key is spelled 'partititon'; confirm which key the cluster profile reads before renaming
+        time='01:00:00',
+        mem_mb=8000,
+        disk_mb=8000
+    shell:
+        """
+        cd "{TEMPDIR}/CuiHacohen2023"
+        rm -rf ./*
+        wget --recursive --no-parent -nd -R "index.html*" ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE202nnn/GSE202186/suppl/
+        tar -xvf GSE202186_RAW.tar
+        rm GSE202186_RAW.tar
+        gunzip *.gz
+
+        for file in GSM*.tsv GSM*.mtx; do
+            new_name=$(echo "$file" | sed 's/^GSM[0-9]*_//')
+            mv "$file" "$new_name"
+        done
+        """
+
+rule CuiHacohen2023_demux:
+ input:
+ sample_mtx = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-matrix.mtx',  # RNA count matrix of sample {x}
+ sample_barcodes = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-barcodes.tsv',
+ sample_features = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-features.tsv',
+ tags_mtx = TEMPDIR / 'CuiHacohen2023/cytokine-hashtags{x}-matrix.mtx',  # hashtag count matrix of the same sample
+ tags_barcodes = TEMPDIR / 'CuiHacohen2023/cytokine-hashtags{x}-barcodes.tsv',
+ tags_features = TEMPDIR / 'CuiHacohen2023/cytokine-hashtags{x}-features.tsv',
+ output:
+ demux = TEMPDIR / 'CuiHacohen2023/samples{x}-demux.csv',  # CSV consumed by CuiHacohen2023_annotate
+ conda: 'r_env'
+ resources:
+ partititon='short',
+ time='04:00:00',
+ mem_mb=16000,
+ disk_mb=16000
+ script: 'CuiHacohen2023_demux.R'  # runs once per sample wildcard {x}; reads the six inputs above and writes the demux CSV
+
+rule CuiHacohen2023_annotate:
+ input:
+ sample_mtx = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-matrix.mtx',
+ sample_barcodes = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-barcodes.tsv',
+ sample_features = TEMPDIR / 'CuiHacohen2023/cytokine-samples{x}-features.tsv',
+ demux = TEMPDIR / 'CuiHacohen2023/samples{x}-demux.csv',  # produced by CuiHacohen2023_demux
+ sample_mapping = TEMPDIR / 'CuiHacohen2023/GSE202186_map-scRNAseq-cytokines-dictionary.xlsx'  # joined below on filename_prefix + hashtag_ID
+ output:
+ temp(TEMPDIR / 'CuiHacohen2023/samples{x}_temp_annotated.h5ad')  # one annotated AnnData per sample; merged by rule CuiHacohen2023
+ resources:
+ partititon='medium',
+ time='08:00:00',
+ mem_mb=32000,
+ disk_mb=32000
+ run:
+ import pandas as pd
+ import scanpy as sc
+ from scipy.sparse import csr_matrix
+ from scipy.io import mmread
+
+ idx = wildcards.x  # sample number as a string, e.g. '01'
+ sample_mapping = pd.read_excel(input.sample_mapping)
+
+ # read the count matrix plus its barcode (obs) and feature (var) tables
+ X = csr_matrix(mmread(input.sample_mtx)).T  # transposed so rows line up with the barcodes used as obs below
+ obs = pd.read_csv(input.sample_barcodes, sep='\t', index_col=0, names=['cell_barcode'])
+ var = pd.read_csv(input.sample_features, sep='\t', index_col=1, names=['ensembl_id', 'gene_symbol', 'feature_type'])  # indexed by gene symbol
+
+ # build the AnnData object
+ adata = sc.AnnData(X, obs, var)
+ adata.var_names_make_unique()
+ adata.obs_names = [x.replace('-1', '') for x in adata.obs_names]  # remove '-1' so barcodes can match the demux table (the R script reads with strip.suffix)
+
+ # add demultiplexing: keep only cells with a single-hashtag call
+ demux = pd.read_csv(input.demux, index_col=0)
+ demux = demux[~demux.MULTI_ID.isin(['Negative', 'Doublet', 'unmapped'])].copy()
+ demux['hashtag_ID'] = [x[4] for x in demux.MULTI_ID]  # NOTE(review): uses the 5th character of MULTI_ID as the hashtag ID; assumes a single-character ID at that position, confirm against the tag names
+ demux.drop(['orig.ident', 'MULTI_classification', 'MULTI_ID'], axis=1, inplace=True)
+ obs_ = pd.merge(adata.obs, demux, left_index=True, right_index=True, how='inner')  # inner join: only barcodes present in both tables remain
+ adata = adata[obs_.index].copy()
+ adata.obs=obs_
+
+ # annotate each cell with the mapping row for its (filename_prefix, hashtag_ID) pair
+ sample_mapping.hashtag_ID = sample_mapping.hashtag_ID.astype(str)  # cast to str to match the str hashtag_ID built above
+ adata.obs['filename_prefix'] = f'cytokine-samples{idx}'
+ adata.obs['cell_barcode'] = adata.obs.index  # column-based merge does not keep the index, so stash the barcode and restore it via set_index
+ adata.obs = pd.merge(adata.obs, sample_mapping,
+ left_on=['filename_prefix', 'hashtag_ID'],
+ right_on=['filename_prefix', 'hashtag_ID'],
+ how='left'
+ ).set_index('cell_barcode')
+ adata.obs_names = [f'{x}-{idx}' for x in adata.obs_names]  # append the sample number; presumably to keep barcodes unique after merging samples
+
+ # write the per-sample result
+ adata.write_h5ad(output[0])
+
+rule CuiHacohen2023:
+ input:
+ expand(TEMPDIR / 'CuiHacohen2023/samples{x}_temp_annotated.h5ad', x=ids)  # one annotated file per sample id in `ids`
+ output:
+ DATADIR / 'CuiHacohen2023.h5ad'  # final merged dataset
+ resources:
+ partititon='short',
+ time='04:00:00',
+ mem_mb=64000,
+ disk_mb=64000
+ script: 'CuiHacohen2023.py'  # concatenates all inputs, harmonises obs annotations and writes the output
diff --git a/environments/r_env.yaml b/environments/r_env.yaml
index d8f67c6..c9c1616 100644
--- a/environments/r_env.yaml
+++ b/environments/r_env.yaml
@@ -11,3 +11,4 @@ dependencies:
- conda-forge::r-seurat
- conda-forge::r-irkernel
- conda-forge::r-optparse
+# You have to manually install satijalab/PASTA for the data from Kowalski et al.
diff --git a/website/datavzrd/scperturb_dataset_info_datavzrd.csv b/website/datavzrd/scperturb_dataset_info_datavzrd.csv
index 65f6980..3401c71 100644
--- a/website/datavzrd/scperturb_dataset_info_datavzrd.csv
+++ b/website/datavzrd/scperturb_dataset_info_datavzrd.csv
@@ -2,7 +2,7 @@ Full index,Download Link,Publication index,Dataset index,Title,doi_url,First Aut
AdamsonWeissman2016_GSM2406675_10X001,Download,AdamsonWeissman2016,GSM2406675_10X001,A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response,https://doi.org/10.1016/j.cell.2016.11.048,Britt Adamson,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,Chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,n,01.10.2019,2019
AdamsonWeissman2016_GSM2406677_10X005,Download,AdamsonWeissman2016,GSM2406677_10X005,A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response,https://doi.org/10.1016/j.cell.2016.11.048,Britt Adamson,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,Chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,y,01.10.2019,2019
AdamsonWeissman2016_GSM2406681_10X010,Download,AdamsonWeissman2016,GSM2406681_10X010,A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response,https://doi.org/10.1016/j.cell.2016.11.048,Britt Adamson,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,Chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,y,01.10.2019,2019
-AissaBenevolenskaya2021,Download,AissaBenevolenskaya2021,,"Single-cell transcriptional changes associated with drug tolerance and response to combination therapies in cancer",https://doi.org/10.1038/s41467-021-21884-z,Alexandre Aissa,Homo sapiens; Mus musculus,RNA,7041849,h5ad,Drop-Seq,"cell lines, patient tissue, mouse xenografts",drugs,Lung adenocarcinoma,Lung adenocarcinoma,cell_line,y,no,n,y,01.11.2020,2020
+AissaBenevolenskaya2021,Download,AissaBenevolenskaya2021,,Single-cell transcriptional changes associated with drug tolerance and response to combination therapies in cancer,https://doi.org/10.1038/s41467-021-21884-z,Alexandre Aissa,Homo sapiens; Mus musculus,RNA,7041849,h5ad,Drop-Seq,"cell lines, patient tissue, mouse xenografts",drugs,Lung adenocarcinoma,Lung adenocarcinoma,cell_line,y,no,n,y,01.11.2020,2020
ChangYe2021,Download,ChangYe2021,,Identifying transcriptional programs underlying cancer drug response with TraCe-seq,https://doi.org/10.1038/s41587-021-01005-3,Matthew Chang,Homo sapiens,RNA,7041849,h5ad,clonal tagging (TraCe-seq),"lung and breast cancer cell lines PC9, MCF-10A, MDA-MB-231, NCI-H358, and NCI-H1373",drugs,lung adenocarcinoma,Lung adenocarcinoma,cell_line,y,n,n,n,01.08.2021,2021
DatlingerBock2017,Download,DatlingerBock2017,,Pooled CRISPR screening with single-cell transcriptome readout,https://doi.org/10.1038/nmeth.4177,Paul Datlinger,Homo sapiens; Mus musculus,RNA,7041849,h5ad,CROP-seq,Jurkat cells (immune cancer),"CRISPR-cas9, TCR stim",acute T cell leukemia,T cells,cell_line,y,n,n,y,01.12.2016,2016
DatlingerBock2021,Download,DatlingerBock2021,,Ultra-high-throughput single-cell RNA sequencing and perturbation screening with combinatorial fluidic indexing,https://doi.org/10.1038/s41592-021-01153-z,Paul Datlinger,Homo sapiens,RNA,7041849,h5ad,scifi-RNA-seq (derived from CROP-seq),Jurkat,"CRISPR-cas9, TCR stim",acute T cell leukemia,T cells,cell_line,y,n,n,y,01.12.2019,2019
@@ -12,9 +12,9 @@ FrangiehIzar2021_protein,Download,FrangiehIzar2021,protein,Multimodal pooled Per
GasperiniShendure2019_atscale,Download,GasperiniShendure2019,atscale,A Genome-wide Framework for Mapping Gene Regulation via Cellular Genetic Screens,https://doi.org/10.1016/j.cell.2018.11.029,Molly Gasperini,Homo sapiens,RNA,7041849,h5ad,CROP-seq,K562,CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,y,01.06.2018,2018
GasperiniShendure2019_highMOI,Download,GasperiniShendure2019,highMOI,A Genome-wide Framework for Mapping Gene Regulation via Cellular Genetic Screens,https://doi.org/10.1016/j.cell.2018.11.029,Molly Gasperini,Homo sapiens,RNA,7041849,h5ad,CROP-seq,K562,CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,y,01.06.2018,2018
GasperiniShendure2019_lowMOI,Download,GasperiniShendure2019,lowMOI,A Genome-wide Framework for Mapping Gene Regulation via Cellular Genetic Screens,https://doi.org/10.1016/j.cell.2018.11.029,Molly Gasperini,Homo sapiens,RNA,7041849,h5ad,CROP-seq,K562,CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,y,01.06.2018,2018
-GehringPachter2019,Download,GehringPachter2019,,Highly multiplexed single-cell RNA-seq by DNA oligonucleotide tagging of cellular proteins,https://doi.org/10.1038/s41587-019-0372-z,Jase Gehring,Mus musculus,RNA,7041849,h5ad,"96-plex scRNA-seq oligo barcodes",mouse neural stem cells (human only for species mixing experiment),drugs,healthy,neural stem cells,primary,n,n,y,y,01.01.2020,2020
-Liscovitch-BrauerSanjana2021_K562_1,Download,Liscovitch-BrauerSanjana2021,K562_1,"Profiling the genetic determinants of chromatin accessibility with scalable single-cell CRISPR screens",https://doi.org/10.1038/s41587-021-00902-x,"Liscovitch-Brauer, N.",Homo sapiens,ATAC,7058382,zip,CRISPR–sciATAC,Myeloid Leukemia,CRISPR-cas9,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.04.2021,2021
-Liscovitch-BrauerSanjana2021_K562_2,Download,Liscovitch-BrauerSanjana2021,K562_2,"Profiling the genetic determinants of chromatin accessibility with scalable single-cell CRISPR screens",https://doi.org/10.1038/s41587-021-00902-x,"Liscovitch-Brauer, N.",Homo sapiens,ATAC,7058382,zip,CRISPR–sciATAC,Myeloid Leukemia,CRISPR-cas9,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.04.2021,2021
+GehringPachter2019,Download,GehringPachter2019,,Highly multiplexed single-cell RNA-seq by DNA oligonucleotide tagging of cellular proteins,https://doi.org/10.1038/s41587-019-0372-z,Jase Gehring,Mus musculus,RNA,7041849,h5ad,96-plex scRNA-seq oligo barcodes,mouse neural stem cells (human only for species mixing experiment),drugs,healthy,neural stem cells,primary,n,n,y,y,01.01.2020,2020
+Liscovitch-BrauerSanjana2021_K562_1,Download,Liscovitch-BrauerSanjana2021,K562_1,Profiling the genetic determinants of chromatin accessibility with scalable single-cell CRISPR screens,https://doi.org/10.1038/s41587-021-00902-x,"Liscovitch-Brauer, N.",Homo sapiens,ATAC,7058382,zip,CRISPR–sciATAC,Myeloid Leukemia,CRISPR-cas9,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.04.2021,2021
+Liscovitch-BrauerSanjana2021_K562_2,Download,Liscovitch-BrauerSanjana2021,K562_2,Profiling the genetic determinants of chromatin accessibility with scalable single-cell CRISPR screens,https://doi.org/10.1038/s41587-021-00902-x,"Liscovitch-Brauer, N.",Homo sapiens,ATAC,7058382,zip,CRISPR–sciATAC,Myeloid Leukemia,CRISPR-cas9,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.04.2021,2021
McFarlandTsherniak2020,Download,McFarlandTsherniak2020,,Multiplexed single-cell transcriptional response profiling to define cancer vulnerabilities and therapeutic mechanism of action,https://doi.org/10.1038/s41467-020-17440-w,James McFarland,Homo sapiens,RNA,7041849,h5ad,MIX-seq (pooled scRNA-seq),200 different cell lines,"drugs, CRISPR-cas9",NA,NA,cell_line,y,partially,n,n,01.12.2019,2019
MimitouSmibert2021,Download,MimitouSmibert2021,,"Scalable, multimodal profiling of chromatin accessibility, gene expression and protein levels in single cells",https://doi.org/10.1038/s41587-021-00927-2,Eleni P. Mimitou,Homo sapiens,ATAC + protein,7041849,h5ad,ASAP-seq,Purified CD4 T cells,CRISPR-cas9,healthy,T cells,primary,n,n,n,n,01.05.2019,2019
NormanWeissman2019_filtered,Download,NormanWeissman2019,filtered,Exploring genetic interaction manifolds constructed from rich single-cell phenotypes,https://doi.org/10.1126/science.aax4438,Thomas Norman,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRa,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,y,01.03.2019,2019
@@ -22,9 +22,9 @@ PapalexiSatija2021_eccite_arrayed_protein,Download,PapalexiSatija2021,eccite_arr
PapalexiSatija2021_eccite_arrayed_RNA,Download,PapalexiSatija2021,eccite_arrayed_RNA,Characterizing the molecular regulation of inhibitory immune checkpoints with multimodal single-cell screens,https://doi.org/10.1038/s41588-021-00778-2,"Papalexi, E.",Homo sapiens THP-1,RNA + protein (RNA),7041849,h5ad,ECCITE-seq,THP-1 (monocytic leukemia ),CRISPR-cas9,acute monocytic leukemia,monocytes,cell_line,y,n,n,n,01.03.2021,2021
PapalexiSatija2021_eccite_protein,Download,PapalexiSatija2021,eccite_protein,Characterizing the molecular regulation of inhibitory immune checkpoints with multimodal single-cell screens,https://doi.org/10.1038/s41588-021-00778-2,"Papalexi, E.",Homo sapiens THP-1,RNA + protein (protein),7041849,h5ad,ECCITE-seq,THP-1 (monocytic leukemia ),CRISPR-cas9,acute monocytic leukemia,monocytes,cell_line,y,n,n,n,01.03.2021,2021
PapalexiSatija2021_eccite_RNA,Download,PapalexiSatija2021,eccite_RNA,Characterizing the molecular regulation of inhibitory immune checkpoints with multimodal single-cell screens,https://doi.org/10.1038/s41588-021-00778-2,"Papalexi, E.",Homo sapiens THP-1,RNA + protein (RNA),7041849,h5ad,ECCITE-seq,THP-1 (monocytic leukemia ),CRISPR-cas9,acute monocytic leukemia,monocytes,cell_line,y,n,n,n,01.03.2021,2021
-PierceGreenleaf2021_K562,Download,PierceGreenleaf2021,K562,"High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer",https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,K562 (leukemia),CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.11.2020,2020
-PierceGreenleaf2021_MCF7,Download,PierceGreenleaf2021,MCF7,"High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer",https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,MCF7 (breast cancer),CRISPRi,breast adenocarcinoma,mammary epithelial cells,cell_line,y,n,n,n,01.11.2020,2020
-PierceGreenleaf2021_GM12878,Download,PierceGreenleaf2021,GM12878,"High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer",https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,GM12878 (lymphoblastoid cells),CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.11.2020,2020
+PierceGreenleaf2021_K562,Download,PierceGreenleaf2021,K562,High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer,https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,K562 (leukemia),CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.11.2020,2020
+PierceGreenleaf2021_MCF7,Download,PierceGreenleaf2021,MCF7,High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer,https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,MCF7 (breast cancer),CRISPRi,breast adenocarcinoma,mammary epithelial cells,cell_line,y,n,n,n,01.11.2020,2020
+PierceGreenleaf2021_GM12878,Download,PierceGreenleaf2021,GM12878,High-throughput single-cell chromatin accessibility CRISPR screens enable unbiased identification of regulatory networks in cancer,https://doi.org/10.1038/s41467-021-23213-w,Sarah Pierce,Homo sapiens,ATAC,7058382,zip,Spear-ATAC,GM12878 (lymphoblastoid cells),CRISPRi,chronic myelogenous leukemia,lymphoblasts,cell_line,y,n,n,n,01.11.2020,2020
ReplogleWeissman2022_K562_essential,Download,ReplogleWeissman2022,K562_essential,Mapping information-rich genotype-phenotype landscapes with genome-scale Perturb-seq,https://doi.org/10.1016/j.cell.2022.05.013,Joseph Replogle,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,n,01.10.2022,2022
ReplogleWeissman2022_K562_gwps,Download,ReplogleWeissman2022,K562_gwps,Mapping information-rich genotype-phenotype landscapes with genome-scale Perturb-seq,https://doi.org/10.1016/j.cell.2022.05.013,Joseph Replogle,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,n,01.10.2022,2022
ReplogleWeissman2022_rpe1,Download,ReplogleWeissman2022,rpe1,Mapping information-rich genotype-phenotype landscapes with genome-scale Perturb-seq,https://doi.org/10.1016/j.cell.2022.05.013,Joseph Replogle,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,,CRISPRi,chronic myeloid leukemia,epithelial,cell_line,n,n,n,n,01.10.2022,2022
@@ -49,10 +49,11 @@ JoungZhang2023_combinatorial,Download,JoungZhang2023,combinatorial,A transcripti
YaoCleary2023,Download,YaoCleary2023,,Scalable genetic screening for regulatory circuits using compressed Perturb-seq,https://doi.org/10.1101/2023.01.23.525200,Douglas Yao,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,THP-1 (monocytic leukemia ),"CRISPR-cas9, CRISPRi",,monocytes,cell_line,y,,,,,2023
QinTape2023_scRNAseq,Download,QinTape2023,scRNAseq,A Single-cell Perturbation Landscape of Colonic Stem Cell Polarisation,https://doi.org/10.1101/2023.02.15.528008,Xiao Qin,Mus musculus,RNA + protein (RNA),7041849,h5ad,CPA-Perturb-seq,colonic organoids,"drugs, genotype and co-cultures",,colonic epithelial cells,organoid,y,,,,,2023
SantinhaPlatt2023,Download,SantinhaPlatt2023,,Transcriptional linkage analysis with in vivo AAV-Perturb-seq,https://doi.org/10.1038/s41586-023-06570-y,Antonio J. Santinha,Mus musculus,RNA,7041849,h5ad,adeno-associated virus (AAV)-mediated direct in vivo single-cell CRISPR screening,brain tissue,CRISPR-cas9,healthy,neurons,primary,y,,,,,2023
-LaraAstiasoHuntly2023_leukemia,Download,LaraAstiasoHuntly2023,leukemia,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,"https://doi.org/10.1038/s41588-023-01471-2",David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,leukemia,bone marrow,primary,y,n,n,n,,2023
-LaraAstiasoHuntly2023_invivo,Download,LaraAstiasoHuntly2023,invivo,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,"https://doi.org/10.1038/s41588-023-01471-2",David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,healthy,bone marrow,primary,n,y,n,n,,2023
-LaraAstiasoHuntly2023_exvivo,Download,LaraAstiasoHuntly2023,exvivo,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,"https://doi.org/10.1038/s41588-023-01471-2",David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,healthy,bone marrow,primary,n,y,n,n,,2023
+LaraAstiasoHuntly2023_leukemia,Download,LaraAstiasoHuntly2023,leukemia,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,https://doi.org/10.1038/s41588-023-01471-2,David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,leukemia,bone marrow,primary,y,n,n,n,,2023
+LaraAstiasoHuntly2023_invivo,Download,LaraAstiasoHuntly2023,invivo,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,https://doi.org/10.1038/s41588-023-01471-2,David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,healthy,bone marrow,primary,n,y,n,n,,2023
+LaraAstiasoHuntly2023_exvivo,Download,LaraAstiasoHuntly2023,exvivo,In vivo screening characterizes chromatin factor functions during normal and malignant hematopoiesis,https://doi.org/10.1038/s41588-023-01471-2,David Lara-Astiaso,Mus musculus,RNA,7041849,h5ad,single-cell in vivo CRISPR screen,bone marrow transplant,CRISPR-cas9,healthy,bone marrow,primary,n,y,n,n,,2023
SunshineHein2023,Download,SunshineHein2023,,Systematic functional interrogation of SARS-CoV-2 host factors using Perturb-seq,https://doi.org/10.1038/s41467-023-41788-4,Sara Sunshine,Homo sapiens,RNA,7041849,h5ad,Perturb-seq with CRISPRi,Calu-3,CRISPRi,lung adenocarcinoma and SARS-CoV-2,lung epithelial cells,cell_line,y,n,n,y,,2023
WesselsSatija2023,Download,WesselsSatija2023,,Efficient combinatorial targeting of RNA transcripts in single cells with Cas13 RNA Perturb-seq ,https://doi.org/10.1038/s41592-022-01705-x,Hans-Hermann Wessels,Homo sapiens; Mus musculus,RNA,7041849,h5ad,CaRPool-seq,"AML cell lines (HEK293FT, NIH/3T3, or THP1 cells)",CRISPR-cas13,AML,myeloid cells,cell_line,y,n,n,y,,2023
LiangWang2023,Download,LiangWang2023,,In-organoid single-cell CRISPR screening reveals determinants of hepatocyte differentiation and maturation,https://doi.org/10.1186/s13059-023-03084-8,Junbo Liang,Mus musculus,RNA,7041849,h5ad,CROP-seq,liver organoids,"CRISPR-cas9, growth conditions",healthy,hepatocytes,organoid,n,n,n,y,,2023
-LotfollahiTheis2023,Download,LotfollahiTheis2023,,Predicting cellular responses to complex perturbations in high-throughput screens,https://doi.org/10.15252/msb.202211517,Mohammad Lotfollahi,Homo sapiens,RNA,7041849,h5ad,sciplex,A549,drugs,lung adenocarcinoma,lung epithelial cells,cell_line,y,n,n,y,,2023
\ No newline at end of file
+LotfollahiTheis2023,Download,LotfollahiTheis2023,,Predicting cellular responses to complex perturbations in high-throughput screens,https://doi.org/10.15252/msb.202211517,Mohammad Lotfollahi,Homo sapiens,RNA,7041849,h5ad,sciplex,A549,drugs,lung adenocarcinoma,lung epithelial cells,cell_line,y,n,n,y,,2023
+CuiHacohen2023,Download,CuiHacohen2023,,Dictionary of immune responses to cytokines at single-cell resolution,https://doi.org/10.1038/s41586-023-06816-9,Ang Cui,Mus musculus,RNA,7041849,h5ad,Custom,draining lymph nodes,cytokines,healthy,mixed,primary,n,n,n,n,,2023
\ No newline at end of file
diff --git a/website/datavzrd/scperturb_dataset_info_datavzrd_annotated.csv b/website/datavzrd/scperturb_dataset_info_datavzrd_annotated.csv
index 9ddec1f..b7381d9 100644
--- a/website/datavzrd/scperturb_dataset_info_datavzrd_annotated.csv
+++ b/website/datavzrd/scperturb_dataset_info_datavzrd_annotated.csv
@@ -4,6 +4,7 @@ AdamsonWeissman2016_GSM2406677_10X005,Download,AdamsonWeissman2016,GSM2406677_10
AdamsonWeissman2016_GSM2406681_10X010,Download,AdamsonWeissman2016,GSM2406681_10X010,A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response,https://doi.org/10.1016/j.cell.2016.11.048,Britt Adamson,Homo sapiens,RNA,7041849,h5ad,Perturb-seq,K562,CRISPRi,Chronic myeloid leukemia,lymphoblasts,cell_line,y,n,n,y,01.10.2019,2019,65337.0,113.0,3690.0,15355.0,2.0,554.0,1.0,6010.0
AissaBenevolenskaya2021,Download,AissaBenevolenskaya2021,,Single-cell transcriptional changes associated with drug tolerance and response to combination therapies in cancer,https://doi.org/10.1038/s41467-021-21884-z,Alexandre Aissa,Homo sapiens; Mus musculus,RNA,7041849,h5ad,Drop-Seq,"cell lines, patient tissue, mouse xenografts",drugs,Lung adenocarcinoma,Lung adenocarcinoma,cell_line,y,no,n,y,01.11.2020,2020,119071.0,3.0,49.0,75.0,1.0,29874.0,29340.0,29983.0
ChangYe2021,Download,ChangYe2021,,Identifying transcriptional programs underlying cancer drug response with TraCe-seq,https://doi.org/10.1038/s41587-021-01005-3,Matthew Chang,Homo sapiens,RNA,7041849,h5ad,clonal tagging (TraCe-seq),"lung and breast cancer cell lines PC9, MCF-10A, MDA-MB-231, NCI-H358, and NCI-H1373",drugs,lung adenocarcinoma,Lung adenocarcinoma,cell_line,y,n,n,n,01.08.2021,2021,42277.0,3.0,5278.0,18961.0,1.0,7657.0,5920.0,21043.0
+CuiHacohen2023,Download,CuiHacohen2023,,Dictionary of immune responses to cytokines at single-cell resolution,https://doi.org/10.1038/s41586-023-06816-9,Ang Cui,Mus musculus,RNA,7041849,h5ad,Custom,draining lymph nodes,cytokines,healthy,mixed,primary,n,n,n,n,,2023,539717.0,87.0,119.0,144.0,1.0,681.5,202.0,349669.0
DatlingerBock2017,Download,DatlingerBock2017,,Pooled CRISPR screening with single-cell transcriptome readout,https://doi.org/10.1038/nmeth.4177,Paul Datlinger,Homo sapiens; Mus musculus,RNA,7041849,h5ad,CROP-seq,Jurkat cells (immune cancer),"CRISPR-cas9, TCR stim",acute T cell leukemia,T cells,cell_line,y,n,n,y,01.12.2016,2016,5905.0,96.0,2713.0,6711.0,4.0,49.0,3.0,1320.0
DatlingerBock2021,Download,DatlingerBock2021,,Ultra-high-throughput single-cell RNA sequencing and perturbation screening with combinatorial fluidic indexing,https://doi.org/10.1038/s41592-021-01153-z,Paul Datlinger,Homo sapiens,RNA,7041849,h5ad,scifi-RNA-seq (derived from CROP-seq),Jurkat,"CRISPR-cas9, TCR stim",acute T cell leukemia,T cells,cell_line,y,n,n,y,01.12.2019,2019,39194.0,40.0,179.0,460.0,2.0,809.0,213.0,4497.0
DixitRegev2016,Download,DixitRegev2016,,Perturb-Seq: Dissecting Molecular Circuits with Scalable Single-Cell RNA Profiling of Pooled Genetic Screens,https://doi.org/10.1016/j.cell.2016.11.038,Atray Dixit,Homo sapiens; Mus musculus,RNA,7041849,h5ad,Perturb-seq,K562,CRISPR-cas9,myelogenous leukemia,lymphoblasts,,y,partially (TF knockout screen 7 and 13 dpi),n,y,01.11.2016,2016,51898.0,1728.0,3080.0,13974.0,4.0,1.0,1.0,4224.0