Merge pull request #86 from baigal628/lisa2

MAESTRO v1.2.2 include LISA2
liulab-dfci · Dec 10, 2020 · c904dca · c904dca
2 parents 5917edc + 8844f16
commit c904dca
Show file tree

Hide file tree

Showing 15 changed files with 260 additions and 323 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -55,6 +55,7 @@ install:
 
   # configure the channels
   - conda config --add channels defaults
+  - conda config --add channels liulab-dfci
   - conda config --add channels bioconda
   - conda config --add channels conda-forge
   - conda install -q mamba -c conda-forge
@@ -127,6 +128,7 @@ script:
   - giggle search 
   # - Rabit -help
   - sinto -h
+  - lisa
   - MAESTRO -v
   - R -e "library(MAESTRO);library(Seurat)"
   - R -e "library(org.Hs.eg.db);library(org.Mm.eg.db)"

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: MAESTRO
 Type: Package
 Title: Model-based Analyses of Single-cell Transcriptome and Regulome
-Version: 1.2.1.9999
+Version: 1.2.2
 Date: 2020-10-28
 Author: Chenfei Wang, Dongqing Sun
 Maintainer: Dongqing Sun<dongqingsun96@gmail.com>
-Description: MAESTRO is an intergrative analysis pipeline to support downstream analysis of single-cell RNA-seq and single-cell ATAC-seq dataset. MAESTRO provides funtion for quality control, normalization, clustering, differential gene and peak analysis, marker gene based annotation, transcription factor identification using Cistome toolkit, and intergrative analysis for scRNA-seq and scATACseq. 
+Description: MAESTRO is an integrative analysis pipeline to support downstream analysis of single-cell RNA-seq and single-cell ATAC-seq datasets. MAESTRO provides functions for quality control, normalization, clustering, differential gene and peak analysis, marker gene based annotation, transcription factor identification using the Cistrome toolkit, and integrative analysis for scRNA-seq and scATAC-seq.
 URL: https://github.com/LiuLab-dfci/MAESTRO
 Depends: R (>= 3.6.1)
-Imports: Seurat (>= 3.1.2), ggplot2 (>= 3.0.0), ggrepel, cowplot, Matrix (>= 
+Imports: Seurat (>= 3.1.2), ggplot2 (>= 3.0.0), ggrepel, cowplot, Matrix (>=
          1.2.14), dplyr, png, RColorBrewer, scales, pheatmap, MAGeCKFlute, DESeq2, Gmisc,
          grid, karyoploteR, presto, AnnotationDbi, org.Hs.eg.db, org.Mm.eg.db
 Suggests: knitr, pagoda2, RCA, MAST, scABC, devtools, uwot, cisTopic, chromVAR, motifmatchr,
@@ -24,7 +24,7 @@ License: GPL (>=3)
 Encoding: UTF-8
 LazyData: true
 NeedsCompilation: no
-biocViews: Software, Single-cell, RNA-seq, ATAC-seq, QualityControl, Clustering, 
+biocViews: Software, Single-cell, RNA-seq, ATAC-seq, QualityControl, Clustering,
        DifferentialExpression, DifferentialPeakCalling, TranscriptionFactorEnrichment,
        IntegrativeAnalyis, Visualization
 RoxygenNote: 7.0.2
diff --git a/MAESTRO/MAESTRO_ParameterValidate.py b/MAESTRO/MAESTRO_ParameterValidate.py
@@ -120,19 +120,9 @@ def scrna_validator(args):
             logging.error("--rsem is required. Please provide the prefix of transcript references for RSEM. See --rsem help for more details.")
             exit(1)
 
-    if args.lisamode == "local":
-        if args.lisaenv == "":
-            logging.error("--lisaenv is required when lisamode is 'local'. Please specify the name of LISA environment!")
-            exit(1)
-        if args.condadir == "":
-            logging.error("--condadir is required when lisamode is 'local'. Please specify the directory where miniconda or anaconda is installed!")
-            exit(1)
-
     if args.signature not in ['human.immune.CIBERSORT', 'mouse.brain.ALLEN', 'mouse.all.facs.TabulaMuris', 'mouse.all.droplet.TabulaMuris']:
         if os.path.exists(args.signature):
             pass
         else:
             logging.error("Please specify the signature built in MAESTRO or provide customized signature file. See --signature help for more details!")
             exit(1)
-
-
diff --git a/MAESTRO/MAESTRO_PipeInit.py b/MAESTRO/MAESTRO_PipeInit.py
diff --git a/MAESTRO/R/scRNAseq_pipe.R b/MAESTRO/R/scRNAseq_pipe.R
@@ -19,21 +19,15 @@ option_list = list(
   make_option(c("--species"), type = "character", default = "GRCh38",
               action = "store", help = "The platform of scRNA-seq."
   ),
-  make_option(c("--method"), type = "character", default = "LISA",
-              action = "store", help = "The method to identify driver regulators. [LISA, RABIT]"
-  ),
+  #make_option(c("--lisamode"), type = "character", default = "multi",
+              #action = "store", help = "Mode to run LISA (multi or one-vs-rest)."
+  #),
+  #make_option(c("--method"), type = "character", default = "LISA",
+              #action = "store", help = "The method to identify driver regulators. [LISA, RABIT]"
+  #),
   make_option(c("--signature"), type = "character", default = "",
               action = "store", help = "The cell signature file for celltype annotation. Default is built-in CIBERSORT immune cell signature."
   ),
-  make_option(c("--lisamode"), type = "character", default = "",
-              action = "store", help = "Mode to run LISA (web or local)."
-  ),
-  make_option(c("--condadir"), type = "character", default = "",
-              action = "store", help = "Directory where miniconda or anaconda is installed (only if method is set to lisa)."
-  ),
-  make_option(c("--lisaenv"), type = "character", default = "lisa",
-              action = "store", help = "Name of lisa environment (only if method is set to lisa)."
-  ),
   make_option(c("--thread"), type = "integer", default = 1,
               action = "store", help = "Number of cores to use."
   )
@@ -45,11 +39,8 @@ setwd(argue$outdir)
 count_mat = argue$expression
 prefix = argue$prefix
 thread = argue$thread
-method = argue$method
+#method = argue$method
 sigfile = argue$signature
-lisamode = argue$lisamode
-condadir = argue$condadir
-lisaenv = argue$lisaenv
 species = argue$species
 
 
@@ -79,10 +70,8 @@ if(sigfile %in% c("human.immune.CIBERSORT", "mouse.brain.ALLEN", "mouse.all.facs
   signatures = read.table(sigfile, header = FALSE, sep = "\t", stringsAsFactors = FALSE)
 }
 RNA.res <- RNARunSeurat(inputMat = exp.dat, project = prefix, min.c = 10, min.g = 100)
-RNA.res$RNA <- RNAAnnotateCelltype(RNA = RNA.res$RNA, genes = RNA.res$genes, 
+RNA.res$RNA <- RNAAnnotateCelltype(RNA = RNA.res$RNA, genes = RNA.res$genes,
                                    signatures = signatures, min.score = 0.05)
 saveRDS(RNA.res, paste0(prefix, "_scRNA_Object.rds"))
-RNA.tfs <- RNAAnnotateTranscriptionFactor(RNA = RNA.res$RNA, genes = RNA.res$genes, project = prefix, 
-                                          method = method, lisa.mode = lisamode, 
-                                          conda.dir = condadir, lisa.envname = lisaenv, 
+RNA.tfs <- RNAAnnotateTranscriptionFactor(RNA = RNA.res$RNA, genes = RNA.res$genes, project = prefix,
                                           organism = species, top.tf = 10)
diff --git a/MAESTRO/Snakemake/scRNA/Snakefile b/MAESTRO/Snakemake/scRNA/Snakefile
@@ -434,19 +434,21 @@ rule scrna_analysis:
         species = config["species"],
         outpre = config["outprefix"],
         outdir = "Result/Analysis",
-        method = "LISA",
-        lisamode = config["lisamode"],
-        lisaenv = config["lisaenv"],
-        condadir = config["condadir"],
+        lisadir = config["lisadir"],
+        #method = "LISA",
+        #lisamode = config["lisamode"],
+        #lisaenv = config["lisaenv"],
+        #condadir = config["condadir"],
         signature = config["signature"]
     benchmark:
         "Result/Benchmark/%s_Analysis.benchmark" %(config["outprefix"])
     threads:
         config["cores"]
     shell:
+        "lisa unpack {params.lisadir}; "
         "Rscript " + RSCRIPT_PATH + "/scRNAseq_pipe.R --expression {params.expression} --species {params.species} "
-        "--prefix {params.outpre} --method {params.method} --signature {params.signature} "
-        "--lisamode {params.lisamode} --condadir {params.condadir} --lisaenv {params.lisaenv} --outdir {params.outdir} --thread {threads}"
+        "--prefix {params.outpre} --signature {params.signature} "
+        "--outdir {params.outdir} --thread {threads}"
 
 if config["rseqc"]:
     rule scrna_report:

diff --git a/MAESTRO/Snakemake/scRNA/config_template.yaml b/MAESTRO/Snakemake/scRNA/config_template.yaml
@@ -1,7 +1,7 @@
 # Directory where fastq files are stored
 fastqdir: {{ fastqdir }}
 
-# Sample name of fastq file (only for platform of "10x-genomics", for example, 
+# Sample name of fastq file (only for platform of "10x-genomics", for example,
 # if there is a file named pbmc_1k_v2_S1_L001_I1_001.fastq.gz, the sample name is "pbmc_1k_v2". )
 fastqprefix: {{ fastqprefix }}
 
@@ -15,33 +15,21 @@ platform: {{ platform }}
 outprefix: {{ outprefix }}
 
 # Whether or not to run RSeQC. [True, False]
-# If it's set to True, the pipeline will include the RSeQC part and then takes a longer time. 
+# If it's set to True, the pipeline will include the RSeQC part and then takes a longer time.
 # By default, the pipeline will skip the RSeQC part. DEFAULT: False.
 rseqc: {{ rseqc }}
 
-# Number of cores to use
+# Number of cores to use.
 cores: {{ cores }}
 
-# Cell signature file used to annotate cell types. MAESTRO provides several sets of built-in cell signatures. 
-# Users can choose from ['human.immune.CIBERSORT', 'mouse.brain.ALLEN', 'mouse.all.facs.TabulaMuris', 'mouse.all.droplet.TabulaMuris']. 
-# Custom cell signatures are also supported. In this situation, users need to provide the file location of cell signatures, 
+# Cell signature file used to annotate cell types. MAESTRO provides several sets of built-in cell signatures.
+# Users can choose from ['human.immune.CIBERSORT', 'mouse.brain.ALLEN', 'mouse.all.facs.TabulaMuris', 'mouse.all.droplet.TabulaMuris'].
+# Custom cell signatures are also supported. In this situation, users need to provide the file location of cell signatures,
 # and the signature file is tab-separated without a header. The first column is cell type, and the second column is signature gene.
 signature: {{ signature }}
 
-# Mode to Run LISA, 'local' or 'web'. If the mode is set as 'local', 
-# please install LISA (https://github.com/qinqian/lisa) and download pre-computed datasets following the instructions. 
-# The 'web' mode is to run online version of LISA. In consideration of the connection issue and size of datasets, 
-# the 'local' mode is recommended to run the whole MAESTRO pipeline. 
-# If the mode is 'local', please provide the name of LISA environment through lisaenv 
-# and specify the directory where miniconda or anaconda is installed through condadir. DEFAULT: local.
-lisamode: {{ lisamode }}
-
-# Name of lisa environment (required if method is set to lisa and lisamode is set to local). DEFAULT: lisa.
-lisaenv: {{ lisaenv }}
-
-# Directory where miniconda or anaconda is installed (required if method is set to lisa and lisamode is set to local).
-# For example, /home/user/miniconda3
-condadir: {{ condadir }}
+# Path to the LISA data files. The scrna_analysis rule passes this path to `lisa unpack` before running the analysis.
+lisadir: {{ lisadir }}
 
 
 # Cutoff for quality control
@@ -54,46 +42,46 @@ cutoff:
   cell: {{ cell }}
 
 
-# Reference genome 
+# Reference genome
 genome:
-  # Genome index directory for STAR. Users can just download the index file 
-  # from http://cistrome.org/~chenfei/MAESTRO/Refdata_scRNA_MAESTRO_GRCh38_1.1.0.tar.gz and decompress it. 
+  # Genome index directory for STAR. Users can just download the index file
+  # from http://cistrome.org/~chenfei/MAESTRO/Refdata_scRNA_MAESTRO_GRCh38_1.1.0.tar.gz and decompress it.
   # Then specify the index directory for STAR, for example, 'Refdata_scRNA_MAESTRO_GRCh38_1.1.0/GRCh38_STAR_2.7.3a'.
   mapindex: {{ mapindex }}
-  # The prefix of transcript references for RSEM used by rsem-prepare-reference (Only required when the platform is Smartseq2). 
-  # Users can directly download the annotation file from 
+  # The prefix of transcript references for RSEM used by rsem-prepare-reference (Only required when the platform is Smartseq2).
+  # Users can directly download the annotation file from
   # http://cistrome.org/~chenfei/MAESTRO/giggle.tar.gz and decompress it.
   # Then specify the prefix for RSEM, for example, 'Refdata_scRNA_MAESTRO_GRCh38_1.1.0/GRCh38_RSEM_1.3.2/GRCh38'.
   rsem: {{ rsem }}
 
 
 # Information about barcode (for platform of 'Dropseq' or '10x-genomics')
 barcode:
-  # If the platform is 'Dropseq' or '10x-genomics', please specify the barcode library (whitelist) 
-  # so that STARsolo can do the error correction and demultiplexing of cell barcodes. 
-  # The 10X Chromium whitelist file can be found inside the CellRanger distribution. 
-  # Please make sure that the whitelist is compatible with the specific version of the 10X chemistry: V2 or V3. 
-  # For example, in CellRanger 3.1.0, the V2 whitelist is 'cellranger-3.1.0/cellranger-cs/3.1.0/lib/python/cellranger/barcodes/737K-august-2016.txt'. 
-  # The V3 whitelist is 'cellranger-3.1.0/cellranger-cs/3.1.0/lib/python/cellranger/barcodes/3M-february-2018.txt'. 
+  # If the platform is 'Dropseq' or '10x-genomics', please specify the barcode library (whitelist)
+  # so that STARsolo can do the error correction and demultiplexing of cell barcodes.
+  # The 10X Chromium whitelist file can be found inside the CellRanger distribution.
+  # Please make sure that the whitelist is compatible with the specific version of the 10X chemistry: V2 or V3.
+  # For example, in CellRanger 3.1.0, the V2 whitelist is 'cellranger-3.1.0/cellranger-cs/3.1.0/lib/python/cellranger/barcodes/737K-august-2016.txt'.
+  # The V3 whitelist is 'cellranger-3.1.0/cellranger-cs/3.1.0/lib/python/cellranger/barcodes/3M-february-2018.txt'.
   whitelist: {{ whitelist }}
   # The start site of each barcode. DEFAULT: 1.
   barcodestart: {{ barcodestart }}
-  # The length of cell barcode. For 10x-genomics, the length of barcode is 16. DEFAULT: 16. 
+  # The length of cell barcode. For 10x-genomics, the length of barcode is 16. DEFAULT: 16.
   barcodelength: {{ barcodelength }}
   # The start site of UMI. DEFAULT: 17.
   umistart: {{ umistart }}
-  # The length of UMI. For 10x-genomics, the length of V2 chemistry is 10. 
-  # For 10X V3 chemistry, the length is 12. DEFAULT: 10. 
+  # The length of UMI. For 10x-genomics, the length of V2 chemistry is 10.
+  # For 10X V3 chemistry, the length is 12. DEFAULT: 10.
   umilength: {{ umilength }}
 
 
 # Specify the barcode fastq file and transcript fastq file (only for platform of "Dropseq")
 fastq:
-  # Specify the barcode fastq file, only for the platform of 'Dropseq'. 
+  # Specify the barcode fastq file, only for the platform of 'Dropseq'.
   # If there are multiple pairs of fastq, please provide a comma-separated list of barcode fastq files.
   # For example, 'test1_1.fastq,test2_1.fastq'
   barcode: {{ barcode }}
-  # Specify the transcript fastq file, only for the platform of 'Dropseq'. 
+  # Specify the transcript fastq file, only for the platform of 'Dropseq'.
   # If there are multiple pairs of fastq, please provide a comma-separated list of transcript fastq files.
   # For example, 'test1_2.fastq,test2_2.fastq'
   transcript: {{ transcript }}