diff --git a/README.md b/README.md index f6cd4ab3..97e67074 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,9 @@ Download the latest [release from](https://github.com/alexdobin/STAR/releases) a ```bash # Get latest STAR source from releases -wget https://github.com/alexdobin/STAR/archive/2.7.0e.tar.gz -tar -xzf 2.7.0e.tar.gz -cd STAR-2.7.0e +wget https://github.com/alexdobin/STAR/archive/2.7.0f.tar.gz +tar -xzf 2.7.0f.tar.gz +cd STAR-2.7.0f # Alternatively, get STAR source using git git clone https://github.com/alexdobin/STAR.git diff --git a/bin/Linux_x86_64/STAR b/bin/Linux_x86_64/STAR index 94b06944..e3162a5d 100755 Binary files a/bin/Linux_x86_64/STAR and b/bin/Linux_x86_64/STAR differ diff --git a/bin/Linux_x86_64/STARlong b/bin/Linux_x86_64/STARlong index 9c9b69fc..ef38ba8d 100755 Binary files a/bin/Linux_x86_64/STARlong and b/bin/Linux_x86_64/STARlong differ diff --git a/bin/Linux_x86_64_static/STAR b/bin/Linux_x86_64_static/STAR index 7be7c281..9050a2bc 100755 Binary files a/bin/Linux_x86_64_static/STAR and b/bin/Linux_x86_64_static/STAR differ diff --git a/bin/Linux_x86_64_static/STARlong b/bin/Linux_x86_64_static/STARlong index b07a42bb..0365a68d 100755 Binary files a/bin/Linux_x86_64_static/STARlong and b/bin/Linux_x86_64_static/STARlong differ diff --git a/doc/STARmanual.pdf b/doc/STARmanual.pdf index f60925f5..bd89e64e 100644 Binary files a/doc/STARmanual.pdf and b/doc/STARmanual.pdf differ diff --git a/extras/doc-latex/STARmanual.tex b/extras/doc-latex/STARmanual.tex index d2a1bbc0..656f9669 100644 --- a/extras/doc-latex/STARmanual.tex +++ b/extras/doc-latex/STARmanual.tex @@ -34,7 +34,7 @@ \newcommand{\sechyperref}[1]{\hyperref[#1]{Section \ref{#1}. \nameref{#1}}} -\title{STAR manual 2.7.0e} +\title{STAR manual 2.7.0f} \author{Alexander Dobin\\ dobin@cshl.edu} \maketitle @@ -253,7 +253,7 @@ \subsection{Log files.} \subsection{SAM.} \ofilen{Aligned.out.sam} - alignments in standard SAM format. 
\subsubsection{Multimappers.} -The number of loci \code{Nmap} a read maps to is given by \code{NH:i:Nmap} field. Value of 1 corresponds to unique mappers, while values \textgreater1 corresponds to multi-mappers. \code{HI} attrbiutes enumerates multiple alignments of a read starting with 1 (this can be changed with the \opt{outSAMattrIHstart} - setting it to 0 may be required for compatibility with downstream software such as Cufflinks or StringTie). +The number of loci \code{Nmap} a read maps to is given by \code{NH:i:Nmap} field. Value of 1 corresponds to unique mappers, while values \textgreater1 correspond to multi-mappers. \code{HI} attribute enumerates multiple alignments of a read starting with 1 (this can be changed with the \opt{outSAMattrIHstart} - setting it to 0 may be required for compatibility with downstream software such as Cufflinks). The mapping quality MAPQ (column 5) is 255 for uniquely mapping reads, and int(-10*log10(1-1/Nmap)) for multi-mapping reads. This scheme is same as the one used by TopHat and is compatible with Cufflinks. The default MAPQ=255 for the unique mappers maybe changed with \opt{outSAMmapqUnique} parameter (integer 0 to 255) to ensure compatibility with downstream tools such as GATK. @@ -522,6 +522,44 @@ \section{STARsolo: mapping, demultiplexing and gene quantification for single ce Other solo* options can be found in the Section \ref{STARsolo_(single_cell_RNA-seq)_parameters}. +\subsection{Feature statistics summaries.} +Feature statistics summaries are recorded in the \optvr{Solo.out/} directory in files \optvr{.stats} where features are those used in the \opt{soloFeatures} option, e.g. \optvr{Gene.stats}.
The following metrics are recorded: +\begin{itemize}[leftmargin=1.5in] + \itemsep -0.3em + \item[\optv{nNinBarcode:}] number of reads with more than 2 Ns in cell barcode (CB) + \item[\optv{nUMIhomopolymer:}] number of reads with homopolymer in UMI + \item[\optv{nTooMany:}] not used at the moment + \item[\optv{nNoMatch:}] number of reads with CBs that do not match whitelist even with one mismatch +\end{itemize} +All of the above reads are discarded from Solo output. Remaining reads are checked for overlap with features (e.g. genes): +\begin{itemize}[leftmargin=2in] + \itemsep -0.3em + \item[\optv{nUnmapped:}] number of reads unmapped to the genome + \item[\optv{nNoFeature:}] number of reads that map to the genome but do not belong to a feature + \item[\optv{nAmbigFeature:}] number of reads that belong to more than one feature + \item[\optv{nAmbigFeatureMultimap:}] number of reads that belong to more than one feature and are also multimapping to the genome (this is a subset of the nAmbigFeature) + \item[\optv{nTooMany:}] number of reads with ambiguous CB (i.e. CB matches whitelist with one mismatch but with posterior probability \textless0.95) + \item[\optv{nNoExactMatch:}] number of reads with CB that matches a whitelist barcode with 1 mismatch, but this whitelist barcode does not get any other reads with exact matches of CB +\end{itemize} +None of the reads above are output in the feature (e.g. gene) / cell count matrices.
+\begin{itemize}[leftmargin=1.5in] + \itemsep -0.3em + \item[\optv{nExactMatch:}] number of reads with CB that match the whitelist exactly + \item[\optv{nMatch:}] total number of reads that match CB with 0 or 1 mismatches (this is a superset of nExactMatch) + \item[\optv{nCellBarcodes:}] number of distinct CBs detected + \item[\optv{nUMIs:}] number of distinct UMIs detected +\end{itemize} + +These metrics can be grouped into broader categories: +\begin{itemize} + \itemsep -0.3em + \item[]\optv{nNinBarcode+nUMIhomopolymer+nNoMatch+nTooMany+nNoExactMatch} = number of reads with CBs that do not match whitelist. + \item[]\optv{nUnmapped+nAmbigFeature} = number of reads without defined feature (gene) + \item[]\optv{nMatch} = number of reads that are output as solo counts + +\end{itemize} +The three categories above summed together should be equal to the total number of reads. + \section{Description of all options.}\label{Description_of_all_options} For each STAR version, the most up-to-date information about all STAR parameters can be found in the \code{parametersDefault} file in the STAR source directory. The parameters in the \code{parametersDefault}, as well as in the descriptions below, are grouped by function: \begin{itemize} diff --git a/extras/doc-latex/parametersDefault.tex b/extras/doc-latex/parametersDefault.tex index 69b1f638..0633eb6d 100644 --- a/extras/doc-latex/parametersDefault.tex +++ b/extras/doc-latex/parametersDefault.tex @@ -282,7 +282,7 @@ \optOpt{vA} \optOptLine{variant allele} \optOpt{vG} \optOptLine{genomic coordiante of the variant overlapped by the read} \optOpt{vW} \optOptLine{0/1 - alignment does not pass / passes WASP filtering.
Requires --waspOutputMode SAMtag} - \optOpt{CR,CY,UR,UY} \optOptLine{sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing} + \optOpt{CR CY UR UY} \optOptLine{sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing} \end{optOptTable} \optLine{Unsupported/undocumented:} \begin{optOptTable} @@ -472,7 +472,7 @@ \end{optOptTable} \optName{outFilterIntronStrands} \optValue{RemoveInconsistentStrands} - \optLine{string: filter alignments } + \optLine{string: filter alignments} \begin{optOptTable} \optOpt{RemoveInconsistentStrands} \optOptLine{remove alignments that have junctions with inconsistent strands} \optOpt{None} \optOptLine{no filtering} @@ -615,7 +615,7 @@ \optValue{0 ConcordantPair} \optLine{int, string: allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate} \optLine{1st word: int: maximum number of protrusion bases allowed} - \optLine{2nd word: string: } + \optLine{2nd word: string:} \begin{optOptTable} \optOpt{ConcordantPair} \optOptLine{report alignments with non-zero protrusion as concordant pairs} \optOpt{DiscordantPair} \optOptLine{report alignments with non-zero protrusion as discordant pairs} @@ -635,7 +635,7 @@ \optOpt{Right} \optOptLine{insertions are flushed to the right} \end{optOptTable} \end{optTable} -\optSection{Paired-End reads: presently unsupported/undocumented}\label{Paired-End_reads:_presently_unsupported/undocumented} +\optSection{Paired-End reads}\label{Paired-End_reads} \begin{optTable} \optName{peOverlapNbasesMin} \optValue{0} diff --git a/extras/docker/Dockerfile b/extras/docker/Dockerfile index fa583a78..6e4f9b82 100755 --- a/extras/docker/Dockerfile +++ b/extras/docker/Dockerfile @@ -2,7 +2,7 @@ FROM debian:stretch-slim MAINTAINER dobin@cshl.edu -ARG STAR_VERSION=2.7.0e +ARG STAR_VERSION=2.7.0f ENV PACKAGES gcc g++ make wget zlib1g-dev unzip