Wraps useful C++ functions for dealing with peptide data in R.
RPeptideUtils
depends on the peptideUtils C++ library, which is included as a submodule in this repository. To clone this repository together with the peptideUtils
submodule, run:
git clone --recurse-submodules https://github.com/ajmaurais/RPeptideUtils
Next, build the peptideUtils library:
cd RPeptideUtils/peptideUtils && make
Finally, build and install the RPeptideUtils
package:
cd ..
Rscript -e "install.packages('.', repos = NULL, type = 'source')"
> calcFormula(c("ACLLPETVNMEEYPYDAEY", "ALCAEFK"), subscripts = TRUE)
[1] "C₁₀₂H₁₄₇N₂₁O₃₆S₂" "C₃₇H₅₉N₉O₁₁S"
> calcMass(c("ACLLPETVNMEEYPYDAEY", "ALCAEFK"))
[1] 2305.9764 837.4057
# Look up locations of modified residues in parent protein
> getModifiedResidues(c("Q00839", "Q9HCS7", "Q7L014"), c("APQC*LGK", "FADMEC*K", "GAEIIVC*TPGR"))
[1] "C562" "C676" "C501"
# Concatenate a vector of modified residues into a single string
> combineMods(c('C157', 'C157|C125', 'C50', 'C125'))
[1] "C125|C157|C50"
> getSequences(c("A0MZ66", "A6NMY6"))
[1] "MNSSDEEKQLQLITSLK..."
[2] "MSTVHEILCKLSLEGDH..."
> digest(c("KLGAARKLGAGLAKVIGAGIGIGK", "KLGAARKLGAGLAKPVIGAGIGIGK"), c('a', 'b'))
$a
[1] "LGAGLAK" "VIGAGIGIGK"
$b
[1] "LGAGLAKPVIGAGIGIGK"
> oneLetterToThree(c("AC*LLPETVNMEEYPYDAEY", "ALCAEFK", "AQUPIVER"))
[1] "AlaCys*LeuLeuProGluThrValAsnMetGluGluTyrProTyrAspAlaGluTyr"
[2] "AlaLeuCysAlaGluPheLys"
[3] "AlaGlnSecProIleValGluArg"
> threeLetterToOne(c("Ala-Cys*-Leu-Leu-Pro", "Ala-Leu-Cys-Ala", "Ala-Gln-Sec-Ile"), sep_in="-")
[1] "AC*LLP" "ALCA" "AQUI"