Skip to content

Commit

Permalink
BayesTyper (v1.4)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonas Andreas Sibbesen authored and Jonas Andreas Sibbesen committed Oct 18, 2018
1 parent 2b816ff commit 9cacf3c
Show file tree
Hide file tree
Showing 65 changed files with 1,026 additions and 794 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ SET(BUILD_STATIC 0 CACHE BOOL "Build static")

if(${BUILD_STATIC} EQUAL 0)

SET(CMAKE_CXX_FLAGS "--std=c++11 -lpthread -g -Wall -O3 -DBT_KMER_SIZE='${KMER_SIZE}' -DBT_VERSION='\"v1.3.1 ${GIT_LAST_COMMIT_HASH}\"'")
SET(CMAKE_CXX_FLAGS "--std=c++11 -lpthread -g -O3 -DBT_KMER_SIZE='${KMER_SIZE}' -DBT_VERSION='\"v1.4 ${GIT_LAST_COMMIT_HASH}\"'")

else(${BUILD_STATIC} EQUAL 0)

message(STATUS "Building BayesTyper static")

SET(CMAKE_CXX_FLAGS "--std=c++11 -pthread -O3 -static -static-libgcc -static-libstdc++ -DBT_KMER_SIZE='${KMER_SIZE}' -DBT_VERSION='\"v1.3.1\"'")
SET(CMAKE_CXX_FLAGS "--std=c++11 -pthread -O3 -static -static-libgcc -static-libstdc++ -DBT_KMER_SIZE='${KMER_SIZE}' -DBT_VERSION='\"v1.4\"'")
SET(CMAKE_EXE_LINKER_FLAGS "-Wl,--whole-archive -lpthread -Wl,--no-whole-archive")

SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
Expand Down
14 changes: 9 additions & 5 deletions include/bayesTyper/ChromosomePloidy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,26 @@ THE SOFTWARE.
#define __bayesTyper__ChromosomePloidy_hpp

#include <vector>
#include <string>
#include <unordered_map>

#include "Utils.hpp"
#include "Sample.hpp"
#include "Chromosomes.hpp"

// Declares lookups that each return a vector<Utils::Ploidy> by const reference.
// NOTE(review): this listing is a rendered diff hunk without +/- markers (the
// file header reports "9 additions & 5 deletions"), so declarations from both
// revisions appear together. Presumably the vector<Sample>-only constructor,
// getPloidy() and the autosomal/chrX/chrY members are the removed side and the
// remaining declarations are the added side -- confirm against the repository.
class ChromosomePloidy {

public:

// Constructs from the sample list only.
ChromosomePloidy(const vector<Sample> &);
// Returns the ploidy vector selected by a Utils::ChromClass value.
const vector<Utils::Ploidy> & getPloidy(const Utils::ChromClass) const;
// Constructs from a string, the Chromosomes container and the sample list.
// NOTE(review): the string is unnamed here; presumably a ploidy file name or
// prefix -- TODO confirm in the implementation file.
ChromosomePloidy(const string &, const Chromosomes &, const vector<Sample> &);

// String-keyed lookups; presumably keyed by chromosome name -- TODO confirm.
// Behaviour for an unknown key is not visible from this header.
const vector<Utils::Ploidy> & getGenderPloidy(const string &) const;
const vector<Utils::Ploidy> & getSamplePloidy(const string &) const;

private:

// One ploidy vector per fixed chromosome category.
vector<Utils::Ploidy> autosomal;
vector<Utils::Ploidy> chrX;
vector<Utils::Ploidy> chrY;
// String-keyed ploidy tables; presumably the storage behind getGenderPloidy()
// and getSamplePloidy() respectively -- verify in the implementation file.
unordered_map<string, vector<Utils::Ploidy> > gender_ploidy;
unordered_map<string, vector<Utils::Ploidy> > sample_ploidy;
};

#endif
2 changes: 0 additions & 2 deletions include/bayesTyper/Chromosomes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ class Chromosomes {
ulong getTotalLength() const;
ulong getDecoyLength() const;

Utils::ChromClass getClass(string) const;

void convertToUpper();

protected:
Expand Down
8 changes: 2 additions & 6 deletions include/bayesTyper/CountDistribution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class CountDistribution {
CountDistribution(const vector<Sample> &, const OptionsContainer &);

const vector<vector<NegativeBinomialDistribution> > & getGenomicCountDistributions() const;
void setGenomicCountDistributions(const vector<vector<KmerStats> > &, const string &);
void setGenomicCountDistributions(const vector<vector<vector<KmerStats> > > &, const string &);

const vector<double> & getNoiseRates() const;
void setNoiseRates(const vector<double> &);
Expand All @@ -76,14 +76,10 @@ class CountDistribution {
double poissonLogProb(const uint, const double) const;

const vector<Sample> samples;
const uchar num_genomic_rate_gc_bias_bins;

const uchar max_multiplicity = Utils::uchar_overflow;
const uchar max_kmer_count = Utils::uchar_overflow;

vector<vector<NegativeBinomialDistribution> > genomic_count_distributions;

const vector<pair<double,double> > noise_rate_priors;
const vector<pair<float,float> > noise_rate_priors;
vector<double> noise_rates;

vector<vector<vector<vector<double> > > > genomic_count_log_pmf_cache;
Expand Down
2 changes: 1 addition & 1 deletion include/bayesTyper/InferenceEngine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class InferenceEngine {

public:

InferenceEngine(const vector<Sample> &, const OptionsContainer &);
InferenceEngine(const vector<Sample> &, const ChromosomePloidy &, const OptionsContainer &);

void estimateNoiseParameters(CountDistribution *, InferenceUnit *, KmerCountsHash *, const string &);
void genotypeVariantClusterGroups(InferenceUnit *, KmerCountsHash *, const CountDistribution &, const Filters & filters, GenotypeWriter *);
Expand Down
9 changes: 5 additions & 4 deletions include/bayesTyper/KmerCounter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ THE SOFTWARE.
#include "InferenceUnit.hpp"
#include "OptionsContainer.hpp"
#include "Chromosomes.hpp"
#include "ChromosomePloidy.hpp"


class KmerCounter {
Expand All @@ -57,10 +58,10 @@ class KmerCounter {

void findVariantClusterPaths(InferenceUnit *, const ushort);
void countPathMultigroupKmers(BooleanKmerHash *, ThreadedKmerBloom<Utils::kmer_size> *, InferenceUnit *);
void countInterclusterParameterKmers(BooleanKmerHash *, const vector<VariantFileParser::InterClusterRegion> &, const float, const Chromosomes &, const ThreadedKmerBloom<Utils::kmer_size> &);
void countInterclusterParameterKmers(BooleanKmerHash *, const vector<VariantFileParser::InterClusterRegion> &, const Chromosomes &, const ThreadedKmerBloom<Utils::kmer_size> &, const float);

void countPathKmers(ThreadedKmerBloom<Utils::kmer_size> *, InferenceUnit *);
void countInterclusterKmers(KmerCountsHash *, ThreadedKmerBloom<Utils::kmer_size> *, const string &, const Chromosomes &);
void countInterclusterKmers(KmerCountsHash *, ThreadedKmerBloom<Utils::kmer_size> *, const string &, const Chromosomes &, const ChromosomePloidy &);

void parseSampleKmers(KmerCountsHash *, ThreadedKmerBloom<Utils::kmer_size> *);
void classifyPathKmers(KmerCountsHash *, InferenceUnit *, const string & );
Expand All @@ -85,10 +86,10 @@ class KmerCounter {

void findVariantClusterPathsCallback(vector<VariantClusterGroup *> *, KmerBloom<Utils::kmer_size> *, const ushort, const ushort, const ushort);
void countPathMultigroupKmersCallback(BooleanKmerHash *, ThreadedKmerBloom<Utils::kmer_size> *, vector<VariantClusterGroup *> *, mutex *, ulong *, const ushort);
void countInterclusterParameterKmersCallback(BooleanKmerHash *, const vector<VariantFileParser::InterClusterRegion> &, const float, const Chromosomes &, const ThreadedKmerBloom<Utils::kmer_size> &, const ushort);
void countInterclusterParameterKmersCallback(BooleanKmerHash *, const vector<VariantFileParser::InterClusterRegion> &, const Chromosomes &, const ThreadedKmerBloom<Utils::kmer_size> &, const float, const ushort);

void countPathKmersCallback(ThreadedKmerBloom<Utils::kmer_size> *, vector<VariantClusterGroup *> *, const ushort);
void countInterclusterKmersCallback(KmerCountsHash *, ThreadedKmerBloom<Utils::kmer_size> *, const vector<VariantFileParser::InterClusterRegion> &, const Chromosomes &, const ushort);
void countInterclusterKmersCallback(KmerCountsHash *, ThreadedKmerBloom<Utils::kmer_size> *, const vector<VariantFileParser::InterClusterRegion> &, const Chromosomes &, const ChromosomePloidy &, const ushort);

void parseSampleKmersCallBack(KmerCountsHash *, ThreadedKmerBloom<Utils::kmer_size> *, ProducerConsumerQueue<KmerBatchInfo *> *, ProducerConsumerQueue<KmerBatchInfo *> *);
void classifyPathKmersCallback(KmerCountsHash *, KmerBloom<Utils::kmer_size> *, vector<VariantClusterGroup *> *, const ushort);
Expand Down
7 changes: 4 additions & 3 deletions include/bayesTyper/KmerCounts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ class KmerCounts {

bool isExcluded();

void addInterclusterMultiplicity(const Utils::ChromClass);
void addInterclusterMultiplicity(const bool, const vector<Utils::Ploidy> &);
uchar getInterclusterMultiplicity(const Utils::Gender);
uchar getMaxInterclusterMultiplicity();

void addClusterMultiplicity(const uchar, const bool);

Expand All @@ -68,10 +69,10 @@ class KmerCounts {

uchar has_cluster_occ : 1, has_multicluster_occ : 1, has_multigroup_occ : 1, has_decoy_occ : 1, has_max_multiplicity : 1, is_parameter : 1;

uchar max_multiplicity;
uchar max_haploid_multiplicity;

uchar male_intercluster_multiplicity;
uchar female_intercluster_multiplicity;
uchar male_intercluster_multiplicity;

uchar updateMultiplicity(const uchar, const uchar);
};
Expand Down
4 changes: 2 additions & 2 deletions include/bayesTyper/KmerHash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class KmerCountsHash {
virtual void sortKmers() = 0;

virtual void writeRootSizeDistribution(const string &) = 0;
virtual vector<vector<KmerStats> > calculateKmerStats(const ushort, const uchar) = 0;
virtual vector<vector<vector<KmerStats> > > calculateKmerStats(const vector<Sample> &) = 0;
};

template<uchar sample_bin>
Expand All @@ -103,7 +103,7 @@ class ObservedKmerCountsHash : public KmerCountsHash {
void sortKmers();

void writeRootSizeDistribution(const string &);
vector<vector<KmerStats> > calculateKmerStats(const ushort, const uchar);
vector<vector<vector<KmerStats> > > calculateKmerStats(const vector<Sample> &);
};

#endif
11 changes: 8 additions & 3 deletions include/bayesTyper/OptionsContainer.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,11 @@ void OptionsContainer::parseValuePair(std::string option, std::string value_pair
std::vector<std::string> value_pair_str_split;
split(value_pair_str_split, value_pair_str, boost::is_any_of(","));

assert(value_pair_str_split.size() == 2);
if (value_pair_str_split.size() != 2) {

cerr << "\nERROR: Argument to option \"" << option << "\" should be two values (comma-seperated)\n" << endl;
exit(1);
}

if (typeid(ValueType) == typeid(uint)) {

Expand All @@ -78,8 +82,9 @@ void OptionsContainer::parseValuePair(std::string option, std::string value_pair

} else {

value_pair.first = stod(value_pair_str_split.front());
value_pair.second = stod(value_pair_str_split.back());
assert(typeid(ValueType) == typeid(float));
value_pair.first = stof(value_pair_str_split.front());
value_pair.second = stof(value_pair_str_split.back());
}

OptionValue<std::pair<ValueType,ValueType> > * option_value = new OptionValue<std::pair<ValueType,ValueType> >(value_pair);
Expand Down
11 changes: 3 additions & 8 deletions include/bayesTyper/SparsityEstimator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ THE SOFTWARE.

#include <random>

#include "Eigen/Dense"

#include "Utils.hpp"


Expand All @@ -42,15 +40,12 @@ class SparsityEstimator {
public:

SparsityEstimator(const uint);
uint estimateMinimumSetCover(Utils::MatrixXuchar const &, Utils::RowVectorXbool *);
vector<bool> & getMinimumSet();

unordered_set<ushort> estimateMinimumColumnCover(const Utils::MatrixXuchar &, Utils::RowVectorXbool *);

private:

mt19937 prng;
uniform_int_distribution<> uniform_int_dist;

vector<bool> in_minimum_set;
};

#endif
#endif
8 changes: 3 additions & 5 deletions include/bayesTyper/Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,9 @@ namespace Utils {

static const uint kmer_size = BT_KMER_SIZE;

typedef Eigen::Matrix<uchar,Eigen::Dynamic,Eigen::Dynamic, Eigen::RowMajor> MatrixXuchar;
typedef Eigen::Matrix<bool,Eigen::Dynamic,Eigen::Dynamic> MatrixXbool;
typedef Eigen::Matrix<uchar,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor> MatrixXuchar;
typedef Eigen::Matrix<bool,1,Eigen::Dynamic,Eigen::RowMajor> RowVectorXbool;
typedef Eigen::Matrix<bool,Eigen::Dynamic,1> ColVectorXbool;
typedef Eigen::Matrix<uint,1,Eigen::Dynamic,Eigen::RowMajor> RowVectorXuint;

static const double double_precision = numeric_limits<double>::epsilon();
static const float float_precision = numeric_limits<float>::epsilon();
Expand All @@ -75,9 +74,8 @@ namespace Utils {

static const ushort queue_size_thread_scaling = 2;

// NOTE(review): rendered diff hunk without +/- markers (the file header reports
// "3 additions & 5 deletions"), so removed and added declarations are listed
// together; presumably ChromClass and the first Gender line are the removed side.
// Chromosome categories stored as uchar; the trailing CHROM_CLASS_SIZE
// enumerator evaluates to 4, the number of categories before it.
enum class ChromClass : uchar {Autosomal = 0, X, Y, Decoy, CHROM_CLASS_SIZE};
// Ploidy levels stored as uchar: Null = 0, Haploid = 1, Diploid = 2; PLOIDY_SIZE evaluates to 3.
enum class Ploidy : uchar {Null = 0, Haploid, Diploid, PLOIDY_SIZE};
// Two declarations of Gender follow. They differ only in enumerator order:
// the first gives Male = 0 / Female = 1, the second Female = 0 / Male = 1
// (GENDER_SIZE is 2 in both). Anything that stores or indexes by the numeric
// value sees swapped meanings depending on which declaration is in effect.
enum class Gender : uchar {Male = 0, Female, GENDER_SIZE};
enum class Gender : uchar {Female = 0, Male, GENDER_SIZE};


inline bool doubleCompare(const double a, const double b) {
Expand Down
3 changes: 1 addition & 2 deletions include/bayesTyper/VariantCluster.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class VariantCluster {

public:

enum class VariantType : uchar {SNP = 0, Insertion, Deletion, Complex, Mixture, Unsupported, VARIANT_TYPE_SIZE};
enum class VariantType : uchar {SNV = 0, Insertion, Deletion, Complex, Mixture, Unsupported, VARIANT_TYPE_SIZE};

protected:

Expand Down Expand Up @@ -101,7 +101,6 @@ class VariantCluster {
uint right_flank;

string chrom_name;
Utils::ChromClass chrom_class;

map<uint, Variant> variants;
set<ContainedCluster, ContainedClusterCompare> contained_clusters;
Expand Down
8 changes: 3 additions & 5 deletions include/bayesTyper/VariantClusterGroup.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,13 @@ class VariantClusterGroup {
};

string chrom_name;
Utils::ChromClass chrom_class;

uint start_position;
uint end_position;

uint num_variants;

bool is_snv;
bool is_parameter_cluster;

vector<VariantClusterVertex> vertices;
vector<vector<uint> > out_edges;
Expand All @@ -96,11 +95,10 @@ class VariantClusterGroup {
void serialize(Archive & ar, const unsigned int version) {

ar & chrom_name;
ar & chrom_class;
ar & start_position;
ar & end_position;
ar & num_variants;
ar & is_snv;
ar & is_parameter_cluster;
ar & vertices;
ar & out_edges;
ar & source_vertices;
Expand All @@ -125,7 +123,7 @@ class VariantClusterGroup {
void countPathKmers(unordered_set<bitset<Utils::kmer_size * 2> > *);
void classifyPathKmers(KmerCountsHash *, KmerBloom<Utils::kmer_size> *);

bool isAutosomalSimpleSNV() const;
bool isSimpleParameterCluster() const;

void initGenotyper(KmerCountsHash *, const vector<Sample> &, const uint, const uchar, const float, const uint);

Expand Down
16 changes: 8 additions & 8 deletions include/bayesTyper/VariantFileParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ class VariantFileParser {
// Plain record describing one region on a named chromosome.
// NOTE(review): this listing is a rendered diff hunk without +/- markers (the
// file header reports "8 additions & 8 deletions"), so fields and constructors
// from both revisions appear together. Presumably chrom_class/start/end and the
// ChromClass-taking constructor are the removed side, and is_decoy/
// start_position/end_position and the bool-taking constructor are the added
// side -- confirm against the repository.
struct InterClusterRegion {

// Name of the chromosome the region lies on.
string chrom_name;
// Chromosome category of the region (set only by the first constructor below).
Utils::ChromClass chrom_class;
// Decoy flag (set only by the second constructor below).
bool is_decoy;

// Region bounds. Whether the end bound is inclusive is not visible here -- TODO confirm.
uint start;
uint end;
uint start_position;
uint end_position;

// Copies its four arguments into chrom_name, chrom_class, start and end; no other member is initialised.
InterClusterRegion(const string & chrom_name_in, const Utils::ChromClass & chrom_class_in, const uint start_in, const uint end_in) : chrom_name(chrom_name_in), chrom_class(chrom_class_in), start(start_in), end(end_in) {}
// Copies its four arguments into chrom_name, is_decoy, start_position and end_position; no other member is initialised.
InterClusterRegion(const string & chrom_name_in, const bool is_decoy_in, const uint start_position_in, const uint end_position_in) : chrom_name(chrom_name_in), is_decoy(is_decoy_in), start_position(start_position_in), end_position(end_position_in) {}
};

string getVariantStatsString(const uint);
Expand All @@ -88,7 +88,7 @@ class VariantFileParser {

const ushort num_threads;
const uint max_allele_length;
const double copy_number_variant_threshold;
const float copy_number_variant_threshold;

enum class AlleleCount : uchar {Total = 0, Excluded_decoy, Excluded_genome, Excluded_match, Excluded_end, Excluded_length, ALLELE_COUNT_SIZE};

Expand Down Expand Up @@ -118,9 +118,9 @@ class VariantFileParser {
vector<string> variant_line;

void openVariantFile(const string &);
void updateVariantLine();
bool updateVariantLine();

void addSequenceToInterclusterRegions(const string &, const Utils::ChromClass &, const uint, const uint);
void addSequenceToInterclusterRegions(const string &, const bool, const uint, const uint);

void parseVariants(ProducerConsumerQueue<vector<unordered_map<uint, VariantCluster*> * > * > *, const uint, const Chromosomes &);

Expand All @@ -131,7 +131,7 @@ class VariantFileParser {
VariantCluster::VariantType classifyAllele(const int, const int);

uint copyNumberVariantLength(const string &, const string &, const uint);
void clusterVariants(VariantCluster::Variant &, const uint, const set<uint>, const string &, const Utils::ChromClass &, map<uint, VariantCluster*> *, unordered_map<uint, VariantCluster*> *, list<unordered_set<uint> > *);
void clusterVariants(VariantCluster::Variant &, const uint, const set<uint>, const string &, map<uint, VariantCluster*> *, unordered_map<uint, VariantCluster*> *, list<unordered_set<uint> > *);

void processVariantClusterGroups(ProducerConsumerQueue<vector<unordered_map<uint, VariantCluster*> * > * > *, vector<unordered_map<uint, VariantCluster*> * > **, uint *, unordered_map<uint, VariantCluster*> **, list<unordered_set<uint> > *, map<uint, VariantCluster*> *);
void mergeVariantClusters(unordered_map<uint, VariantCluster*> *, list<unordered_set<uint> > &);
Expand Down
Loading

0 comments on commit 9cacf3c

Please sign in to comment.