-
Notifications
You must be signed in to change notification settings - Fork 8
/
data_download_hg38.sh
48 lines (33 loc) · 2 KB
/
data_download_hg38.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
# Download and process the database files.
#
# Ensembl release directory name; it is substituted into the Ensembl FTP
# URLs used further down in this script.
VEP_release='release-97'
# --- Reference genome --------------------------------------------------------
# Creates database/Fasta below the current directory, downloads the hg38
# reference FASTA from the Broad Institute bundle and builds the index files
# next to it. The helper scripts and picard.jar are addressed as ../../bin and
# ../../software, i.e. relative to the directory the script is started from.
# Leaves the shell in database/ for the steps that follow.
echo "download reference file"

# mkdir -p succeeds when the directories already exist (re-run). The guarded
# cd matters: if it were skipped, every command below and the closing "cd .."
# would operate in the wrong directory.
mkdir -p database/Fasta
cd database/Fasta || { echo "error: cannot enter database/Fasta" >&2; exit 1; }

# Nothing below can work without the reference, so stop on a failed download.
wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/Homo_sapiens_assembly38.fasta.gz \
  || { echo "error: download of Homo_sapiens_assembly38.fasta.gz failed" >&2; exit 1; }
gunzip Homo_sapiens_assembly38.fasta.gz \
  || { echo "error: gunzip of Homo_sapiens_assembly38.fasta.gz failed" >&2; exit 1; }

# Project helper; presumably derives human.fasta (read by every command below)
# from the downloaded assembly -- confirm against bin/reference_process.py.
python ../../bin/reference_process.py \
  || { echo "error: reference_process.py failed" >&2; exit 1; }

# Index files for human.fasta. These steps do not stop the script on failure,
# same as before.
bwa index human.fasta
java -jar ../../software/picard.jar CreateSequenceDictionary R=human.fasta O=human.dict
samtools faidx human.fasta
# Project helper; NOTE(review): presumably builds a pyfasta index -- confirm.
python ../../bin/pyfasta_index.py
# GC wiggle track with a window of 50 (-w 50).
sequenza-utils gc_wiggle -w 50 --fasta human.fasta -o hg38.gc50Base.wig.gz

# Remove the uncompressed download.
rm Homo_sapiens_assembly38.fasta
cd .. || exit 1
# --- Known-variant VCFs ------------------------------------------------------
# Runs inside database/. Downloads three VCFs from the Broad Institute hg38
# bundle, each followed by its .tbi index, into database/VCF_annotation, then
# returns to database/.

# -p tolerates an existing directory; the guarded cd keeps the downloads (and
# the closing "cd ..") from running in the wrong directory.
mkdir -p VCF_annotation
cd VCF_annotation || { echo "error: cannot enter VCF_annotation" >&2; exit 1; }

broad_bundle="ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38"
for vcf in \
  Mills_and_1000G_gold_standard.indels.hg38.vcf.gz \
  1000G_phase1.snps.high_confidence.hg38.vcf.gz \
  dbsnp_138.hg38.vcf.gz; do
  # A failed download does not stop the loop: the remaining files are still
  # attempted, as in the original sequence of wget calls.
  wget "${broad_bundle}/${vcf}"
  wget "${broad_bundle}/${vcf}.tbi"
done

cd .. || exit 1
# --- Ensembl cDNA / peptide sequences ----------------------------------------
# Runs inside database/. Downloads the GRCh38 cDNA and peptide FASTA files of
# the Ensembl release named by $VEP_release into database/Protein, renames
# them to human.*.all.fa and builds a protein BLAST database from the
# peptides. Returns to database/ afterwards.

# -p tolerates an existing directory; the guarded cd keeps the downloads (and
# the closing "cd ..") from running in the wrong directory.
mkdir -p Protein
cd Protein || { echo "error: cannot enter Protein" >&2; exit 1; }

ensembl_fasta="ftp://ftp.ensembl.org/pub/${VEP_release}/fasta/homo_sapiens"

# Each step only makes sense if the previous one succeeded, hence the && chain.
wget "${ensembl_fasta}/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz" \
  && gunzip Homo_sapiens.GRCh38.cdna.all.fa.gz \
  && mv Homo_sapiens.GRCh38.cdna.all.fa human.cdna.all.fa

# makeblastdb reads the renamed peptide file, so it is part of the same chain.
wget "${ensembl_fasta}/pep/Homo_sapiens.GRCh38.pep.all.fa.gz" \
  && gunzip Homo_sapiens.GRCh38.pep.all.fa.gz \
  && mv Homo_sapiens.GRCh38.pep.all.fa human.pep.all.fa \
  && makeblastdb -in human.pep.all.fa -dbtype prot -out peptide_database/peptide -parse_seqids

cd .. || exit 1
# --- VEP cache ---------------------------------------------------------------
# Runs inside database/ (where the preceding step leaves the shell), so the
# former "cd .." / "cd database" round trip is not needed. Downloads the
# GRCh38 VEP cache of the release named by $VEP_release into database/vep_data
# and unpacks it there.

# The cache file name carries the bare release number; derive it from
# VEP_release ("release-97" -> "97") so the two cannot drift apart.
vep_version="${VEP_release#release-}"
vep_cache="homo_sapiens_vep_${vep_version}_GRCh38.tar.gz"

# -p tolerates an existing directory; the guarded cd keeps the archive from
# being downloaded and unpacked in the wrong directory.
mkdir -p vep_data
cd vep_data || { echo "error: cannot enter vep_data" >&2; exit 1; }

wget "ftp://ftp.ensembl.org/pub/${VEP_release}/variation/vep/${vep_cache}" \
  && tar xvzf "${vep_cache}"