annotate test-data/util/build-gemini-testdata.sh @ 7:7d75673f4d6f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
author iuc
date Tue, 10 Mar 2020 06:20:59 -0400
parents f3cc64057b4e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
1 cd "$(dirname "$0")"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
2
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
3 export GEMINI_CONFIG=../test-cache
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
4 OUT_PTH=$GEMINI_CONFIG/gemini/data
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
5 GENOMIC_REGION=3:187000000-187500000
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
6
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
7
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
8 if [ -n "$1" ]; then
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
9
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
10 IN_PTH="$1"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
11 # downsample all vcf and bed annotation files to the region of interest and reindex
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
12 for vcf in `ls $IN_PTH/*.gz | grep -v hprd_interaction_edges.gz -`
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
13 do
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
14 python ./shrink_tabix.py $vcf -r $GENOMIC_REGION -o $OUT_PTH/`basename $vcf`
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
15 done
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
16
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
17 # downsample gene_table files to the region of interest
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
18 echo "$IN_PTH/summary_gene_table_v75 -> $OUT_PTH/summary_gene_table_v75"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
19 python ./shrink_simple_tab.py $IN_PTH/summary_gene_table_v75 -r chr$GENOMIC_REGION -c 0 8 9 -n 1 -o $OUT_PTH/summary_gene_table_v75
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
20
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
21 echo "$IN_PTH/detailed_gene_table_v75 -> $OUT_PTH/detailed_gene_table_v75"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
22 python ./shrink_simple_tab.py $IN_PTH/detailed_gene_table_v75 -r chr$GENOMIC_REGION -c 0 11 12 -n 1 -o $OUT_PTH/detailed_gene_table_v75
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
23
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
24 # filter kegg_pathway files to retain only records of the genes listed
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
25 # in the downsampled summary_gene_table
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
26 for kegg in `ls $IN_PTH/kegg_pathways_*`
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
27 do
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
28 echo "$kegg -> $OUT_PTH/`basename $kegg`"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
29 cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Fwf - $kegg > $OUT_PTH/`basename $kegg`
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
30 done
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
31
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
32 # filter hprd_interaction file to retain only records of the genes listed
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
33 # in the downsampled summary_gene_table
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
34 echo "$IN_PTH/hprd_interaction_edges.gz -> $OUT_PTH/hprd_interaction_edges.gz"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
35 bgzip -dc $IN_PTH/hprd_interaction_edges.gz > $OUT_PTH/hprd_interaction_edges
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
36 cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Ff - $OUT_PTH/hprd_interaction_edges | bgzip > $OUT_PTH/hprd_interaction_edges.gz
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
37 rm $OUT_PTH/hprd_interaction_edges
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
38
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
39 # filter cancer_gene_census file to retain only records of the genes listed
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
40 # in the downsampled summary_gene_table;
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
41 # TO DO: make the filter stricter by looking for matches only in the first
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
42 # column of the cancer_gene_census file (but the file is relatively small anyway)
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
43 echo "$IN_PTH/cancer_gene_census.20140120.tsv -> $OUT_PTH/cancer_gene_census.20140120.tsv"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
44 cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Fwf - $IN_PTH/cancer_gene_census.20140120.tsv > $OUT_PTH/cancer_gene_census.20140120.tsv
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
45
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
46 else
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
47 echo "no path to gemini annotation files provided - only building test databases"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
48 fi
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
49
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
50
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
51 # now use gemini load to build the test databases
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
52 echo "Building gemini test databases"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
53 echo "Test databases for gemini_load"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
54 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff ../gemini_load_result1.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
55 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff --skip-gene-tables --no-load-genotypes ../gemini_load_result2.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
56 echo "Test database for gemini_amend"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
57 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -t snpEff ../gemini_amend_input.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
58 echo "Test database for gemini_annotate"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
59 bgzip -c build-data anno.bed > build-data/anno.bed.gz
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
60 tabix --force -p bed build-data/anno.bed.gz
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
61 cp ../gemini_load_result1.db ../gemini_annotate_result.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
62 gemini --annotation-dir $OUT_PTH annotate -f build-data/anno.bed.gz -c anno5 -a count ../gemini_annotate_result.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
63 echo "Test database for gemini_set_somatic"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
64 cp ../gemini_load_result1.db ../gemini_is_somatic_result.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
65 gemini set_somatic --min-somatic-score 5.65 ../gemini_is_somatic_result.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
66 echo "Test database for gemini_de_novo and gemini_mendel_errors"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
67 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.de_novo.vcf -p build-data/test.de_novo.ped -t snpEff ../gemini_de_novo_input.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
68 echo "Test database for gemini_comp_hets"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
69 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.comp_het.vcf -p build-data/test.comp_het.ped -t snpEff ../gemini_comphets_input.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
70 echo "Test databases for gemini_autosomal"
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
71 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -p build-data/test.auto_rec.ped -t snpEff ../gemini_auto_rec_input.db
f3cc64057b4e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
iuc
parents:
diff changeset
72 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_dom.vcf -p build-data/test.auto_dom.ped -t snpEff ../gemini_auto_dom_input.db