Mercurial > repos > iuc > gemini_annotate
annotate test-data/util/build-gemini-testdata.sh @ 9:cf0f0f05ba9f draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit f7bdf08922aaf4119aefe7041e754a69cf64aebd
| author | iuc | 
|---|---|
| date | Wed, 13 Jul 2022 15:23:12 +0000 | 
| parents | 5bcaca8085bd | 
| children | 
| rev | line source | 
|---|---|
| 
4
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
1 cd "$(dirname "$0")" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
2 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
3 export GEMINI_CONFIG=../test-cache | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
4 OUT_PTH=$GEMINI_CONFIG/gemini/data | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
5 GENOMIC_REGION=3:187000000-187500000 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
6 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
7 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
8 if [ -n "$1" ]; then | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
9 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
10 IN_PTH="$1" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
11 # downsample all vcf and bed annotation files to the region of interest and reindex | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
12 for vcf in `ls $IN_PTH/*.gz | grep -v hprd_interaction_edges.gz -` | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
13 do | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
14 python ./shrink_tabix.py $vcf -r $GENOMIC_REGION -o $OUT_PTH/`basename $vcf` | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
15 done | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
16 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
17 # downsample gene_table files to the region of interest | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
18 echo "$IN_PTH/summary_gene_table_v75 -> $OUT_PTH/summary_gene_table_v75" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
19 python ./shrink_simple_tab.py $IN_PTH/summary_gene_table_v75 -r chr$GENOMIC_REGION -c 0 8 9 -n 1 -o $OUT_PTH/summary_gene_table_v75 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
20 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
21 echo "$IN_PTH/detailed_gene_table_v75 -> $OUT_PTH/detailed_gene_table_v75" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
22 python ./shrink_simple_tab.py $IN_PTH/detailed_gene_table_v75 -r chr$GENOMIC_REGION -c 0 11 12 -n 1 -o $OUT_PTH/detailed_gene_table_v75 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
23 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
24 # filter kegg_pathway files to retain only records of the genes listed | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
25 # in the downsampled summary_gene_table | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
26 for kegg in `ls $IN_PTH/kegg_pathways_*` | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
27 do | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
28 echo "$kegg -> $OUT_PTH/`basename $kegg`" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
29 cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Fwf - $kegg > $OUT_PTH/`basename $kegg` | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
30 done | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
31 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
32 # filter hprd_interaction file to retain only records of the genes listed | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
33 # in the downsampled summary_gene_table | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
34 echo "$IN_PTH/hprd_interaction_edges.gz -> $OUT_PTH/hprd_interaction_edges.gz" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
35 bgzip -dc $IN_PTH/hprd_interaction_edges.gz > $OUT_PTH/hprd_interaction_edges | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
36 cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Ff - $OUT_PTH/hprd_interaction_edges | bgzip > $OUT_PTH/hprd_interaction_edges.gz | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
37 rm $OUT_PTH/hprd_interaction_edges | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
38 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
39 # filter cancer_gene_census file to retain only records of the genes listed | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
40 # in the downsampled summary_gene_table; | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
41 # TO DO: make the filter stricter by looking for matches only in the first | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
42 # column of the cancer_gene_census file (but the file is relatively small anyway) | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
43 echo "$IN_PTH/cancer_gene_census.20140120.tsv -> $OUT_PTH/cancer_gene_census.20140120.tsv" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
44 cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Fwf - $IN_PTH/cancer_gene_census.20140120.tsv > $OUT_PTH/cancer_gene_census.20140120.tsv | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
45 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
46 else | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
47 echo "no path to gemini annotation files provided - only building test databases" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
48 fi | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
49 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
50 | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
51 # now use gemini load to build the test databases | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
52 echo "Building gemini test databases" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
53 echo "Test databases for gemini_load" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
54 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff ../gemini_load_result1.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
55 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff --skip-gene-tables --no-load-genotypes ../gemini_load_result2.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
56 echo "Test database for gemini_amend" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
57 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -t snpEff ../gemini_amend_input.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
58 echo "Test database for gemini_annotate" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
59 bgzip -c build-data/anno.bed > build-data/anno.bed.gz | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
60 tabix --force -p bed build-data/anno.bed.gz | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
61 cp ../gemini_load_result1.db ../gemini_annotate_result.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
62 gemini --annotation-dir $OUT_PTH annotate -f build-data/anno.bed.gz -c anno5 -a count ../gemini_annotate_result.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
63 echo "Test database for gemini_set_somatic" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
64 cp ../gemini_load_result1.db ../gemini_is_somatic_result.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
65 gemini set_somatic --min-somatic-score 5.65 ../gemini_is_somatic_result.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
66 echo "Test database for gemini_de_novo and gemini_mendel_errors" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
67 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.de_novo.vcf -p build-data/test.de_novo.ped -t snpEff ../gemini_de_novo_input.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
68 echo "Test database for gemini_comp_hets" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
69 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.comp_het.vcf -p build-data/test.comp_het.ped -t snpEff ../gemini_comphets_input.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
70 echo "Test databases for gemini_autosomal" | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
71 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -p build-data/test.auto_rec.ped -t snpEff ../gemini_auto_rec_input.db | 
| 
 
5bcaca8085bd
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 283362494058ed64143b1f27afb447b8a1cb4313
 
iuc 
parents:  
diff
changeset
 | 
72 gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_dom.vcf -p build-data/test.auto_dom.ped -t snpEff ../gemini_auto_dom_input.db | 
