Mercurial > repos > iuc > lofreq_viterbi
changeset 0:33a416e1659e draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lofreq commit 9efcb813ab17041c7f5aad834dfff45bd7046c60"
author | iuc |
---|---|
date | Tue, 17 Dec 2019 17:25:37 -0500 |
parents | |
children | ecd80c7c3886 |
files | lofreq_viterbi.xml macros.xml test-data/alnqual-out1.bam test-data/alnqual-out2.bam test-data/alnqual-out3.bam test-data/alnqual-out4.bam test-data/alnqual-out5.bam test-data/call-out1.vcf test-data/call-out2.vcf test-data/indelqual-out1.bam test-data/indelqual-out2.bam test-data/indelqual-out3.bam test-data/lofreq-in1.bam test-data/pBR322.fa test-data/viterbi-out1.bam test-data/viterbi-out2.bam tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample |
diffstat | 18 files changed, 295 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lofreq_viterbi.xml Tue Dec 17 17:25:37 2019 -0500
@@ -0,0 +1,55 @@
+<tool id="lofreq_viterbi" name="Realign reads" version="@WRAPPER_VERSION@0" python_template_version="3.5">
+    <description>with LoFreq viterbi</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <requirement type="package" version="1.9">samtools</requirement>
+    </expand>
+    <!-- lofreq viterbi emits a (likely unsorted) BAM on stdout, so its output is piped
+         through samtools sort, which writes the final realigned dataset. -->
+    <command detect_errors="exit_code"><![CDATA[
+        @PREPARE_REF@
+        lofreq viterbi --ref '$reference_fasta_fn' --out - $keepflags --defqual $defqual '$reads' | samtools sort - -O BAM -o '$realigned'
+    ]]></command>
+    <inputs>
+        <param name="reads" type="data" format="bam" label="Reads to realign" />
+        <expand macro="reference_interface" />
+        <!-- falsevalue must be empty: Galaxy otherwise substitutes the literal word "false"
+             for an unchecked boolean, which would land on the lofreq command line. -->
+        <param argument="--keepflags" type="boolean" truevalue="--keepflags" falsevalue="" label="Don't delete flags MC, MD, NM, and A" help="These flags are all prone to changing during realignment" />
+        <param argument="--defqual" type="integer" value="-1" label="Quality to assume for all bases with BQ2" />
+    </inputs>
+    <outputs>
+        <data name="realigned" format="bam" label="${tool.name} on ${on_string}: Realigned reads" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="reads" ftype="bam" value="lofreq-in1.bam" />
+            <param name="ref_selector" value="history" />
+            <param name="ref" ftype="fasta" value="pBR322.fa" />
+            <output name="realigned" file="viterbi-out1.bam" />
+        </test>
+        <test>
+            <param name="reads" ftype="bam" value="lofreq-in1.bam" />
+            <param name="ref_selector" value="history" />
+            <param name="keepflags" value="true" />
+            <param name="ref" ftype="fasta" value="pBR322.fa" />
+            <output name="realigned" file="viterbi-out2.bam" />
+        </test>
+    </tests>
+    <help><![CDATA[
+Usage: lofreq viterbi [options] in.bam
+
+Options:
+ -f | --ref FILE Indexed reference fasta file [null]
+ -k | --keepflags Don't delete flags MC, MD, NM and A, which are all prone to change during realignment.
+ -q | --defqual INT Assume INT as quality for all bases with BQ2. Default (=-1) is to use median quality of bases in read.
+ -o | --out FILE Output BAM file [- = stdout = default]
+ --verbose Be verbose
+
+NOTE: Output BAM file will (likely) be unsorted (use samtools sort, e.g. lofreq viterbi ... | samtools sort -')
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Dec 17 17:25:37 2019 -0500
@@ -0,0 +1,91 @@
+<macros>
+    <token name="@WRAPPER_VERSION@">@TOOL_VERSION@+galaxy</token>
+    <token name="@TOOL_VERSION@">2.1.3.1</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">lofreq</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/nar/gks918</citation>
+            <yield />
+        </citations>
+    </xml>
+    <!-- Defines $reference_fasta_fn for the calling command. A history reference is
+         symlinked as reference.fa and indexed with lofreq faidx (the job aborts if that
+         fails); a cached reference uses the path column of the selected data table entry. -->
+    <token name="@PREPARE_REF@"><![CDATA[
+        #if str($reference_source.ref_selector) == 'history':
+            #set $reference_fasta_fn = 'reference.fa'
+            ln -s '$reference_source.ref' $reference_fasta_fn &&
+            lofreq faidx $reference_fasta_fn 2>&1 || { echo "Error running lofreq faidx for indexing fasta reference for lofreq" >&2; exit 1; } &&
+        #else
+            #set $reference_fasta_fn = str($reference_source.ref.fields.path)
+        #end if
+    ]]></token>
+    <xml name="reference_interface">
+        <conditional name="reference_source">
+            <param name="ref_selector" type="select"
+            label="Choose the source for the reference genome">
+                <option value="cached">Locally cached</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached">
+                <param argument="--ref" type="select"
+                label="Reference genome">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" column="dbkey" key="dbkey" ref="reads" />
+                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param argument="--ref" type="data" format="fasta" label="Reference" help="Reference sequence" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="handle_existing_alnqual">
+        <conditional name="alnqual_choice">
+            <param name="alnquals_to_use" type="select"
+            label="Use the following alignment quality scores">
+                <option value="">Base and indel alignment qualities (BAQ and IDAQ)</option>
+                <option value="-A">Only base alignment qualities (BAQ)</option>
+                <option value="-B">Only indel alignment qualities (IDAQ)</option>
+            </param>
+            <when value="-B">
+                <param name="extended_baq" type="hidden" value="" />
+            </when>
+            <when value="">
+                <param argument="-e" name="extended_baq" type="boolean" checked="true" truevalue="" falsevalue="-e"
+                label="If BAQ needs to be computed, calculate extended BAQ?" />
+            </when>
+            <when value="-A">
+                <param argument="-e" name="extended_baq" type="boolean" checked="true" truevalue="" falsevalue="-e"
+                label="If BAQ needs to be computed, calculate extended BAQ?" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="handle_alnqual" token_mode="Use">
+        <conditional name="alnqual_choice">
+            <param name="alnquals_to_use" type="select"
+            label="@MODE@ the following alignment quality scores">
+                <option value="">Base and indel alignment qualities (BAQ and IDAQ)</option>
+                <option value="-A">Only base alignment qualities (BAQ)</option>
+                <option value="-B">Only indel alignment qualities (IDAQ)</option>
+            </param>
+            <when value="-B">
+                <param name="extended_baq" type="hidden" value="" />
+            </when>
+            <when value="">
+                <param argument="-e" name="extended_baq" type="boolean" checked="true" truevalue="" falsevalue="-e"
+                label="Use extended BAQ?" />
+            </when>
+            <when value="-A">
+                <param argument="-e" name="extended_baq" type="boolean" checked="true" truevalue="" falsevalue="-e"
+                label="Use extended BAQ?" />
+            </when>
+        </conditional>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/call-out1.vcf Tue Dec 17 17:25:37 2019 -0500 @@ -0,0 +1,19 @@ +##fileformat=VCFv4.0 +##fileDate=20191125 +##source=lofreq call --verbose --ref reference.fa --sig 0.01 --bonf dynamic --no-default-filter -r pBR322:1-2180 -o /tmp/lofreq2_call_parallel3mrmthi_/0.vcf.gz alignments.bam +##reference=reference.fa +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw Depth"> +##INFO=<ID=AF,Number=1,Type=Float,Description="Allele Frequency"> +##INFO=<ID=SB,Number=1,Type=Integer,Description="Phred-scaled strand bias at this position"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> +##INFO=<ID=CONSVAR,Number=0,Type=Flag,Description="Indicates that the variant is a consensus variant (as opposed to a low frequency variant)."> +##INFO=<ID=HRUN,Number=1,Type=Integer,Description="Homopolymer length to the right of report indel position"> +##FILTER=<ID=min_snvqual_38,Description="Minimum SNV Quality (Phred) 38"> +##FILTER=<ID=min_indelqual_20,Description="Minimum Indel Quality (Phred) 20"> +##FILTER=<ID=min_dp_10,Description="Minimum Coverage 10"> +##FILTER=<ID=sb_fdr,Description="Strand-Bias Multiple Testing Correction: fdr corr. pvalue > 0.001000"> +##FILTER=<ID=min_snvqual_38,Description="Minimum SNV Quality (Phred) 38"> +##FILTER=<ID=min_indelqual_20,Description="Minimum Indel Quality (Phred) 20"> +#CHROM POS ID REF ALT QUAL FILTER INFO +pBR322 1134 . C T 49314 PASS DP=1767;AF=1.000000;SB=0;DP4=0,0,910,857
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/call-out2.vcf Tue Dec 17 17:25:37 2019 -0500 @@ -0,0 +1,27 @@ +##fileformat=VCFv4.0 +##fileDate=20191204 +##source=lofreq call --verbose --ref reference.fa --sig 1 --bonf 1 --no-default-filter --no-default-filter -r pBR322:1-2180 -o /tmp/tmpjsbggC/job_working_directory/000/8/working/pp-tmp/lofreq2_call_parallelj9yxuugx/0.vcf.gz reads.bam +##reference=reference.fa +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw Depth"> +##INFO=<ID=AF,Number=1,Type=Float,Description="Allele Frequency"> +##INFO=<ID=SB,Number=1,Type=Integer,Description="Phred-scaled strand bias at this position"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> +##INFO=<ID=CONSVAR,Number=0,Type=Flag,Description="Indicates that the variant is a consensus variant (as opposed to a low frequency variant)."> +##INFO=<ID=HRUN,Number=1,Type=Integer,Description="Homopolymer length to the right of report indel position"> +#CHROM POS ID REF ALT QUAL FILTER INFO +pBR322 815 . A G 0 . DP=665;AF=0.003008;SB=6;DP4=333,311,0,2 +pBR322 861 . A C 0 . DP=946;AF=0.002114;SB=3;DP4=447,497,0,2 +pBR322 1001 . A C 0 . DP=1797;AF=0.000556;SB=3;DP4=877,918,1,0 +pBR322 1013 . C G 0 . DP=1773;AF=0.000564;SB=0;DP4=875,897,0,1 +pBR322 1068 . T G 0 . DP=1774;AF=0.000564;SB=3;DP4=853,920,1,0 +pBR322 1084 . G T 0 . DP=1789;AF=0.000559;SB=3;DP4=875,913,1,0 +pBR322 1113 . T A 0 . DP=1784;AF=0.000561;SB=0;DP4=885,898,0,1 +pBR322 1134 . C T 49314 . DP=1767;AF=1.000000;SB=0;DP4=0,0,910,857 +pBR322 1193 . G A 0 . DP=1698;AF=0.000589;SB=3;DP4=865,832,0,1 +pBR322 1218 . A C 0 . DP=1708;AF=0.000585;SB=3;DP4=875,831,0,1 +pBR322 1230 . T C 0 . DP=1759;AF=0.000569;SB=3;DP4=907,850,0,1 +pBR322 1256 . A G 0 . DP=1746;AF=0.000573;SB=0;DP4=902,842,1,0 +pBR322 1498 . C G 0 . 
DP=1195;AF=0.000837;SB=3;DP4=588,606,1,0 +pBR322 1503 . T G 0 . DP=1156;AF=0.000865;SB=3;DP4=563,592,1,0 +pBR322 1505 . G A 0 . DP=1137;AF=0.000880;SB=0;DP4=560,576,0,1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pBR322.fa Tue Dec 17 17:25:37 2019 -0500 @@ -0,0 +1,74 @@ +>pBR322 +TTCTCATGTTTGACAGCTTATCATCGATAAGCTTTAATGCGGTAGTTTATCACAGTTAAA +TTGCTAACGCAGTCAGGCACCGTGTATGAAATCTAACAATGCGCTCATCGTCATCCTCGG +CACCGTCACCCTGGATGCTGTAGGCATAGGCTTGGTTATGCCGGTACTGCCGGGCCTCTT +GCGGGATATCGTCCATTCCGACAGCATCGCCAGTCACTATGGCGTGCTGCTAGCGCTATA +TGCGTTGATGCAATTTCTATGCGCACCCGTTCTCGGAGCACTGTCCGACCGCTTTGGCCG +CCGCCCAGTCCTGCTCGCTTCGCTACTTGGAGCCACTATCGACTACGCGATCATGGCGAC +CACACCCGTCCTGTGGATCCTCTACGCCGGACGCATCGTGGCCGGCATCACCGGCGCCAC +AGGTGCGGTTGCTGGCGCCTATATCGCCGACATCACCGATGGGGAAGATCGGGCTCGCCA +CTTCGGGCTCATGAGCGCTTGTTTCGGCGTGGGTATGGTGGCAGGCCCCGTGGCCGGGGG +ACTGTTGGGCGCCATCTCCTTGCATGCACCATTCCTTGCGGCGGCGGTGCTCAACGGCCT +CAACCTACTACTGGGCTGCTTCCTAATGCAGGAGTCGCATAAGGGAGAGCGTCGACCGAT +GCCCTTGAGAGCCTTCAACCCAGTCAGCTCCTTCCGGTGGGCGCGGGGCATGACTATCGT +CGCCGCACTTATGACTGTCTTCTTTATCATGCAACTCGTAGGACAGGTGCCGGCAGCGCT +CTGGGTCATTTTCGGCGAGGACCGCTTTCGCTGGAGCGCGACGATGATCGGCCTGTCGCT +TGCGGTATTCGGAATCTTGCACGCCCTCGCTCAAGCCTTCGTCACTGGTCCCGCCACCAA +ACGTTTCGGCGAGAAGCAGGCCATTATCGCCGGCATGGCGGCCGACGCGCTGGGCTACGT +CTTGCTGGCGTTCGCGACGCGAGGCTGGATGGCCTTCCCCATTATGATTCTTCTCGCTTC +CGGCGGCATCGGGATGCCCGCGTTGCAGGCCATGCTGTCCAGGCAGGTAGATGACGACCA +TCAGGGACAGCTTCAAGGATCGCTCGCGGCTCTTACCAGCCTAACTTCGATCACTGGACC +GCTGATCGTCACGGCGATTTATGCCGCCTCGGCGAGCACATGGAACGGGTTGGCATGGAT +TGTAGGCGCCGCCCTATACCTTGTCTGCCTCCCCGCGTTGCGTCGCGGTGCATGGAGCCG +GGCCACCTCGACCTGAATGGAAGCCGGCGGCACCTCGCTAACGGATTCACCACTCCAAGA +ATTGGAGCCAATCAATTCTTGCGGAGAACTGTGAATGCGCAAACCAACCCTTGGCAGAAC +ATATCCATCGCGTCCGCCATCTCCAGCAGCCGCACGCGGCGCATCTCGGGCAGCGTTGGG +TCCTGGCCACGGGTGCGCATGATCGTGCTCCTGTCGTTGAGGACCCGGCTAGGCTGGCGG +GGTTGCCTTACTGGTTAGCAGAATGAATCACCGATACGCGAGCGAACGTGAAGCGACTGC +TGCTGCAAAACGTCTGCGACCTGAGCAACAACATGAATGGTCTTCGGTTTCCGTGTTTCG +TAAAGTCTGGAAACGCGGAAGTCAGCGCCCTGCACCATTATGTTCCGGATCTGCATCGCA +GGATGCTGCTGGCTACCCTGTGGAACACCTACATCTGTATTAACGAAGCGCTGGCATTGA +CCCTGAGTGATTTTTCTCTGGTCCCGCCGCATCCATACCGCCAGTTGTTTACCCTCACAA 
+CGTTCCAGTAACCGGGCATGTTCATCATCAGTAACCCGTATCGTGAGCATCCTCTCTCGT +TTCATCGGTATCATTACCCCCATGAACAGAAATCCCCCTTACACGGAGGCATCAGTGACC +AAACAGGAAAAAACCGCCCTTAACATGGCCCGCTTTATCAGAAGCCAGACATTAACGCTT +CTGGAGAAACTCAACGAGCTGGACGCGGATGAACAGGCAGACATCTGTGAATCGCTTCAC +GACCACGCTGATGAGCTTTACCGCAGCTGCCTCGCGCGTTTCGGTGATGACGGTGAAAAC +CTCTGACACATGCAGCTCCCGGAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGGGAGC +AGACAAGCCCGTCAGGGCGCGTCAGCGGGTGTTGGCGGGTGTCGGGGCGCAGCCATGACC +CAGTCACGTAGCGATAGCGGAGTGTATACTGGCTTAACTATGCGGCATCAGAGCAGATTG +TACTGAGAGTGCACCATATGCGGTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACC +GCATCAGGCGCTCTTCCGCTTCCTCGCTCACTGACTCGCTGCGCTCGGTCGTTCGGCTGC +GGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATACGGTTATCCACAGAATCAGGGGATA +ACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAACCGTAAAAAGGCCG +CGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCT +CAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAA +GCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTC +TCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGT +AGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCG +CCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGG +CAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCT +TGAAGTGGTGGCCTAACTACGGCTACACTAGAAGGACAGTATTTGGTATCTGCGCTCTGC +TGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCG +CTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTC +AAGAAGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTT +AAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAA +AATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAAT +GCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCT +GACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTG +CAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAG +CCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTA +ATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTG +CCATTGCTGCAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCG +GTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCT 
+CCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTA +TGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTG +GTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCC +CGGCGTCAACACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTG +GAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGA +TGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTG +GGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAAT +GTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTC +TCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCA +CATTTCCCCGAAAAGTGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCT +ATAAAAATAGGCGTATCACGAGGCCCTTTCGTCTTCAAGAA \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Tue Dec 17 17:25:37 2019 -0500 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Dec 17 17:25:37 2019 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- fasta_indexes: FASTA index locations, read from tool-data/fasta_indexes.loc ("#" starts a comment) -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>