Mercurial > repos > yhoogstrate > fuma
diff fuma.xml @ 2:86526900cb8f draft
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/fuma_galaxy_wrapper commit 3da3fcc0204205d4899763f9fe63edf9aa16d5a2
author | yhoogstrate |
---|---|
date | Mon, 12 Oct 2015 04:17:07 -0400 |
parents | 54ce44828e1b |
children | 4966079d474b |
line wrap: on
line diff
--- a/fuma.xml Mon Jun 01 06:45:40 2015 -0400 +++ b/fuma.xml Mon Oct 12 04:17:07 2015 -0400 @@ -1,116 +1,182 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="fuma" name="FuMa" version="2.7.1.b"> - <description>FuMa (FusionMatcher) matches detected fusion genes based on gene name subset matching (designed in particular for RNA-Seq).</description> - - <requirements> - <requirement type="package" version="2.7.1">fuma</requirement> - </requirements> - - <version_command>fuma --version 2>&1 | head -n 1</version_command><!-- -V also works, but is not GNU standard --> - - <command> - #import pipes - - #set $gene_annotations = [] - #set $samples = [] - #set $links = [] - - #for $i, $d in enumerate( $datasets ) - - #set $sample_name = pipes.quote(str($d['sample'].name)) - - #set $gene_annotations = $gene_annotations + [ "ga_" + str($i) + ":" + str($d['gene_annotation'].file_name) ] - - #set $samples = $samples + [ $sample_name + ":" + str($d['format']) + ":" + str($d['sample'].file_name) ] - #set $links = $links + [ $sample_name + ":" + str("ga_") + str($i) ] - #end for - - #set $gene_annotations_str = " ".join(gene_annotations) - #set $samples_str = " ".join(samples) - #set $links_str = " ".join(links) - - fuma - -a - $gene_annotations_str - -s - $samples_str - -l - $links_str - #if $output_format.value == "list_boolean" - -f list - #else - -f $output_format.value - #end if - -o $fuma_overview ; - - - - #if $output_format.value == "list_boolean" - fuma-list-to-boolean-list -o tmp.txt $fuma_overview && - mv tmp.txt $fuma_overview - #end if - </command> - - <inputs> - <repeat name="datasets" title="FusionGene Datasets" min="2"> - <param name="sample" type="data" format="txt,tabular" label="Dataset (RNA-Seq fusion gene detection experiment)" /> - <param name="format" type="select" label="Format of dataset"> - <option value="chimerascan">ChimeraScan</option> - <option value="defuse">DeFuse</option> - <option value="complete-genomics">Complete Genomics</option> - <option 
value="fusion-catcher_final">Fusion Catcher (final-list file)</option> - <option value="fusionmap">FusionMap</option> - <option value="trinity-gmap">GMAP (As step after Trinity)</option> - <option value="oncofuse">OncoFuse</option> - <option value="rna-star_chimeric">STAR (chimeric file)</option> - <option value="tophat-fusion_pre">Tophat Fusion Pre (fusions.out)</option> - <option value="tophat-fusion_post_potential_fusion">Tophat Fusion Post (potential_fusion.txt)</option> - <option value="tophat-fusion_post_result">Tophat Fusion Post (result.txt)</option> - </param> - <param name="gene_annotation" type="data" format="bed" label="Corresponding gene-name annotation file (BED format)" help="Make use of persistent gene annotations! Gene annotations should only be different if different reference genome builds were used." /> - </repeat> - - <param name="output_format" type="select" label="Output format"> - <option value="list_boolean" selected="true">List (Boolean)</option> - <option value="list">List</option> - <option value="summary">Count summary</option> - </param> - </inputs> - - <outputs> - <data format="tabular" name="fuma_overview" label="${tool.name} on ${', '.join([ str(d['sample'].hid)+': '+d['sample'].name for d in $datasets ])}" /> - </outputs> - - <tests> - <test> - <!-- <repeat name="datasets"> --> - <param name="datasets_0|sample" value="chimerascan.txt" ftype="tabular" /> - <param name="datasets_0|format" value="chimerascan" /> - <param name="datasets_0|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> - <!-- </repeat> --> - <!-- <repeat name="datasets"> --> - <param name="datasets_1|sample" value="defuse.txt" ftype="tabular" /> - <param name="datasets_1|format" value="defuse" /> - <param name="datasets_1|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> - <!-- </repeat> --> - <!-- <repeat name="datasets"> --> - <param name="datasets_2|sample" value="fusion-map.txt" ftype="tabular" /> - <param name="datasets_2|format" 
value="fusionmap" /> - <param name="datasets_2|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> - <!-- </repeat> --> - <!-- <repeat name="datasets"> --> - <param name="datasets_3|sample" value="edgren_tp.txt" ftype="tabular" /> - <param name="datasets_3|format" value="fusionmap" /> - <param name="datasets_3|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> - <!-- </repeat> --> - - <param name="output_format" value="summary" /> - - <output name="fuma_overview" file="output.txt" /> - </test> - </tests> - - <help>============ +<tool id="fuma" name="FuMa" version="2.10.0.a"> + <description>match detected fusion genes based on gene names (in particular for RNA-Seq).</description> + + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="2.10.0">fuma</requirement> + </requirements> + + <version_command>fuma --version 2>&1 | head -n 1</version_command><!-- -V also works, but is not GNU standard --> + + <command><![CDATA[ + #import pipes + + #set $gene_annotations = [] + #set $samples = [] + #set $links = [] + + #for $i, $d in enumerate( $datasets ) + #set $sample_name = pipes.quote(str($d['sample'].name)) + + #set $gene_annotations = $gene_annotations + [ "ga_" + str($i) + ":" + str($d['gene_annotation'].file_name) ] + + #set $samples = $samples + [ $sample_name + ":" + str($d['format']) + ":" + str($d['sample'].file_name) ] + #set $links = $links + [ $sample_name + ":" + str("ga_") + str($i) ] + #end for + + #set $gene_annotations_str = " ".join(gene_annotations) + #set $samples_str = " ".join(samples) + #set $links_str = " ".join(links) + + fuma + -m + $params.matching_method + + $params.strand_specific_matching + $params.acceptor_donor_order_specific_matchig + + -a + $gene_annotations_str + -s + $samples_str + -l + $links_str + #if $params.output_format.value == "list_boolean" + -f list + #else + -f $params.output_format.value + #end if + -o $fuma_overview ; + + + #if 
$params.output_format.value == "list_boolean" + fuma-list-to-boolean-list -o tmp.txt $fuma_overview ; + mv tmp.txt $fuma_overview + #end if + ]]></command> + + <inputs> + <repeat name="datasets" title="FusionGene Datasets" min="2"> + <param name="sample" type="data" format="txt,tabular" label="Dataset (RNA-Seq fusion gene detection experiment)" /> + <param name="format" type="select" label="Format of dataset"> + <option value="chimera">Chimera prettyPrint()</option> + <option value="chimerascan">ChimeraScan</option> + <option value="defuse">DeFuse</option> + <option value="complete-genomics">Complete Genomics var/mastervar</option> + <option value="fusion-catcher_final">Fusion Catcher (final-list file)</option> + <option value="fusionmap">FusionMap</option> + <option value="trinity-gmap">GMAP (As step after Trinity)</option> + <option value="oncofuse">OncoFuse</option> + <option value="rna-star_chimeric">STAR (chimeric file)</option> + <option value="star-fusion_final">STAR-Fusion (candidates.final)</option> + <option value="tophat-fusion_pre">Tophat Fusion Pre (fusions.out)</option> + <option value="tophat-fusion_post_potential_fusion">Tophat Fusion Post (potential_fusion.txt)</option> + <option value="tophat-fusion_post_result">Tophat Fusion Post (result.txt)</option> + <option value="tophat-fusion_post_result_html">Tophat Fusion Post (result.html)</option> + </param> + <param name="gene_annotation" type="data" format="bed" label="Corresponding gene-name annotation file (BED format)" help="Make use of persistent gene annotations! Gene annotations should only be different if different reference genome builds were used." 
/> + </repeat> + + <conditional name="params"> + <param name="settingsType" type="select" label="Settings to use" help="You can use the default settings or set custom values for any FuMa parameter."> + <option value="preSet" selected="true">Use Defaults</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet"> + <param name="strand_specific_matching" type="hidden" value="--strand-specific-matching" /> + <param name="acceptor_donor_order_specific_matchig" type="hidden" value="--acceptor-donor-order-specific-matching" /> + </when> + <when value="full"> + <param name="matching_method" type="select" label="Matching method: technique used to match fusion genes based on annotated gene sets" help="Overlap is the most sensitive but also more sensitive for long gene artefacts; subset is the recommended technique and EGM is conservative."> + <option value="overlap">Overlap</option> + <option value="subset" selected="True">Subset</option> + <option value="egm">Exact Geneset Matching (EGM)</option> + </param> + + <param name="strand_specific_matching" type="boolean" checked="True" truevalue="--strand-specific-matching" falsevalue="" label="Consider fusion genes distinct when the breakpoints have different strands" help="Only a limited number of file formats support this feature." 
/> + <param name="acceptor_donor_order_specific_matchig" type="boolean" checked="True" truevalue="--acceptor-donor-order-specific-matching" falsevalue="" label="Consider fusion genes distinct when the donor and acceptor sites are swapped (A,B) != (B,A)" help="This settings is not recommended when fusion genes detected in DNA-Seq are used" /> + + <param name="output_format" type="select" label="Output format"> + <option value="list_boolean" selected="true">List (Boolean)</option> + <option value="list">List</option> + <option value="summary">Count summary</option> + </param> + </when> + </conditional> + </inputs> + + <outputs> + <data format="tabular" name="fuma_overview" label="${tool.name} on ${', '.join([ str(d['sample'].hid)+': '+d['sample'].name for d in $datasets ])}" /> + </outputs> + + <tests> + <test> + <!-- <repeat name="datasets"> --> + <param name="datasets_0|sample" value="chimerascan.txt" ftype="tabular" /> + <param name="datasets_0|format" value="chimerascan" /> + <param name="datasets_0|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> + <!-- </repeat> --> + <!-- <repeat name="datasets"> --> + <param name="datasets_1|sample" value="defuse.txt" ftype="tabular" /> + <param name="datasets_1|format" value="defuse" /> + <param name="datasets_1|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> + <!-- </repeat> --> + <!-- <repeat name="datasets"> --> + <param name="datasets_2|sample" value="fusion-map.txt" ftype="tabular" /> + <param name="datasets_2|format" value="fusionmap" /> + <param name="datasets_2|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> + <!-- </repeat> --> + <!-- <repeat name="datasets"> --> + <param name="datasets_3|sample" value="edgren_tp.txt" ftype="tabular" /> + <param name="datasets_3|format" value="fusionmap" /> + <param name="datasets_3|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> + <!-- </repeat> --> + + <param name="settingsType" value="full" /> + + <param 
name="matching_method" value="subset" /> + <param name="strand_specific_matching" value="--strand-specific-matching" /> + <param name="acceptor_donor_order_specific_matchig" value="--acceptor-donor-order-specific-matching" /> + <param name="output_format" value="list_boolean" /> + + <output name="fuma_overview" file="output_test_1.txt" /> + </test> + <test> + <!-- <repeat name="datasets"> --> + <param name="datasets_0|sample" value="chimerascan.txt" ftype="tabular" /> + <param name="datasets_0|format" value="chimerascan" /> + <param name="datasets_0|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> + <!-- </repeat> --> + <!-- <repeat name="datasets"> --> + <param name="datasets_1|sample" value="defuse.txt" ftype="tabular" /> + <param name="datasets_1|format" value="defuse" /> + <param name="datasets_1|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> + <!-- </repeat> --> + <!-- <repeat name="datasets"> --> + <param name="datasets_2|sample" value="fusion-map.txt" ftype="tabular" /> + <param name="datasets_2|format" value="fusionmap" /> + <param name="datasets_2|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> + <!-- </repeat> --> + <!-- <repeat name="datasets"> --> + <param name="datasets_3|sample" value="edgren_tp.txt" ftype="tabular" /> + <param name="datasets_3|format" value="fusionmap" /> + <param name="datasets_3|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" /> + <!-- </repeat> --> + + <param name="settingsType" value="full" /> + + <param name="matching_method" value="subset" /> + <param name="strand_specific_matching" value="" /> + <param name="acceptor_donor_order_specific_matchig" value="" /> + <param name="output_format" value="list_boolean" /> + + <output name="fuma_overview" file="output_test_2.txt" /> + </test> + </tests> + + <help><![CDATA[ +============ Introduction ============ @@ -131,6 +197,8 @@ +-------------------+-----------------------+-------------------------------------+ |Tools | File | Format 
string | +===================+=======================+=====================================+ +|Chimera | prettyPrint() output | chimera | ++-------------------+-----------------------+-------------------------------------+ |ChimeraScan | chimeras.bedpe | chimerascan | +-------------------+-----------------------+-------------------------------------+ |Complete Genomics | highConfidenceJu*.tsv | complete-genomics | @@ -153,19 +221,30 @@ +-------------------+-----------------------+-------------------------------------+ |RNA STAR | Chimeric.out.junction | rna-star_chimeric | +-------------------+-----------------------+-------------------------------------+ +|STAR Fusion | _candidates.final | star-fusion_final | ++-------------------+-----------------------+-------------------------------------+ |TopHat Fusion pre | fusions.out | tophat-fusion_pre | +-------------------+-----------------------+-------------------------------------+ |TopHat Fusion post | potential_fusion.txt | tophat-fusion_post_potential_fusion | +-------------------+-----------------------+-------------------------------------+ |TopHat Fusion post | result.txt | tophat-fusion_post_result | +-------------------+-----------------------+-------------------------------------+ +|TopHat Fusion post | result.html | tophat-fusion_post_result_html | ++-------------------+-----------------------+-------------------------------------+ To annotate genes at the breakpoints you must provide a BED file that contains gene annotations for the genome build that was used. Make sure **your BED file contains one gene per line**. You should use BED files that contain one exon per line only if you want to restrict your analysis to fusion genes detected within exons. The UCSC genome browser provides a very simple way of obtaining BED files with one gene per line by selecting their *RefSeq Genes*-track and *knownGene*-table and setting the export format to BED. Galaxy should have a built-in UCSC table browser. 
- </help> - - <citations> - </citations> + ]]></help> + + <citations> + <citation type="bibtex"> + @unpublished{fuma, + author = {Youri Hoogstrate}, + title = {FuMa: reporting overlap in RNA-seq detected fusion genes}, + url = { https://github.com/yhoogstrate/fuma } + } + </citation> + </citations> </tool>