changeset 0:7345cb1bb772 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/arriba commit c1d05da7c2c76feae94cbc640be7b010f31397d2-dirty"
author jjohnson
date Fri, 11 Feb 2022 19:09:19 +0000
parents
children 55ca46d68a57
files arriba_download_reference.xml macros.xml static/images/draw-fusions-example.png test-data/genome.fasta.gz test-data/genome.gtf.gz tool-data/arriba_indexes.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 8 files changed, 354 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/arriba_download_reference.xml	Fri Feb 11 19:09:19 2022 +0000
@@ -0,0 +1,116 @@
+<tool id="arriba_download_reference" name="Arriba Reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
+    <description>Download to history</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <command detect_errors="exit_code"><![CDATA[
+    echo '$arriba_reference_name' > '$star_index' &&
+    BASE_DIR="\$(dirname "\$(dirname "\$(which arriba)")")" &&
+    REF_SCRIPT="\$(find "\$BASE_DIR" -name 'download_references.sh')" &&
+    #if $is_test != 'yes'
+    "\$REF_SCRIPT" '$arriba_reference_name' &&
+    cp *.fa*  '$genome_fasta' &&
+    cp *.gtf*  '$genome_gtf' &&
+    mv STAR_index_* '$star_index.extra_files_path'
+    #else
+    [[ -x "\$REF_SCRIPT" ]]
+    #end if
+    ]]></command> <!-- command substitutions and the script path are double-quoted so install paths containing spaces survive word splitting -->
+    <inputs>
+        <param name="is_test" type="hidden" value="no"/> <!-- set to "yes" only by the tool test: skips the download and just checks that the script is executable -->
+        <param name="arriba_reference_name" type="select" label="Select reference">
+            <option value="GRCh38+ENSEMBL93">GRCh38+ENSEMBL93</option>
+            <option value="GRCh38+GENCODE28">GRCh38+GENCODE28</option>
+            <option value="GRCh38+RefSeq">GRCh38+RefSeq</option>
+            <option value="GRCh38viral+ENSEMBL93">GRCh38viral+ENSEMBL93</option>
+            <option value="GRCh38viral+GENCODE28">GRCh38viral+GENCODE28</option>
+            <option value="GRCh38viral+RefSeq">GRCh38viral+RefSeq</option>
+            <option value="hg38+ENSEMBL93">hg38+ENSEMBL93</option>
+            <option value="hg38+GENCODE28">hg38+GENCODE28</option>
+            <option value="hg38+RefSeq">hg38+RefSeq</option>
+            <option value="hg38viral+ENSEMBL93">hg38viral+ENSEMBL93</option>
+            <option value="hg38viral+GENCODE28">hg38viral+GENCODE28</option>
+            <option value="hg38viral+RefSeq">hg38viral+RefSeq</option>
+            <option value="GRCh37+ENSEMBL87">GRCh37+ENSEMBL87</option>
+            <option value="GRCh37+GENCODE19">GRCh37+GENCODE19</option>
+            <option value="GRCh37+RefSeq">GRCh37+RefSeq</option>
+            <option value="GRCh37viral+ENSEMBL87">GRCh37viral+ENSEMBL87</option>
+            <option value="GRCh37viral+GENCODE19">GRCh37viral+GENCODE19</option>
+            <option value="GRCh37viral+RefSeq">GRCh37viral+RefSeq</option>
+            <option value="hg19+ENSEMBL87">hg19+ENSEMBL87</option>
+            <option value="hg19+GENCODE19">hg19+GENCODE19</option>
+            <option value="hg19+RefSeq">hg19+RefSeq</option>
+            <option value="hg19viral+ENSEMBL87">hg19viral+ENSEMBL87</option>
+            <option value="hg19viral+GENCODE19">hg19viral+GENCODE19</option>
+            <option value="hg19viral+RefSeq">hg19viral+RefSeq</option>
+            <option value="hs37d5+ENSEMBL87">hs37d5+ENSEMBL87</option>
+            <option value="hs37d5+GENCODE19">hs37d5+GENCODE19</option>
+            <option value="hs37d5+RefSeq">hs37d5+RefSeq</option>
+            <option value="hs37d5viral+ENSEMBL87">hs37d5viral+ENSEMBL87</option>
+            <option value="hs37d5viral+GENCODE19">hs37d5viral+GENCODE19</option>
+            <option value="hs37d5viral+RefSeq">hs37d5viral+RefSeq</option>
+            <option value="GRCm39+GENCODEM26">GRCm39+GENCODEM26</option>
+            <option value="GRCm39+RefSeq">GRCm39+RefSeq</option>
+            <option value="GRCm39viral+GENCODEM26">GRCm39viral+GENCODEM26</option>
+            <option value="GRCm39viral+RefSeq">GRCm39viral+RefSeq</option>
+            <option value="GRCm38+GENCODEM25">GRCm38+GENCODEM25</option>
+            <option value="GRCm38+RefSeq">GRCm38+RefSeq</option>
+            <option value="GRCm38viral+GENCODEM25">GRCm38viral+GENCODEM25</option>
+            <option value="GRCm38viral+RefSeq">GRCm38viral+RefSeq</option>
+            <option value="mm39+GENCODEM26">mm39+GENCODEM26</option>
+            <option value="mm39+RefSeq">mm39+RefSeq</option>
+            <option value="mm39viral+GENCODEM26">mm39viral+GENCODEM26</option>
+            <option value="mm39viral+RefSeq">mm39viral+RefSeq</option>
+            <option value="mm10+GENCODEM25">mm10+GENCODEM25</option>
+            <option value="mm10+RefSeq">mm10+RefSeq</option>
+            <option value="mm10viral+GENCODEM25">mm10viral+GENCODEM25</option>
+            <option value="mm10viral+RefSeq">mm10viral+RefSeq</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="genome_fasta" format="fasta" label="${tool.name} ${arriba_reference_name} fasta"/>
+        <data name="genome_gtf" format="gtf" label="${tool.name} ${arriba_reference_name} GTF"/>
+        <data name="star_index" format="txt" label="${tool.name} ${arriba_reference_name} STAR index"/> <!-- the text file holds the reference name; the STAR_index_* directory is moved to this dataset's extra_files_path -->
+    </outputs>
+    <tests>
+        <!-- Downloading a genome and annotation and then building a STAR index requires too many resources for testing.
+              With is_test set to "yes" the command only checks that the download script can be located and is executable. -->
+        <test>
+            <param name="is_test" value="yes"/>
+            <param name="arriba_reference_name" value="GRCh38+ENSEMBL93"/>
+            <output name="star_index">
+                <assert_contents>
+                    <has_text text="GRCh38+ENSEMBL93"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**Arriba Reference**
+
+Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions.
+It is based on chimeric alignments found by the STAR RNA-Seq aligner.
+
+**Arriba Reference** downloads a genome sequence fasta and its related annotation GTF, and then builds a STAR index for the RNA STAR aligner.
+
+These datasets will be added to your Galaxy history:
+
+    - genome assembly fasta 
+    - genome annotation GTF 
+    - STAR index
+
+See Arriba manual pages:
+
+  - https://arriba.readthedocs.io/en/latest/workflow/
+  - https://arriba.readthedocs.io/en/latest/input-files/
+
+
+**NOTE:** This is a resource intensive process, so the results should be copied to new histories as needed rather than running this in each workflow.
+
+.. _Arriba: https://arriba.readthedocs.io/en/latest/
+
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Feb 11 19:09:19 2022 +0000
@@ -0,0 +1,209 @@
+<macros>
+    <token name="@TOOL_VERSION@">2.2.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">arriba</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/gr.257246.119</citation>
+            <yield /> <!-- children of the expanding element are inserted here, so a tool can append further citations -->
+        </citations>
+    </xml>
+    <xml name="version_command">
+        <version_command>arriba -h | grep Version | sed 's/^.* //'</version_command> <!-- keeps only the text after the last space on the "Version" line of "arriba -h" -->
+    </xml>
+    <xml name="genome_source" token_assembly_optional="false" > <!-- the assembly_optional token (default "false") is substituted into the optional attribute of the history assembly param -->
+        <conditional name="genome">
+            <param name="genome_source" type="select" label="Arriba Genome assembly and annotation source">
+                <option value="history">From your history</option>
+                <option value="cached">Use built-in Arriba</option>
+            </param>
+            <when value="history">
+                <param name="assembly" argument="-a" type="data" format="fasta" optional="@ASSEMBLY_OPTIONAL@" label="Genome assembly fasta"/>
+                <param name="annotation" argument="-g" type="data" format="gtf" label="Gene annotation in GTF format"/>
+            </when>
+            <when value="cached">
+                <param name="arriba_ref" type="select" label="Arriba Genome assembly and annotation">
+                    <options from_data_table="arriba_indexes">
+                    </options>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <token name="@GENOME_SOURCE@">
+#if str($genome.genome_source) == "history"
+    #if $genome.assembly
+        #set $genome_assembly = $genome.assembly
+    #end if
+    #set $genome_annotation = $genome.annotation
+#else
+    #set $genome_assembly = $genome.arriba_ref.fields.fasta
+    #set $genome_annotation = $genome.arriba_ref.fields.gtf
+#end if
+</token> <!-- Defines genome_assembly and genome_annotation for the command template. NOTE(review): with a history source and no assembly dataset, genome_assembly is never set; confirm that consumers guard for this. -->
+
+    <xml name="visualization_options"> <!-- cytobands input plus the optional plotting switches that the DRAW_FUSIONS token forwards to draw_fusions.R -->
+                <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/>
+                <section name="options" expanded="false" title="Draw Fusion Options">
+                    <param argument="--transcriptSelection" type="select" optional="true" label="Transcript selection">
+                        <help>By default the transcript isoform with the highest coverage is drawn.
+                             Alternatively, the transcript isoform that is provided in the columns
+                             transcript_id1 and transcript_id2 in the given fusions file can be drawn.
+                             Selecting the isoform with the highest coverage usually produces nicer plots,
+                             in the sense that the coverage track is smooth and shows a visible increase in coverage after the fusion breakpoint.
+                             However, the isoform with the highest coverage may not be the one that is involved in the fusion.
+                             Often, genomic rearrangements lead to non-canonical isoforms being transcribed.
+                             For this reason, it can make sense to rely on the transcript selection provided by the columns transcript_id1/2,
+                             which reflect the actual isoforms involved in a fusion.
+                             As a third option, the transcripts that are annotated as canonical can be drawn.
+                             Transcript isoforms tagged with appris_principal, appris_candidate, or CCDS are considered canonical.
+                        </help>
+                        <option value="coverage">coverage</option>
+                        <option value="provided">provided</option>
+                        <option value="canonical">canonical</option>
+                    </param>
+                    <param argument="--minConfidenceForCircosPlot" type="select" optional="true" label="Minimum confidence for circos plot">
+                        <help>The fusion of interest is drawn as a solid line in the circos plot.
+                              To give an impression of the overall degree of rearrangement,
+                              all other fusions are drawn as semi-transparent lines in the background.
+                              This option determines which other fusions should be included in the circos plot.
+                              Values specify the minimum confidence a fusion must have to be included.
+                              It usually makes no sense to include low-confidence fusions in circos plots,
+                              because they are abundant and unreliable, and would clutter up the circos plot.
+                              Default: medium
+                        </help>
+                        <option value="none">none - only the fusion of interest is drawn</option>
+                        <option value="low">low</option>
+                        <option value="medium">medium</option>
+                        <option value="high">high</option>
+                    </param>
+                    <param argument="--showIntergenicVicinity" type="integer" value="" min="0" optional="true" label="Intergenic Vicinity">
+                        <help>This option only applies to intergenic breakpoints.
+                              If it is set to a value greater than 0, then the script draws the genes
+                              which are no more than the given distance away from an intergenic breakpoint.
+                              Note that this option is incompatible with squishIntrons.
+                              Default: 0
+                        </help>
+                    </param>
+                    <param argument="--squishIntrons" type="select" optional="true" label="Squish introns">
+                        <help>Exons usually make up only a small fraction of a gene.
+                              They may be hard to see in the plot.
+                              Since introns are in most situations of no interest in the context of gene fusions,
+                              this switch can be used to shrink the size of introns to a fixed, negligible size.
+                              It makes sense to disable this feature, if breakpoints in introns are of importance.
+                              Default: TRUE
+                        </help>
+                        <option value="TRUE">True</option>
+                        <option value="FALSE">False</option>
+                    </param>
+
+                    <param argument="--mergeDomainsOverlappingBy" type="float" value="" min="0." max="1.0" optional="true" label="Merge Domains Overlapping By">
+                        <help>Occasionally, domains are annotated redundantly.
+                              For example, tyrosine kinase domains are frequently annotated as
+                              Protein tyrosine kinase and Protein kinase domain.
+                              In order to simplify the visualization, such domains can be merged into one,
+                              given that they overlap by the given fraction.
+                              The description of the larger domain is used.
+                              Default: 0.9
+                        </help>
+                    </param>
+                    <param argument="--printExonLabels" type="select" optional="true" label="Print Exon Labels">
+                        <help>By default the number of an exon is printed inside each exon,
+                              which is taken from the attribute exon_number of the GTF annotation.
+                              When a gene has many exons, the boxes may be too narrow to contain the labels,
+                              resulting in unreadable exon labels. In these situations,
+                              it may be better to turn off exon labels.
+                              Default: TRUE
+                        </help>
+                        <option value="TRUE">True</option>
+                        <option value="FALSE">False</option>
+                    </param>
+                    <param argument="--render3dEffect" type="select" optional="true" label="Render 3D effect">
+                        <help>Whether light and shadow should be rendered to give objects a 3D effect.
+                              Default: TRUE
+                        </help>
+                        <option value="TRUE">True</option>
+                        <option value="FALSE">False</option>
+                    </param>
+                    <param argument="--optimizeDomainColors" type="select" optional="true" label="Optimize Domain Colors">
+                        <help>By default, the script colorizes domains according to the colors
+                              specified in the file given in --annotation.
+                              This way, coloring of domains is consistent across all proteins.
+                              But since there are more distinct domains than colors,
+                              this can lead to different domains having the same color.
+                              If this option is set to TRUE, the colors are recomputed for each fusion separately.
+                              This ensures that the colors have the maximum distance for each individual fusion,
+                              but they are no longer consistent across different fusions.
+                              Default: FALSE
+                        </help>
+                        <option value="TRUE">True</option>
+                        <option value="FALSE">False</option>
+                    </param>
+                    <param argument="--color1" type="color" value="" optional="true"  label="Color of the 5' end of the fusion."/>
+                    <param argument="--color2" type="color" value="" optional="true"  label="Color of the 3' end of the fusion."/>
+                    <param argument="--pdfWidth" type="float" value="" min="1." optional="true" label="Width of PDF output file in inches"
+                           help="Default: 11.692"/>
+                    <param argument="--pdfHeight" type="float" value="" min="1." optional="true" label="Height of PDF output file in inches"
+                           help="Default: 8.267"/>
+                    <param argument="--fontSize" type="float" value="" min="0." optional="true" label="Scale the size of text"
+                           help="Default: 1.0"/>
+                </section>
+    </xml>
+    <token name="@DRAW_FUSIONS@">
+draw_fusions.R
+    --fusions='$fusions'
+    --alignments='Aligned.sortedByCoord.out.bam'
+    --annotation='$genome.annotation'
+    --output=fusions.pdf
+    #if $visualization.cytobands
+    --cytobands='$visualization.cytobands'
+    #end if
+    #if $protein_domains
+    --proteinDomains='$protein_domains'
+    #end if
+    ## Visualization Options
+    #if $visualization.options.transcriptSelection
+        --transcriptSelection=$visualization.options.transcriptSelection
+    #end if
+    #if $visualization.options.minConfidenceForCircosPlot
+        --minConfidenceForCircosPlot=$visualization.options.minConfidenceForCircosPlot
+    #end if
+    #if str($visualization.options.showIntergenicVicinity) != ''
+        --showIntergenicVicinity=$visualization.options.showIntergenicVicinity
+    #end if
+    #if $visualization.options.squishIntrons
+        --squishIntrons=$visualization.options.squishIntrons
+    #end if
+    #if str($visualization.options.mergeDomainsOverlappingBy) != ''
+        --mergeDomainsOverlappingBy=$visualization.options.mergeDomainsOverlappingBy
+    #end if
+    #if $visualization.options.printExonLabels
+        --printExonLabels=$visualization.options.printExonLabels
+    #end if
+    #if $visualization.options.render3dEffect
+        --render3dEffect=$visualization.options.render3dEffect
+    #end if
+    #if $visualization.options.optimizeDomainColors
+        --optimizeDomainColors=$visualization.options.optimizeDomainColors
+    #end if
+    #if $visualization.options.color1
+        --color1=$visualization.options.color1
+    #end if
+    #if $visualization.options.color2
+        --color2=$visualization.options.color2
+    #end if
+    #if str($visualization.options.pdfWidth) != ''
+        --pdfWidth=$visualization.options.pdfWidth
+    #end if
+    #if str($visualization.options.pdfHeight) != ''
+        --pdfHeight=$visualization.options.pdfHeight
+    #end if
+    #if str($visualization.options.fontSize) != ''
+        --fontSize=$visualization.options.fontSize
+    #end if
+</token> <!-- Builds the draw_fusions.R call. Numeric options are compared as strings against the empty string so that an explicit 0 is still forwarded instead of being treated as unset. -->
+</macros>
Binary file static/images/draw-fusions-example.png has changed
Binary file test-data/genome.fasta.gz has changed
Binary file test-data/genome.gtf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/arriba_indexes.loc.sample	Fri Feb 11 19:09:19 2022 +0000
@@ -0,0 +1,17 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Arriba data files.
+#The Arriba script download_references.sh retrieves a genome assembly fasta
+#and a related GTF annotation file, then builds a STAR index.
+#You will need to create these data files and then create an
+#arriba_indexes.loc similar to this one (store it in this
+#directory) that points to the directories in which those files are stored.
+#The arriba_indexes.loc file has this format (longer white space
+#characters are TAB characters):
+#
+#<unique_build_id>   <display_name>   <genome_fasta_path>	<genome_gtf_path>	<STAR_index_path>
+#
+#Note that STAR indices can become quite large. 
+#
+#<unique_build_id>	<display_name>	<genome_fasta_path>	<genome_gtf_path>	<STAR_index_path>
+#GRCh38+ENSEMBL93	GRCh38+ENSEMBL93	/depot/GRCh38+ENSEMBL93/genome.fa	/depot/GRCh38+ENSEMBL93/genome.gtf	/depot/GRCh38+ENSEMBL93/STAR_index/
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Feb 11 19:09:19 2022 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="arriba_indexes" comment_char="#" allow_duplicate_entries="False"> <!-- table referenced by from_data_table="arriba_indexes" in macros.xml -->
+        <columns>value, name, fasta, gtf, star_index</columns> <!-- same order as the tab-separated fields documented in arriba_indexes.loc.sample -->
+        <file path="tool-data/arriba_indexes.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Feb 11 19:09:19 2022 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="arriba_indexes" comment_char="#" allow_duplicate_entries="False"> <!-- test-time variant of the arriba_indexes table -->
+        <columns>value, name, fasta, gtf, star_index</columns>
+        <file path="${__HERE__}/test-data/arriba_indexes.loc" /> <!-- NOTE(review): test-data/arriba_indexes.loc is not in this changeset's file list; confirm it is provided -->
+    </table>
+</tables>