Mercurial > repos > jjohnson > arriba_get_filters

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/arriba_get_filters.xml	Fri Feb 11 19:08:51 2022 +0000
@@ -0,0 +1,71 @@
+<tool id="arriba_get_filters" name="Arriba Get Filters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
+    <description>to history</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <command detect_errors="exit_code"><![CDATA[
+    ## Locate the directory of annotation files bundled with the arriba package:
+    ## it is the directory that holds download_references.sh.
+    BASE_DIR="\$(dirname "\$(dirname "\$(which arriba)")")" &&
+    REF_SCRIPT="\$(find "\$BASE_DIR" -name 'download_references.sh')" &&
+    REF_DIR="\$(dirname "\$REF_SCRIPT")" &&
+    ## Keep only the assembly part of the name: drop everything from the first '+' and the word 'viral'.
+    ## The value is single-quoted so that free-text input can never be executed as a command.
+    REF_NAME='${arriba_reference_name.split('+')[0].replace('viral','')}' &&
+    echo "\$REF_NAME" &&
+    ## For each kind of file, copy the one whose path contains the assembly name
+    ## (case-insensitive, literal match) to the corresponding output dataset.
+    cp "\$(find "\$REF_DIR" -name 'blacklist_*' | grep -i -F -e "\$REF_NAME")" '$blacklist' &&
+    cp "\$(find "\$REF_DIR" -name 'known_fusions_*' | grep -i -F -e "\$REF_NAME")" '$known_fusions' &&
+    cp "\$(find "\$REF_DIR" -name 'protein_domains_*' | grep -i -F -e "\$REF_NAME")" '$protein_domains' &&
+    cp "\$(find "\$REF_DIR" -name 'cytobands_*' | grep -i -F -e "\$REF_NAME")" '$cytobands'
+    ]]></command>
+    <inputs>
+        <param name="arriba_reference_name" type="text" label="Select reference">
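+            <help>Assembly name: GRCh38, GRCh37, hg38, hg19, GRCm38 or mm10. A longer Arriba reference name such as GRCh38viral+ENSEMBL93 is also accepted; only the assembly part is used.</help>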
+            <option value="GRCh38">GRCh38</option>
+            <option value="GRCh37">GRCh37</option>
+            <option value="hg38">hg38</option>
+            <option value="hg19">hg19</option>
+            <option value="GRCm38">GRCm38</option>
+            <option value="mm10">mm10</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="blacklist" format="tabular.gz" label="${tool.name} ${arriba_reference_name} blacklist.tsv.gz"/>
+        <data name="known_fusions" format="tabular.gz" label="${tool.name} ${arriba_reference_name} known_fusions.tsv.gz"/>
+        <data name="protein_domains" format="gff3" label="${tool.name} ${arriba_reference_name} protein_domains.gff3"/>
+        <data name="cytobands" format="tabular" label="${tool.name} ${arriba_reference_name} cytobands.tsv"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="arriba_reference_name" value="GRCh38"/>
+            <output name="cytobands">
+                <assert_contents>
+                    <has_text_matching expression="1\t1\t\d+\tp36.33\tgneg"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**Arriba Get Filters**
+
+Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions.
+It is based on chimeric alignments found by the STAR RNA-Seq aligner.
+
+The **Arriba Get Filters** tool adds the following Arriba distribution input_files_ to your galaxy history:
+
+  - blacklist
+  - known_fusions
+  - protein_domains
+  - cytobands
+
+
+.. _Arriba: https://arriba.readthedocs.io/en/latest/
+.. _input_files: https://arriba.readthedocs.io/en/latest/input-files/
+
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Feb 11 19:08:51 2022 +0000
@@ -0,0 +1,209 @@
+<macros>
+    <token name="@TOOL_VERSION@">2.2.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">arriba</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/gr.257246.119</citation>
+            <yield />
+        </citations>
+    </xml>
+    <xml name="version_command">
+        <version_command>arriba -h | grep Version | sed 's/^.* //'</version_command>
+    </xml>
+    <xml name="genome_source" token_assembly_optional="false" >
+        <conditional name="genome">
+            <param name="genome_source" type="select" label="Arriba Genome assembly and annotation source">
+                <option value="history">From your history</option>
+                <option value="cached">Use built-in Arriba</option>
+            </param>
+            <when value="history">
+                <param name="assembly" argument="-a" type="data" format="fasta" optional="@ASSEMBLY_OPTIONAL@" label="Genome assembly fasta"/>
+                <param name="annotation" argument="-g" type="data" format="gtf" label="Gene annotation in GTF format"/>
+            </when>
+            <when value="cached">
+                <param name="arriba_ref" type="select" label="Arriba Genome assembly and annotation">
+                    <options from_data_table="arriba_indexes">
+                    </options>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <token name="@GENOME_SOURCE@">
+## Resolve the genome files of the selected source into plain Cheetah variables:
+##   genome_annotation - always set: the history GTF dataset, or the gtf field of the data table entry.
+##   genome_assembly   - set from the fasta field of the data table entry, or from the history dataset
+##                       when one was supplied; left undefined when the source is "history" and no
+##                       assembly dataset is given.
+#if str($genome.genome_source) == "history"
+    #if $genome.assembly
+        #set $genome_assembly = $genome.assembly
+    #end if
+    #set $genome_annotation = $genome.annotation
+#else
+    #set $genome_assembly = $genome.arriba_ref.fields.fasta
+    #set $genome_annotation = $genome.arriba_ref.fields.gtf
+#end if
+</token>
+
+    <xml name="visualization_options">
+                <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/>
+                <section name="options" expanded="false" title="Draw Fusion Options">
+                    <param argument="--transcriptSelection" type="select" optional="true" label="Transcript selection">
+                        <help>By default the transcript isoform with the highest coverage is drawn.
+                             Alternatively, the transcript isoform that is provided in the columns
+                             transcript_id1 and transcript_id2 in the given fusions file can be drawn.
+                             Selecting the isoform with the highest coverage usually produces nicer plots,
+                             in the sense that the coverage track is smooth and shows a visible increase in coverage after the fusion breakpoint.
+                             However, the isoform with the highest coverage may not be the one that is involved in the fusion.
+                             Often, genomic rearrangements lead to non-canonical isoforms being transcribed.
+                             For this reason, it can make sense to rely on the transcript selection provided by the columns transcript_id1/2,
+                             which reflect the actual isoforms involved in a fusion.
+                             As a third option, the transcripts that are annotated as canonical can be drawn.
+                             Transcript isoforms tagged with appris_principal, appris_candidate, or CCDS are considered canonical.
+                        </help>
+                        <option value="coverage">coverage</option>
+                        <option value="provided">provided</option>
+                        <option value="canonical">canonical</option>
+                    </param>
+                    <param argument="--minConfidenceForCircosPlot" type="select" optional="true" label="Minimum confidence for circos plot">
+                        <help>The fusion of interest is drawn as a solid line in the circos plot.
+                              To give an impression of the overall degree of rearrangement,
+                              all other fusions are drawn as semi-transparent lines in the background.
+                              This option determines which other fusions should be included in the circos plot.
+                              Values specify the minimum confidence a fusion must have to be included.
+                              It usually makes no sense to include low-confidence fusions in circos plots,
+                              because they are abundant and unreliable, and would clutter up the circos plot.
+                              Default: medium
+                        </help>
+                        <option value="none">none - only the fusion of interest is drawn</option>
+                        <option value="low">low</option>
+                        <option value="medium">medium</option>
+                        <option value="high">high</option>
+                    </param>
+                    <param argument="--showIntergenicVicinity" type="integer" value="" min="0" optional="true" label="Intergenic Vicinity">
+                        <help>This option only applies to intergenic breakpoints.
+                              If it is set to a value greater than 0, then the script draws the genes
+                              which are no more than the given distance away from an intergenic breakpoint.
+                              Note that this option is incompatible with squishIntrons.
+                              Default: 0
+                        </help>
+                    </param>
+                    <param argument="--squishIntrons" type="select" optional="true" label="Squish introns">
+                        <help>Exons usually make up only a small fraction of a gene.
+                              They may be hard to see in the plot.
+                              Since introns are in most situations of no interest in the context of gene fusions,
+                              this switch can be used to shrink the size of introns to a fixed, negligible size.
+                              It makes sense to disable this feature, if breakpoints in introns are of importance.
+                              Default: TRUE
+                        </help>
+                        <option value="TRUE">True</option>
+                        <option value="FALSE">False</option>
+                    </param>
+
+                    <param argument="--mergeDomainsOverlappingBy" type="float" value="" min="0." max="1.0" optional="true" label="Merge Domains Overlapping By">
+                        <help>Occasionally, domains are annotated redundantly.
+                              For example, tyrosine kinase domains are frequently annotated as
+                              Protein tyrosine kinase and Protein kinase domain.
+                              In order to simplify the visualization, such domains can be merged into one,
+                              given that they overlap by the given fraction.
+                              The description of the larger domain is used.
+                              Default: 0.9
+                        </help>
+                    </param>
+                    <param argument="--printExonLabels" type="select" optional="true" label="Print Exon Labels">
+                        <help>By default the number of an exon is printed inside each exon,
+                              which is taken from the attribute exon_number of the GTF annotation.
+                              When a gene has many exons, the boxes may be too narrow to contain the labels,
+                              resulting in unreadable exon labels. In these situations,
+                              it may be better to turn off exon labels.
+                              Default: TRUE
+                        </help>
+                        <option value="TRUE">True</option>
+                        <option value="FALSE">False</option>
+                    </param>
+                    <param argument="--render3dEffect" type="select" optional="true" label="Render 3D effect">
+                        <help>Whether light and shadow should be rendered to give objects a 3D effect.
+                              Default: TRUE
+                        </help>
+                        <option value="TRUE">True</option>
+                        <option value="FALSE">False</option>
+                    </param>
+                    <param argument="--optimizeDomainColors" type="select" optional="true" label="Optimize Domain Colors">
+                        <help>By default, the script colorizes domains according to the colors
+                              specified in the file given in --annotation.
+                              This way, coloring of domains is consistent across all proteins.
+                              But since there are more distinct domains than colors,
+                              this can lead to different domains having the same color.
+                              If this option is set to TRUE, the colors are recomputed for each fusion separately.
+                              This ensures that the colors have the maximum distance for each individual fusion,
+                              but they are no longer consistent across different fusions.
+                              Default: FALSE
+                        </help>
+                        <option value="TRUE">True</option>
+                        <option value="FALSE">False</option>
+                    </param>
+                    <param argument="--color1" type="color" value="" optional="true"  label="Color of the 5' end of the fusion."/>
+                    <param argument="--color2" type="color" value="" optional="true"  label="Color of the 3' end of the fusion."/>
+                    <param argument="--pdfWidth" type="float" value="" min="1." optional="true" label="Width of PDF output file in inches"
+                           help="Default: 11.692"/>
+                    <param argument="--pdfHeight" type="float" value="" min="1." optional="true" label="Height of PDF output file in inches"
+                           help="Default: 8.267"/>
+                    <param argument="--fontSize" type="float" value="" min="0." optional="true" label="Scale the size of text"
+                           help="Default: 1.0"/>
+                </section>
+    </xml>
+    <token name="@DRAW_FUSIONS@">
+## Command line for draw_fusions.R. Reads the tool's fusions, protein_domains, genome and
+## visualization parameters and writes fusions.pdf.
+draw_fusions.R
+    --fusions='$fusions'
+    --alignments='Aligned.sortedByCoord.out.bam'
+    ## The GTF is a history dataset or a data table field, depending on the genome source.
+    #if str($genome.genome_source) == "history"
+    --annotation='$genome.annotation'
+    #else
+    --annotation='$genome.arriba_ref.fields.gtf'
+    #end if
+    --output=fusions.pdf
+    #if $visualization.cytobands
+    --cytobands='$visualization.cytobands'
+    #end if
+    #if $protein_domains
+    --proteinDomains='$protein_domains'
+    #end if
+    ## Visualization Options
+    ## Numeric options are compared against the empty string rather than tested for truth,
+    ## so that an explicit value of 0 is still passed on to the script.
+    #if $visualization.options.transcriptSelection
+        --transcriptSelection=$visualization.options.transcriptSelection
+    #end if
+    #if $visualization.options.minConfidenceForCircosPlot
+        --minConfidenceForCircosPlot=$visualization.options.minConfidenceForCircosPlot
+    #end if
+    #if str($visualization.options.showIntergenicVicinity) != ''
+        --showIntergenicVicinity=$visualization.options.showIntergenicVicinity
+    #end if
+    #if $visualization.options.squishIntrons
+        --squishIntrons=$visualization.options.squishIntrons
+    #end if
+    #if str($visualization.options.mergeDomainsOverlappingBy) != ''
+        --mergeDomainsOverlappingBy=$visualization.options.mergeDomainsOverlappingBy
+    #end if
+    #if $visualization.options.printExonLabels
+        --printExonLabels=$visualization.options.printExonLabels
+    #end if
+    #if $visualization.options.render3dEffect
+        --render3dEffect=$visualization.options.render3dEffect
+    #end if
+    #if $visualization.options.optimizeDomainColors
+        --optimizeDomainColors=$visualization.options.optimizeDomainColors
+    #end if
+    #if $visualization.options.color1
+        --color1=$visualization.options.color1
+    #end if
+    #if $visualization.options.color2
+        --color2=$visualization.options.color2
+    #end if
+    #if str($visualization.options.pdfWidth) != ''
+        --pdfWidth=$visualization.options.pdfWidth
+    #end if
+    #if str($visualization.options.pdfHeight) != ''
+        --pdfHeight=$visualization.options.pdfHeight
+    #end if
+    #if str($visualization.options.fontSize) != ''
+        --fontSize=$visualization.options.fontSize
+    #end if
+</token>
+</macros>
Binary file static/images/draw-fusions-example.png has changed
Binary file test-data/genome.fasta.gz has changed
Binary file test-data/genome.gtf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/arriba_indexes.loc.sample	Fri Feb 11 19:08:51 2022 +0000
@@ -0,0 +1,17 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Arriba data files.
+#The Arriba script download_references.sh retrieves a genome assembly fasta
+#and a related GTF annotation file, then builds a STAR index.
+#You will need to create these data files and then create an
+#arriba_indexes.loc similar to this one (store it in this
+#directory) that points to the directories in which those files are stored.
+#The arriba_indexes.loc file has this format (longer white space
+#characters are TAB characters):
+#
+#<unique_build_id>   <display_name>   <genome_fasta_path>	<genome_gtf_path>	<STAR_index_path>
+#
+#Note that STAR indices can become quite large.
+#
+#<unique_build_id>	<display_name>	<genome_fasta_path>	<genome_gtf_path>	<STAR_index_path>
+#GRCh38+ENSEMBL93	GRCh38+ENSEMBL93	/depot/GRCh38+ENSEMBL93/genome.fa	/depot/GRCh38+ENSEMBL93/genome.gtf	/depot/GRCh38+ENSEMBL93/STAR_index/
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Feb 11 19:08:51 2022 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <!-- Arriba reference data: the columns match, in order, the tab-separated fields of
+         each line in arriba_indexes.loc (build id, display name, fasta, gtf, STAR index). -->
+    <table name="arriba_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, fasta, gtf, star_index</columns>
+        <file path="tool-data/arriba_indexes.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Feb 11 19:08:51 2022 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <!-- Test variant of the arriba_indexes table: same columns, but the .loc file is read
+         from the test-data directory next to this file. -->
+    <table name="arriba_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, fasta, gtf, star_index</columns>
+        <file path="${__HERE__}/test-data/arriba_indexes.loc" />
+    </table>
+</tables>