Mercurial > repos > jjohnson > arriba_get_filters
changeset 0:463dd21dc267 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/arriba commit c1d05da7c2c76feae94cbc640be7b010f31397d2-dirty"
author | jjohnson |
---|---|
date | Fri, 11 Feb 2022 19:08:51 +0000 |
parents | |
children | f1e60cf0823a |
files | arriba_get_filters.xml macros.xml static/images/draw-fusions-example.png test-data/genome.fasta.gz test-data/genome.gtf.gz tool-data/arriba_indexes.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 8 files changed, 309 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- File: arriba_get_filters.xml -->
<!--
    Galaxy tool wrapper that copies four reference files distributed with
    Arriba (blacklist, known fusions, protein domains, cytobands) into the
    history. The files are looked up by name in the directory that contains
    Arriba's download_references.sh script.
-->
<tool id="arriba_get_filters" name="Arriba Get Filters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
    <description>to history</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="exit_code"><![CDATA[
        ## Locate download_references.sh below the installation prefix of the
        ## arriba executable; the reference files are searched for in its directory.
        BASE_DIR=\$(dirname \$(dirname \$(which arriba))) &&
        REF_SCRIPT=\$(find \$BASE_DIR -name 'download_references.sh') &&
        REF_DIR=\$(dirname \$REF_SCRIPT) &&
        ## Keep only the part of the name before the first '+' and drop the
        ## substring 'viral'. The value comes from a free-text input, so it is
        ## single-quoted here and double-quoted wherever it is used: unquoted,
        ## a space in the value would make the shell run the rest as a command.
        REF_NAME='${arriba_reference_name.split('+')[0].replace('viral','')}' &&
        echo "\$REF_NAME" &&
        ## Copy each file whose name contains the reference name (case-insensitive).
        cp \$(find \$REF_DIR -name 'blacklist_*' | grep -i -e "\$REF_NAME") '$blacklist' &&
        cp \$(find \$REF_DIR -name 'known_fusions_*' | grep -i -e "\$REF_NAME") '$known_fusions' &&
        cp \$(find \$REF_DIR -name 'protein_domains_*' | grep -i -e "\$REF_NAME") '$protein_domains' &&
        cp \$(find \$REF_DIR -name 'cytobands_*' | grep -i -e "\$REF_NAME") '$cytobands'
    ]]></command>
    <inputs>
        <!-- Free-text reference name; the options below are the suggested values. -->
        <param name="arriba_reference_name" type="text" label="Select reference">
            <help>GRCh38 GRCh37 hg38 hg19 GRCm38 mm10</help>
            <option value="GRCh38">GRCh38</option>
            <option value="GRCh37">GRCh37</option>
            <option value="hg38">hg38</option>
            <option value="hg19">hg19</option>
            <option value="GRCm38">GRCm38</option>
            <option value="mm10">mm10</option>
            <!-- An empty name would match every reference file and make the copy fail. -->
            <validator type="empty_field" message="Enter a reference name, for example GRCh38"/>
        </param>
    </inputs>
    <outputs>
        <data name="blacklist" format="tabular.gz" label="${tool.name} ${arriba_reference_name} blacklist.tsv.gz"/>
        <data name="known_fusions" format="tabular.gz" label="${tool.name} ${arriba_reference_name} known_fusions.tsv.gz"/>
        <data name="protein_domains" format="gff3" label="${tool.name} ${arriba_reference_name} protein_domains.gff3"/>
        <data name="cytobands" format="tabular" label="${tool.name} ${arriba_reference_name} cytobands.tsv"/>
    </outputs>
    <tests>
        <!-- Fetch the GRCh38 files and check the first cytoband record. -->
        <test>
            <param name="arriba_reference_name" value="GRCh38"/>
            <output name="cytobands">
                <assert_contents>
                    <has_text_matching expression="1\t1\t\d+\tp36.33\tgneg"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**Arriba Get Filters**

Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions.
It is based on chimeric alignments found by the STAR RNA-Seq aligner.

The **Arriba Get Filters** tool adds the following Arriba distribution input_files_ to your galaxy history:

 - blacklist
 - known_fusions
 - protein_domains
 - cytobands


.. _Arriba: https://arriba.readthedocs.io/en/latest/
.. _input_files: https://arriba.readthedocs.io/en/latest/input-files/

]]></help>
    <expand macro="citations" />
</tool>
<!-- File: macros.xml -->
<!-- Shared tokens and XML macros imported by the Arriba tool wrappers. -->
<macros>
    <!-- Version of the arriba package to require, and the Galaxy wrapper revision suffix. -->
    <token name="@TOOL_VERSION@">2.2.1</token>
    <token name="@VERSION_SUFFIX@">0</token>

    <!-- Requirement on the arriba package; callers may yield further requirements. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">arriba</requirement>
            <yield/>
        </requirements>
    </xml>

    <!-- Citation by DOI; callers may yield further citations. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1101/gr.257246.119</citation>
            <yield />
        </citations>
    </xml>

    <!-- Prints the last word of the line containing "Version" in the output of arriba -h. -->
    <xml name="version_command">
        <version_command>arriba -h | grep Version | sed 's/^.* //'</version_command>
    </xml>

    <!--
        Genome selection: either an assembly and annotation from the history,
        or an entry of the arriba_indexes data table. The assembly_optional
        token (default "false") sets the optional attribute of the history assembly.
    -->
    <xml name="genome_source" token_assembly_optional="false">
        <conditional name="genome">
            <param name="genome_source" type="select" label="Arriba Genome assembly and annotation source">
                <option value="history">From your history</option>
                <option value="cached">Use built-in Arriba</option>
            </param>
            <when value="history">
                <param name="assembly" argument="-a" type="data" format="fasta" optional="@ASSEMBLY_OPTIONAL@" label="Genome assembly fasta"/>
                <param name="annotation" argument="-g" type="data" format="gtf" label="Gene annotation in GTF format"/>
            </when>
            <when value="cached">
                <param name="arriba_ref" type="select" label="Arriba Genome assembly and annotation">
                    <options from_data_table="arriba_indexes"/>
                </param>
            </when>
        </conditional>
    </xml>

    <!--
        Cheetah snippet that sets $genome_assembly and $genome_annotation from
        the genome conditional above. In the history branch $genome_assembly is
        only set when an assembly dataset was supplied.
    -->
    <token name="@GENOME_SOURCE@">
#if str($genome.genome_source) == "history"
    #if $genome.assembly
        #set $genome_assembly = $genome.assembly
    #end if
    #set $genome_annotation = $genome.annotation
#else
    #set $genome_assembly = $genome.arriba_ref.fields.fasta
    #set $genome_annotation = $genome.arriba_ref.fields.gtf
#end if
</token>

    <!--
        Inputs for draw_fusions.R: an optional cytobands dataset plus a
        collapsed section of optional plot settings. Every setting is optional;
        the DRAW_FUSIONS token only passes the ones that have a value.
    -->
    <xml name="visualization_options">
        <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/>
        <section name="options" expanded="false" title="Draw Fusion Options">
            <param argument="--transcriptSelection" type="select" optional="true" label="Transcript selection">
                <help>By default the transcript isoform with the highest coverage is drawn.
                    Alternatively, the transcript isoform that is provided in the columns
                    transcript_id1 and transcript_id2 in the given fusions file can be drawn.
                    Selecting the isoform with the highest coverage usually produces nicer plots,
                    in the sense that the coverage track is smooth and shows a visible increase in coverage after the fusion breakpoint.
                    However, the isoform with the highest coverage may not be the one that is involved in the fusion.
                    Often, genomic rearrangements lead to non-canonical isoforms being transcribed.
                    For this reason, it can make sense to rely on the transcript selection provided by the columns transcript_id1/2,
                    which reflect the actual isoforms involved in a fusion.
                    As a third option, the transcripts that are annotated as canonical can be drawn.
                    Transcript isoforms tagged with appris_principal, appris_candidate, or CCDS are considered canonical.
                </help>
                <option value="coverage">coverage</option>
                <option value="provided">provided</option>
                <option value="canonical">canonical</option>
            </param>
            <param argument="--minConfidenceForCircosPlot" type="select" optional="true" label="Minimum confidence for circos plot">
                <help>The fusion of interest is drawn as a solid line in the circos plot.
                    To give an impression of the overall degree of rearrangement,
                    all other fusions are drawn as semi-transparent lines in the background.
                    This option determines which other fusions should be included in the circos plot.
                    Values specify the minimum confidence a fusion must have to be included.
                    It usually makes no sense to include low-confidence fusions in circos plots,
                    because they are abundant and unreliable, and would clutter up the circos plot.
                    Default: medium
                </help>
                <option value="none">none - only the fusion of interest is drawn</option>
                <option value="low">low</option>
                <option value="medium">medium</option>
                <option value="high">high</option>
            </param>
            <param argument="--showIntergenicVicinity" type="integer" value="" min="0" optional="true" label="Intergenic Vicinity">
                <help>This option only applies to intergenic breakpoints.
                    If it is set to a value greater than 0, then the script draws the genes
                    which are no more than the given distance away from an intergenic breakpoint.
                    Note that this option is incompatible with squishIntrons.
                    Default: 0
                </help>
            </param>
            <param argument="--squishIntrons" type="select" optional="true" label="Squish introns">
                <help>Exons usually make up only a small fraction of a gene.
                    They may be hard to see in the plot.
                    Since introns are in most situations of no interest in the context of gene fusions,
                    this switch can be used to shrink the size of introns to a fixed, negligible size.
                    It makes sense to disable this feature, if breakpoints in introns are of importance.
                    Default: TRUE
                </help>
                <option value="TRUE">True</option>
                <option value="FALSE">False</option>
            </param>

            <param argument="--mergeDomainsOverlappingBy" type="float" value="" min="0." max="1.0" optional="true" label="Merge Domains Overlapping By">
                <help>Occasionally, domains are annotated redundantly.
                    For example, tyrosine kinase domains are frequently annotated as
                    Protein tyrosine kinase and Protein kinase domain.
                    In order to simplify the visualization, such domains can be merged into one,
                    given that they overlap by the given fraction.
                    The description of the larger domain is used.
                    Default: 0.9
                </help>
            </param>
            <param argument="--printExonLabels" type="select" optional="true" label="Print Exon Labels">
                <help>By default the number of an exon is printed inside each exon,
                    which is taken from the attribute exon_number of the GTF annotation.
                    When a gene has many exons, the boxes may be too narrow to contain the labels,
                    resulting in unreadable exon labels. In these situations,
                    it may be better to turn off exon labels.
                    Default: TRUE
                </help>
                <option value="TRUE">True</option>
                <option value="FALSE">False</option>
            </param>
            <param argument="--render3dEffect" type="select" optional="true" label="Render 3D effect">
                <help>Whether light and shadow should be rendered to give objects a 3D effect.
                    Default: TRUE
                </help>
                <option value="TRUE">True</option>
                <option value="FALSE">False</option>
            </param>
            <param argument="--optimizeDomainColors" type="select" optional="true" label="Optimize Domain Colors">
                <help>By default, the script colorizes domains according to the colors
                    specified in the file given in --annotation.
                    This way, coloring of domains is consistent across all proteins.
                    But since there are more distinct domains than colors,
                    this can lead to different domains having the same color.
                    If this option is set to TRUE, the colors are recomputed for each fusion separately.
                    This ensures that the colors have the maximum distance for each individual fusion,
                    but they are no longer consistent across different fusions.
                    Default: FALSE
                </help>
                <option value="TRUE">True</option>
                <option value="FALSE">False</option>
            </param>
            <param argument="--color1" type="color" value="" optional="true" label="Color of the 5' end of the fusion."/>
            <param argument="--color2" type="color" value="" optional="true" label="Color of the 3' end of the fusion."/>
            <param argument="--pdfWidth" type="float" value="" min="1." optional="true" label="Width of PDF output file in inches"
                   help="Default: 11.692"/>
            <param argument="--pdfHeight" type="float" value="" min="1." optional="true" label="Height of PDF output file in inches"
                   help="Default: 8.267"/>
            <param argument="--fontSize" type="float" value="" min="0." optional="true" label="Scale the size of text"
                   help="Default: 1.0"/>
        </section>
    </xml>

    <!--
        Cheetah snippet building the draw_fusions.R command line. It reads
        $fusions, $protein_domains, $genome.annotation and the parameters of
        the visualization_options macro under $visualization, and writes
        fusions.pdf. Optional settings are passed only when they have a value.
        NOTE(review): the annotation is taken from $genome.annotation, which the
        genome_source macro defines only in its "history" branch, whereas the
        GENOME_SOURCE token sets $genome_annotation for both branches. Confirm
        against the calling tools which variable is intended here.
    -->
    <token name="@DRAW_FUSIONS@">
draw_fusions.R
    --fusions='$fusions'
    --alignments='Aligned.sortedByCoord.out.bam'
    --annotation='$genome.annotation'
    --output=fusions.pdf
    #if $visualization.cytobands
    --cytobands='$visualization.cytobands'
    #end if
    #if $protein_domains
    --proteinDomains='$protein_domains'
    #end if
    ## Visualization Options
    #if $visualization.options.transcriptSelection
    --transcriptSelection=$visualization.options.transcriptSelection
    #end if
    #if $visualization.options.minConfidenceForCircosPlot
    --minConfidenceForCircosPlot=$visualization.options.minConfidenceForCircosPlot
    #end if
    #if $visualization.options.showIntergenicVicinity
    --showIntergenicVicinity=$visualization.options.showIntergenicVicinity
    #end if
    #if $visualization.options.squishIntrons
    --squishIntrons=$visualization.options.squishIntrons
    #end if
    #if $visualization.options.mergeDomainsOverlappingBy
    --mergeDomainsOverlappingBy=$visualization.options.mergeDomainsOverlappingBy
    #end if
    #if $visualization.options.printExonLabels
    --printExonLabels=$visualization.options.printExonLabels
    #end if
    #if $visualization.options.render3dEffect
    --render3dEffect=$visualization.options.render3dEffect
    #end if
    #if $visualization.options.optimizeDomainColors
    --optimizeDomainColors=$visualization.options.optimizeDomainColors
    #end if
    #if $visualization.options.color1
    --color1=$visualization.options.color1
    #end if
    #if $visualization.options.color2
    --color2=$visualization.options.color2
    #end if
    #if $visualization.options.pdfWidth
    --pdfWidth=$visualization.options.pdfWidth
    #end if
    #if $visualization.options.pdfHeight
    --pdfHeight=$visualization.options.pdfHeight
    #end if
    #if $visualization.options.fontSize
    --fontSize=$visualization.options.fontSize
    #end if
</token>
</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/arriba_indexes.loc.sample Fri Feb 11 19:08:51 2022 +0000 @@ -0,0 +1,17 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Arriba data files. +#The Arriba script download_references.sh retrieves a genome assembly fasta +#and a related GTF annotation file, then builds a STAR index. +#You will need to create these data files and then create an +#arriba_indexes.loc similar to this one (store it in this +#directory) that points to the directories in which those files are stored. +#The arriba_indexes.loc file has this format (longer white space +#characters are TAB characters): +# +#<unique_build_id> <display_name> <genome_fasta_path> <genome_gtf_path> <STAR_index_path> +# +#Note that STAR indices can become quite large. +# +#<unique_build_id> <display_name> <genome_fasta_path> <genome_gtf_path> <STAR_index_path> +#GRCh38+ENSEMBL93 GRCh38+ENSEMBL93 /depot/GRCh38+ENSEMBL93/genome.fa /depot/GRCh38+ENSEMBL93/genome.gtf /depot/GRCh38+ENSEMBL93/STAR_index/ +
<!-- File: tool_data_table_conf.xml.sample -->
<!--
    Declares the arriba_indexes data table. Its five columns (value, name,
    fasta, gtf, star_index) are read from tool-data/arriba_indexes.loc, with
    "#" set as the comment character and duplicate entries disallowed.
-->
<tables>
    <table allow_duplicate_entries="False"
           comment_char="#"
           name="arriba_indexes">
        <columns>value, name, fasta, gtf, star_index</columns>
        <file path="tool-data/arriba_indexes.loc"/>
    </table>
</tables>
<!-- File: tool_data_table_conf.xml.test -->
<!--
    Test variant of the arriba_indexes data table declaration: same columns
    and settings as the sample configuration, but the entries are read from
    test-data/arriba_indexes.loc under the ${__HERE__} directory.
-->
<tables>
    <table allow_duplicate_entries="False"
           comment_char="#"
           name="arriba_indexes">
        <columns>value, name, fasta, gtf, star_index</columns>
        <file path="${__HERE__}/test-data/arriba_indexes.loc"/>
    </table>
</tables>