Mercurial > repos > jjohnson > arriba_download_reference
changeset 0:7345cb1bb772 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/arriba commit c1d05da7c2c76feae94cbc640be7b010f31397d2-dirty"
author | jjohnson |
---|---|
date | Fri, 11 Feb 2022 19:09:19 +0000 |
parents | |
children | 55ca46d68a57 |
files | arriba_download_reference.xml macros.xml static/images/draw-fusions-example.png test-data/genome.fasta.gz test-data/genome.gtf.gz tool-data/arriba_indexes.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 8 files changed, 354 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arriba_download_reference.xml Fri Feb 11 19:09:19 2022 +0000 @@ -0,0 +1,122 @@
+<tool id="arriba_download_reference" name="Arriba Reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
+    <description>Download to history</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <!-- Runs Arriba's download_references.sh for the selected reference and places the
+         resulting fasta, GTF and STAR index into the three history outputs. -->
+    <command detect_errors="exit_code"><![CDATA[
+    ## The STAR index dataset itself only holds the reference name; the index directory goes to its extra files path.
+    echo '$arriba_reference_name' > '$star_index' &&
+    ## Search for download_references.sh starting two directory levels above the arriba executable.
+    BASE_DIR=\$(dirname \$(dirname `which arriba`)) &&
+    REF_SCRIPT=`find "\$BASE_DIR" -name 'download_references.sh'` &&
+    #if str($is_test) != 'yes'
+        "\$REF_SCRIPT" '$arriba_reference_name' &&
+        cp *.fa* '$genome_fasta' &&
+        cp *.gtf* '$genome_gtf' &&
+        mv STAR_index_* '$star_index.extra_files_path'
+    #else
+        ## Test mode: only verify that the script was found and is executable.
+        [[ -x "\$REF_SCRIPT" ]]
+    #end if
+    ]]></command>
+    <inputs>
+        <!-- Hidden switch: the tool test sets it to "yes" so that only the presence of the download script is checked. -->
+        <param name="is_test" type="hidden" value="no"/>
+        <param name="arriba_reference_name" type="select" label="Select reference">
+            <option value="GRCh38+ENSEMBL93">GRCh38+ENSEMBL93</option>
+            <option value="GRCh38+GENCODE28">GRCh38+GENCODE28</option>
+            <option value="GRCh38+RefSeq">GRCh38+RefSeq</option>
+            <option value="GRCh38viral+ENSEMBL93">GRCh38viral+ENSEMBL93</option>
+            <option value="GRCh38viral+GENCODE28">GRCh38viral+GENCODE28</option>
+            <option value="GRCh38viral+RefSeq">GRCh38viral+RefSeq</option>
+            <option value="hg38+ENSEMBL93">hg38+ENSEMBL93</option>
+            <option value="hg38+GENCODE28">hg38+GENCODE28</option>
+            <option value="hg38+RefSeq">hg38+RefSeq</option>
+            <option value="hg38viral+ENSEMBL93">hg38viral+ENSEMBL93</option>
+            <option value="hg38viral+GENCODE28">hg38viral+GENCODE28</option>
+            <option value="hg38viral+RefSeq">hg38viral+RefSeq</option>
+            <option value="GRCh37+ENSEMBL87">GRCh37+ENSEMBL87</option>
+            <option value="GRCh37+GENCODE19">GRCh37+GENCODE19</option>
+            <option value="GRCh37+RefSeq">GRCh37+RefSeq</option>
+            <option value="GRCh37viral+ENSEMBL87">GRCh37viral+ENSEMBL87</option>
+            <option value="GRCh37viral+GENCODE19">GRCh37viral+GENCODE19</option>
+            <option value="GRCh37viral+RefSeq">GRCh37viral+RefSeq</option>
+            <option value="hg19+ENSEMBL87">hg19+ENSEMBL87</option>
+            <option value="hg19+GENCODE19">hg19+GENCODE19</option>
+            <option value="hg19+RefSeq">hg19+RefSeq</option>
+            <option value="hg19viral+ENSEMBL87">hg19viral+ENSEMBL87</option>
+            <option value="hg19viral+GENCODE19">hg19viral+GENCODE19</option>
+            <option value="hg19viral+RefSeq">hg19viral+RefSeq</option>
+            <option value="hs37d5+ENSEMBL87">hs37d5+ENSEMBL87</option>
+            <option value="hs37d5+GENCODE19">hs37d5+GENCODE19</option>
+            <option value="hs37d5+RefSeq">hs37d5+RefSeq</option>
+            <option value="hs37d5viral+ENSEMBL87">hs37d5viral+ENSEMBL87</option>
+            <option value="hs37d5viral+GENCODE19">hs37d5viral+GENCODE19</option>
+            <option value="hs37d5viral+RefSeq">hs37d5viral+RefSeq</option>
+            <option value="GRCm39+GENCODEM26">GRCm39+GENCODEM26</option>
+            <option value="GRCm39+RefSeq">GRCm39+RefSeq</option>
+            <option value="GRCm39viral+GENCODEM26">GRCm39viral+GENCODEM26</option>
+            <option value="GRCm39viral+RefSeq">GRCm39viral+RefSeq</option>
+            <option value="GRCm38+GENCODEM25">GRCm38+GENCODEM25</option>
+            <option value="GRCm38+RefSeq">GRCm38+RefSeq</option>
+            <option value="GRCm38viral+GENCODEM25">GRCm38viral+GENCODEM25</option>
+            <option value="GRCm38viral+RefSeq">GRCm38viral+RefSeq</option>
+            <option value="mm39+GENCODEM26">mm39+GENCODEM26</option>
+            <option value="mm39+RefSeq">mm39+RefSeq</option>
+            <option value="mm39viral+GENCODEM26">mm39viral+GENCODEM26</option>
+            <option value="mm39viral+RefSeq">mm39viral+RefSeq</option>
+            <option value="mm10+GENCODEM25">mm10+GENCODEM25</option>
+            <option value="mm10+RefSeq">mm10+RefSeq</option>
+            <option value="mm10viral+GENCODEM25">mm10viral+GENCODEM25</option>
+            <option value="mm10viral+RefSeq">mm10viral+RefSeq</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="genome_fasta" format="fasta" label="${tool.name} ${arriba_reference_name} fasta"/>
+        <data name="genome_gtf" format="gtf" label="${tool.name} ${arriba_reference_name} GTF"/>
+        <data name="star_index" format="txt" label="${tool.name} ${arriba_reference_name} STAR index"/>
+    </outputs>
+    <tests>
+        <!-- Downloading a genome and annotation plus building a STAR index requires too many resources for testing.
+             Just test that we can locate the script. -->
+        <test>
+            <param name="is_test" value="yes"/>
+            <param name="arriba_reference_name" value="GRCh38+ENSEMBL93"/>
+            <output name="star_index">
+                <assert_contents>
+                    <has_text text="GRCh38+ENSEMBL93"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**Arriba Reference**
+
+Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions.
+It is based on chimeric alignments found by the STAR RNA-Seq aligner.
+
+**Arriba Reference** downloads a genome sequence fasta and its related annotation GTF, and then builds a STAR index for the RNA STAR aligner.
+
+These datasets will be added to your Galaxy history:
+
+  - genome assembly fasta
+  - genome annotation GTF
+  - STAR index
+
+See Arriba manual pages:
+
+  - https://arriba.readthedocs.io/en/latest/workflow/
+  - https://arriba.readthedocs.io/en/latest/input-files/
+
+
+**NOTE:** This is a resource intensive process, so the results should be copied to new histories as needed rather than running this in each workflow.
+
+.. _Arriba: https://arriba.readthedocs.io/en/latest/
+
+]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Feb 11 19:09:19 2022 +0000 @@ -0,0 +1,217 @@
+<macros>
+    <!-- Tokens combined into the tool version string; @TOOL_VERSION@ also pins the arriba package requirement. -->
+    <token name="@TOOL_VERSION@">2.2.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">arriba</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/gr.257246.119</citation>
+            <yield />
+        </citations>
+    </xml>
+    <xml name="version_command">
+        <version_command>arriba -h | grep Version | sed 's/^.* //'</version_command>
+    </xml>
+    <!-- Conditional for choosing the genome fasta and GTF either from the history or from the arriba_indexes data table. -->
+    <xml name="genome_source" token_assembly_optional="false" >
+        <conditional name="genome">
+            <param name="genome_source" type="select" label="Arriba Genome assembly and annotation source">
+                <option value="history">From your history</option>
+                <option value="cached">Use built-in Arriba</option>
+            </param>
+            <when value="history">
+                <param name="assembly" argument="-a" type="data" format="fasta" optional="@ASSEMBLY_OPTIONAL@" label="Genome assembly fasta"/>
+                <param name="annotation" argument="-g" type="data" format="gtf" label="Gene annotation in GTF format"/>
+            </when>
+            <when value="cached">
+                <param name="arriba_ref" type="select" label="Arriba Genome assembly and annotation">
+                    <options from_data_table="arriba_indexes">
+                    </options>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Cheetah snippet that sets $genome_assembly and $genome_annotation from the "genome" conditional.
+         $genome_assembly is left unset when the source is "history" and no assembly fasta is supplied. -->
+    <token name="@GENOME_SOURCE@">
+#if str($genome.genome_source) == "history"
+    #if $genome.assembly
+        #set $genome_assembly = $genome.assembly
+    #end if
+    #set $genome_annotation = $genome.annotation
+#else
+    #set $genome_assembly = $genome.arriba_ref.fields.fasta
+    #set $genome_annotation = $genome.arriba_ref.fields.gtf
+#end if
+</token>
+
+    <!-- Optional inputs for draw_fusions.R; an option that is left empty is not passed to the script. -->
+    <xml name="visualization_options">
+        <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/>
+        <section name="options" expanded="false" title="Draw Fusion Options">
+            <param argument="--transcriptSelection" type="select" optional="true" label="Transcript selection">
+                <help>By default the transcript isoform with the highest coverage is drawn.
+                    Alternatively, the transcript isoform that is provided in the columns
+                    transcript_id1 and transcript_id2 in the given fusions file can be drawn.
+                    Selecting the isoform with the highest coverage usually produces nicer plots,
+                    in the sense that the coverage track is smooth and shows a visible increase in coverage after the fusion breakpoint.
+                    However, the isoform with the highest coverage may not be the one that is involved in the fusion.
+                    Often, genomic rearrangements lead to non-canonical isoforms being transcribed.
+                    For this reason, it can make sense to rely on the transcript selection provided by the columns transcript_id1/2,
+                    which reflect the actual isoforms involved in a fusion.
+                    As a third option, the transcripts that are annotated as canonical can be drawn.
+                    Transcript isoforms tagged with appris_principal, appris_candidate, or CCDS are considered canonical.
+                </help>
+                <option value="coverage">coverage</option>
+                <option value="provided">provided</option>
+                <option value="canonical">canonical</option>
+            </param>
+            <param argument="--minConfidenceForCircosPlot" type="select" optional="true" label="Minimum confidence for circos plot">
+                <help>The fusion of interest is drawn as a solid line in the circos plot.
+                    To give an impression of the overall degree of rearrangement,
+                    all other fusions are drawn as semi-transparent lines in the background.
+                    This option determines which other fusions should be included in the circos plot.
+                    Values specify the minimum confidence a fusion must have to be included.
+                    It usually makes no sense to include low-confidence fusions in circos plots,
+                    because they are abundant and unreliable, and would clutter up the circos plot.
+                    Default: medium
+                </help>
+                <option value="none">none - only the fusion of interest is drawn</option>
+                <option value="low">low</option>
+                <option value="medium">medium</option>
+                <option value="high">high</option>
+            </param>
+            <param argument="--showIntergenicVicinity" type="integer" value="" min="0" optional="true" label="Intergenic Vicinity">
+                <help>This option only applies to intergenic breakpoints.
+                    If it is set to a value greater than 0, then the script draws the genes
+                    which are no more than the given distance away from an intergenic breakpoint.
+                    Note that this option is incompatible with squishIntrons.
+                    Default: 0
+                </help>
+            </param>
+            <param argument="--squishIntrons" type="select" optional="true" label="Squish introns">
+                <help>Exons usually make up only a small fraction of a gene.
+                    They may be hard to see in the plot.
+                    Since introns are in most situations of no interest in the context of gene fusions,
+                    this switch can be used to shrink the size of introns to a fixed, negligible size.
+                    It makes sense to disable this feature, if breakpoints in introns are of importance.
+                    Default: TRUE
+                </help>
+                <option value="TRUE">True</option>
+                <option value="FALSE">False</option>
+            </param>
+
+            <param argument="--mergeDomainsOverlappingBy" type="float" value="" min="0." max="1.0" optional="true" label="Merge Domains Overlapping By">
+                <help>Occasionally, domains are annotated redundantly.
+                    For example, tyrosine kinase domains are frequently annotated as
+                    Protein tyrosine kinase and Protein kinase domain.
+                    In order to simplify the visualization, such domains can be merged into one,
+                    given that they overlap by the given fraction.
+                    The description of the larger domain is used.
+                    Default: 0.9
+                </help>
+            </param>
+            <param argument="--printExonLabels" type="select" optional="true" label="Print Exon Labels">
+                <help>By default the number of an exon is printed inside each exon,
+                    which is taken from the attribute exon_number of the GTF annotation.
+                    When a gene has many exons, the boxes may be too narrow to contain the labels,
+                    resulting in unreadable exon labels. In these situations,
+                    it may be better to turn off exon labels.
+                    Default: TRUE
+                </help>
+                <option value="TRUE">True</option>
+                <option value="FALSE">False</option>
+            </param>
+            <param argument="--render3dEffect" type="select" optional="true" label="Render 3D effect">
+                <help>Whether light and shadow should be rendered to give objects a 3D effect.
+                    Default: TRUE
+                </help>
+                <option value="TRUE">True</option>
+                <option value="FALSE">False</option>
+            </param>
+            <param argument="--optimizeDomainColors" type="select" optional="true" label="Optimize Domain Colors">
+                <help>By default, the script colorizes domains according to the colors
+                    specified in the file given in --annotation.
+                    This way, coloring of domains is consistent across all proteins.
+                    But since there are more distinct domains than colors,
+                    this can lead to different domains having the same color.
+                    If this option is set to TRUE, the colors are recomputed for each fusion separately.
+                    This ensures that the colors have the maximum distance for each individual fusion,
+                    but they are no longer consistent across different fusions.
+                    Default: FALSE
+                </help>
+                <option value="TRUE">True</option>
+                <option value="FALSE">False</option>
+            </param>
+            <param argument="--color1" type="color" value="" optional="true" label="Color of the 5' end of the fusion."/>
+            <param argument="--color2" type="color" value="" optional="true" label="Color of the 3' end of the fusion."/>
+            <param argument="--pdfWidth" type="float" value="" min="1." optional="true" label="Width of PDF output file in inches"
+                   help="Default: 11.692"/>
+            <param argument="--pdfHeight" type="float" value="" min="1." optional="true" label="Height of PDF output file in inches"
+                   help="Default: 8.267"/>
+            <param argument="--fontSize" type="float" value="" min="0." optional="true" label="Scale the size of text"
+                   help="Default: 1.0"/>
+        </section>
+    </xml>
+    <!-- Cheetah snippet that assembles the draw_fusions.R command line from the visualization inputs.
+         Numeric options that accept 0 are tested with str() so that an explicit 0 is still passed to the script.
+         NOTE(review): the annotation is read from $genome.annotation, which only exists in the "history" branch of genome_source; confirm the "cached" case. -->
+    <token name="@DRAW_FUSIONS@">
+draw_fusions.R
+    --fusions='$fusions'
+    --alignments='Aligned.sortedByCoord.out.bam'
+    --annotation='$genome.annotation'
+    --output=fusions.pdf
+    #if $visualization.cytobands
+    --cytobands='$visualization.cytobands'
+    #end if
+    #if $protein_domains
+    --proteinDomains='$protein_domains'
+    #end if
+    ## Visualization Options
+    #if $visualization.options.transcriptSelection
+    --transcriptSelection=$visualization.options.transcriptSelection
+    #end if
+    #if $visualization.options.minConfidenceForCircosPlot
+    --minConfidenceForCircosPlot=$visualization.options.minConfidenceForCircosPlot
+    #end if
+    #if str($visualization.options.showIntergenicVicinity) != ''
+    --showIntergenicVicinity=$visualization.options.showIntergenicVicinity
+    #end if
+    #if $visualization.options.squishIntrons
+    --squishIntrons=$visualization.options.squishIntrons
+    #end if
+    #if str($visualization.options.mergeDomainsOverlappingBy) != ''
+    --mergeDomainsOverlappingBy=$visualization.options.mergeDomainsOverlappingBy
+    #end if
+    #if $visualization.options.printExonLabels
+    --printExonLabels=$visualization.options.printExonLabels
+    #end if
+    #if $visualization.options.render3dEffect
+    --render3dEffect=$visualization.options.render3dEffect
+    #end if
+    #if $visualization.options.optimizeDomainColors
+    --optimizeDomainColors=$visualization.options.optimizeDomainColors
+    #end if
+    #if $visualization.options.color1
+    --color1=$visualization.options.color1
+    #end if
+    #if $visualization.options.color2
+    --color2=$visualization.options.color2
+    #end if
+    #if $visualization.options.pdfWidth
+    --pdfWidth=$visualization.options.pdfWidth
+    #end if
+    #if $visualization.options.pdfHeight
+    --pdfHeight=$visualization.options.pdfHeight
+    #end if
+    #if str($visualization.options.fontSize) != ''
+    --fontSize=$visualization.options.fontSize
+    #end if
+</token>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/arriba_indexes.loc.sample Fri Feb 11 19:09:19 2022 +0000 @@ -0,0 +1,17 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Arriba data files. +#The Arriba script download_references.sh retrieves a genome assembly fasta +#and a related GTF annotation file, then builds a STAR index. +#You will need to create these data files and then create an +#arriba_indexes.loc similar to this one (store it in this +#directory) that points to the directories in which those files are stored. +#The arriba_indexes.loc file has this format (longer white space +#characters are TAB characters): +# +#<unique_build_id> <display_name> <genome_fasta_path> <genome_gtf_path> <STAR_index_path> +# +#Note that STAR indices can become quite large. +# +#<unique_build_id> <display_name> <genome_fasta_path> <genome_gtf_path> <STAR_index_path> +#GRCh38+ENSEMBL93 GRCh38+ENSEMBL93 /depot/GRCh38+ENSEMBL93/genome.fa /depot/GRCh38+ENSEMBL93/genome.gtf /depot/GRCh38+ENSEMBL93/STAR_index/ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Feb 11 19:09:19 2022 +0000 @@ -0,0 +1,8 @@
+<tables>
+    <!-- Arriba references: the column names map, in order, onto the fields of each arriba_indexes.loc entry;
+         "#" is the comment character for that file. -->
+    <table name="arriba_indexes" allow_duplicate_entries="False" comment_char="#">
+        <columns>value, name, fasta, gtf, star_index</columns>
+        <file path="tool-data/arriba_indexes.loc" />
+    </table>
+</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Feb 11 19:09:19 2022 +0000 @@ -0,0 +1,9 @@
+<tables>
+    <!-- Test variant of the arriba_indexes table: same columns, but the .loc file is read from the
+         test-data directory next to this file.
+         NOTE(review): confirm test-data/arriba_indexes.loc is shipped; it is not among the files added by this changeset. -->
+    <table name="arriba_indexes" allow_duplicate_entries="False" comment_char="#">
+        <columns>value, name, fasta, gtf, star_index</columns>
+        <file path="${__HERE__}/test-data/arriba_indexes.loc" />
+    </table>
+</tables>