view jbrowse2/jbrowse2.xml @ 2:22e3d068fdc9 draft

Uploaded
author fubar
date Wed, 03 Jan 2024 06:17:43 +0000
parents cd5d63cd0eb5
children 52842c3f2dda
line wrap: on
line source

 <tool id="jbrowse2" name="JBrowse2" version="@TOOL_VERSION@+@WRAPPER_VERSION@" profile="22.05">
    <!-- Short description shown next to the tool name. -->
    <description>genome browser</description>
    <!-- macros.xml is imported here; the macros expanded in this file (edamInc,
         requirements, input_conditional, track_styling, citations) are not defined
         in this file, so presumably they live in macros.xml (confirm there). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edamInc"/>
    <!-- Cross-reference to the bio.tools registry entry. -->
    <xrefs>
        <xref type="bio.tools">jbrowse2</xref>
    </xrefs>
    <expand macro="requirements"/>
    <!-- The version is reported by the wrapper script that ships in the tool directory. -->
    <version_command>python '${__tool_directory__}/jbrowse2.py' --version</version_command>
    <command detect_errors="aggressive"><![CDATA[
mkdir -p '$output.files_path' &&
## Keep a copy of the rendered track description next to the output:
## mostly for debugging, but also handy for reproducing a run locally.
cp '$trackxml' '$output.files_path/galaxy.xml' &&

## Locate the JBrowse2 distribution relative to the jbrowse executable on PATH.
## Both command substitutions are double-quoted so that an install prefix
## containing whitespace is not word-split by the shell.
export JBROWSE_SOURCE_DIR="\$(dirname "\$(which jbrowse)")/../opt/jbrowse2" &&

## Once that's done, we run the python script to handle the real work
python '$__tool_directory__/jbrowse2.py'

--jbrowse "\${JBROWSE_SOURCE_DIR}"
--standalone '$standalone'

--outdir '$output.files_path'
'$trackxml' &&

## Publish the generated index.html as the HTML output dataset.
## NOTE(review): the standalone select in this tool only offers "complete" and
## "minimal", so the dummyIndex branch below is not reachable from the form as written.
#if str($standalone) != "data":
    cp '$output.files_path/index.html' '$output'
#else:
    cp '$dummyIndex' '$output'
#end if

## Ugly testing hack since I cannot get <extra_files> to test the files I want to test. Hmph.
## When enabled, the rendered track XML overwrites the output so tests can assert on it.
#if str($uglyTestingHack) == "enabled":
 &&   cp '$trackxml' '$output'
#end if
  ]]></command>
    <configfiles>
        <!-- Placeholder HTML page. The command block copies it to the output dataset
             only when 'standalone' equals "data"; in every other case the generated
             index.html is used instead. -->
        <configfile name="dummyIndex"><![CDATA[
      <html>
          <head>
          </head>
          <body>
              <h1>JBrowse Data Directory</h1>
              <p>
                Hi! This is not a full JBrowse instance. JBrowse v0.4(+?)
                started shipping with the ability to produce just the
                "data" directory from a JBrowse instance, rather than a
                complete, standalone instance. This was intended to be used
                with the in-development Apollo integration, but may have other
                uses as well.
              </p>
          </body>
      </html>
      ]]></configfile>
        <!-- Cheetah template rendered into the XML track description. The command block
             passes the rendered file to jbrowse2.py as its positional argument and also
             copies it to galaxy.xml inside the output directory. -->
        <configfile name="trackxml"><![CDATA[<?xml version="1.0"?>
<root>
    <metadata>
        <genomes>
            #if str($reference_genome.genome_type_select) == "indexed":
              ## A built-in genome is a row of the all_fasta data table, not a history
              ## dataset, so there is no dataset, history or creating job to describe.
              ## Only the display name column of that row is recorded.
              ## NOTE(review): confirm which attributes jbrowse2.py reads for this case.
              <genome path="${reference_genome.genome.fields.path}">
                  <metadata>
                     <dataset dname="${reference_genome.genome.fields.name}" />
                  </metadata>
              </genome>
            #else
              <genome path="$reference_genome.genome">
                <metadata>
                  <dataset id="${__app__.security.encode_id($reference_genome.genome.id)}" hid="${reference_genome.genome.hid}"
                      size="${reference_genome.genome.get_size(nice_size=True)}"
                      edam_format="${reference_genome.genome.datatype.edam_format}"
                      file_ext="${reference_genome.genome.ext}"
                      dname="${reference_genome.genome.element_identifier}" />
                  <history id="${__app__.security.encode_id($reference_genome.genome.history_id)}"
                      #if $reference_genome.genome.history.user:
                      user_email="${reference_genome.genome.history.user.email}"
                      user_id="${reference_genome.genome.history.user_id}"
                      display_name="${reference_genome.genome.history.get_display_name()}"/>
                      #else
                      user_email="anonymous"
                      user_id="-1"
                      display_name="Unnamed History"
                      />
                      #end if
                  ## Same filter as the per-track metadata further down: skip the
                  ## "_types" helper keys, unset values and very long values.
                  <metadata
                      #for (key, value) in $reference_genome.genome.get_metadata().items():
                      #if "_types" not in $key and $value is not None and len(str($value)) < 5000:
                      ${key}="${value}"
                      #end if
                      #end for
                      />
                  <tool
                      tool_id="${reference_genome.genome.creating_job.tool_id}"
                      tool_version="${reference_genome.genome.creating_job.tool_version}"
                      />
                </metadata>
              </genome>
            #end if
        </genomes>
        <galaxyUrl>${__app__.config.galaxy_infrastructure_url}</galaxyUrl>
    </metadata>
    <tracks>
        ## One <track> element per Annotation Track of every Track Group.
        #for $tg in $track_groups:
        #for $track in $tg.data_tracks:
        <track cat="${tg.category}" format="${track.data_format.data_format_select}" >
            #if $track.data_format.data_format_select != "rest" and $track.data_format.data_format_select != "sparql":
            <files>
              #for $dataset in $track.data_format.annotation:
              <trackFile path="${dataset}" ext="${dataset.ext}" label="${dataset.element_identifier}">
                <metadata>
                  <dataset id="${__app__.security.encode_id($dataset.id)}" hid="${dataset.hid}"
                      size="${dataset.get_size(nice_size=True)}"
                      edam_format="${dataset.datatype.edam_format}"
                      file_ext="${dataset.ext}" />
                  <history id="${__app__.security.encode_id($dataset.history_id)}"
                      #if $dataset.history.user:
                      user_email="${dataset.history.user.email}"
                      user_id="${dataset.history.user_id}"
                      display_name="${dataset.history.get_display_name()}"/>
                      #else
                      user_email="anonymous"
                      user_id="-1"
                      display_name="Unnamed History"/>
                      #end if
                  <metadata
                    #for (key, value) in $dataset.get_metadata().items():
                    #if "_types" not in $key and $value is not None and len(str($value)) < 5000:
                      ${key}="${value}"
                    #end if
                    #end for
                      />
                  <tool
                      tool_id="${dataset.creating_job.tool_id}"
                      tool_version="${dataset.creating_job.tool_version}"
                      />
                </metadata>
              </trackFile>
              #end for
            </files>
            #end if

            <options>
            #if str($track.data_format.data_format_select) == "gene_calls" or str($track.data_format.data_format_select) == "blast" :
                <style>
                    <className>${track.data_format.jbstyle.style_classname}</className>
                    <description>${track.data_format.jbstyle.style_description}</description>
                    <label>${track.data_format.jbstyle.style_label}</label>
                    <height>${track.data_format.jbstyle.style_height}</height>
                    <maxHeight>${track.data_format.jbstyle.max_height}</maxHeight>
                </style>
            #else if str($track.data_format.data_format_select) == "pileup":
                <pileup>
                    <bam_indices>
                        #for $dataset in $track.data_format.annotation:
                        <bam_index>${dataset.metadata.bam_index}</bam_index>
                        #end for
                    </bam_indices>
                    <chunkSizeLimit>${track.data_format.chunkSizeLimit}</chunkSizeLimit>
                </pileup>
            #end if
            #if str($track.data_format.data_format_select) == "blast":
                <blast>
                  #if str($track.data_format.blast_parent) != "":
                    <parent>${track.data_format.blast_parent}</parent>
                  #end if
                    <protein>${track.data_format.is_protein}</protein>
                    <min_gap>${track.data_format.min_gap}</min_gap>
                    <index>${track.data_format.index}</index>
                </blast>
            #end if
            </options>
        </track>
        #end for
        #end for
    </tracks>
     <plugins>
     </plugins>
</root>
]]></configfile>
    </configfiles>
    <inputs>
        <conditional name="reference_genome">
            <!-- Chooses where the reference sequence comes from. -->
            <param name="genome_type_select" type="select" label="Reference genome to display" help="Built-in references">
                <option value="indexed" selected="True">Use a built-in genome</option>
                <option value="history">Use a genome from history</option>
            </param>
            <when value="indexed">
                <!-- Built-in genomes are listed from the all_fasta data table, sorted on column 2. -->
                <param name="genome" type="select"
                       label="Select a reference genome"
                       help="If your genome of interest is not listed, contact the Galaxy team">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2"/>
                        <validator type="no_options" message="No genomes are available for the selected input dataset"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <!-- A FASTA dataset taken from the user's history. -->
                <param name="genome" type="data" format="fasta" label="Select the reference genome"/>
            </when>
        </conditional>

        <!-- Output flavour. The selected value is handed to jbrowse2.py as its standalone
             option and is appended to the output dataset label. Closing tags are kept on
             the same line as the text so the displayed labels carry no trailing newline
             or indentation. -->
        <param name="standalone" label="Output JBrowse" type="select">
            <option value="complete">Complete, for viewing and further local development (JBrowse, tools, documentation, etc.)</option>
            <option value="minimal" selected="true">Minimal for viewing (Documentation removed)</option>
        </param>

        <!-- Track groups: each group has a category name and any number of annotation
             tracks. The trackxml template emits one track element per annotation track. -->
        <repeat name="track_groups" title="Track Group">
            <param label="Track Category"
                name="category"
                type="text"
                value="Default"
                help="Organise your tracks into Categories for a nicer end-user experience. You can use #date# and it will be replaced with the current date in 'yyyy-mm-dd' format, which is very useful for repeatedly updating a JBrowse instance when member databases / underlying tool versions are updated." optional="False"/>
            <repeat name="data_tracks" title="Annotation Track">
                <conditional name="data_format" label="Track Options">
                    <!-- The first option is the default because none is marked as selected. -->
                    <param type="select" label="Track Type" name="data_format_select">
                        <option value="blast">Blast XML</option>
                        <option value="gene_calls">GFF/GFF3/BED Features</option>
                        <option value="hic">HiC data (convert .cool with hicexplorer)</option>
                        <option value="pileup">BAM Pileups</option>
                        <option value="vcf">VCF SNPs</option>
                        <option value="wiggle">BigWig XY</option>
                    </param>
                    <when value="hic">
                        <expand macro="input_conditional" label="HiC Track Data" format="hic" help="Cool files must be converted first with hicexplorer" />
                    </when>
                    <when value="blast">
                        <expand macro="input_conditional" label="BlastXML Track Data" format="blastxml" />
                        <expand macro="track_styling"
                                classname="feature"
                                label="description"
                                description="Hit_titles"
                                height="600px"/>
                        <param label="Features used in Blast Search"
                            help="in GFF3. This is used  so we know where to map features. E.g. where results of which CDS Protein32 match up to. The query IDs in your blast results should MATCH some feature IDs in your GFF3 file. This is an optional field and is most useful if using JBrowse to display protein blast results on a DNA genome. blastn results don't need this, blastp results on a protein sequence don't need this."
                            format="gff3"
                            name="blast_parent"
                            optional="true"
                            type="data"/>

                        <param label="Minimum Gap Size"
                            help="before a new match_part feature is created"
                            name="min_gap"
                            type="integer"
                            value="10"
                            min="2" />
                        <param label="Is this a protein blast search?"
                            type="boolean"
                            name="is_protein"
                            truevalue="true"
                            falsevalue="false" />

                        <param label="Index this track" name="index" type="boolean" checked="false" truevalue="true" falsevalue="false" />
                    </when>
                    <when value="vcf">
                        <expand macro="input_conditional" label="SNP Track Data" format="vcf" />
                    </when>
                    <when value="gene_calls">
                        <expand macro="input_conditional" label="GFF/GFF3/BED Track Data" format="gff,gff3,bed" />
                        <expand macro="track_styling"
                                classname="feature"
                                label="product,name,id"
                                description="note,description"
                                height="10px"/>
                        <!-- NOTE(review): the trackxml template in this file never reads
                             match_part; confirm whether it is still meant to reach jbrowse2.py. -->
                        <conditional name="match_part" label="match/match_part data">
                            <param type="select" label="Match part" name="matchp">
                                <option value="false" selected="True">No</option>
                                <option value="true">Yes</option>
                            </param>
                            <when value="true">
                                <param label="Match Part Feature Type"
                                    name="name"
                                    type="text"
                                    value="match"
                                    help="Match_parts have several options for the parent feature type, such as cDNA_match, match, translated_nucleotide_match, etc. Please select the appropriate one here. You can leave empty to try autodetection (only works with CanvasFeatures track type)."
                                    optional="True"/>
                            </when>
                            <when value="false" />
                        </conditional>
                    </when>
                    <when value="pileup">
                        <expand macro="input_conditional" label="BAM Track Data" format="bam" />
                        <!-- NOTE(review): the trackxml template in this file never reads
                             autogen; confirm whether it is still meant to reach jbrowse2.py. -->
                        <param type="select" label="Autogenerate SNP Track"
                            help="Not recommended for deep coverage BAM files" name="autogen">
                            <option value="false" selected="True">No</option>
                            <option value="true">Yes</option>
                        </param>
                        <param label="Maximum size of BAM chunks"
                            name="chunkSizeLimit"
                            type="integer"
                            help="Maximum size in bytes of BAM chunks that the browser will try to deal with. When this is exceeded, most tracks will display 'Too much data' message."
                            value="5000000" />
                    </when>
                    <when value="wiggle">
                        <expand macro="input_conditional" label="BigWig Track Data" format="bigwig" />
                    </when>
                </conditional>
            </repeat>
        </repeat>
        <!-- Test-only switch. When its value is "enabled" the command block additionally
             copies the rendered track XML over the output dataset, so the tool tests can
             assert on that XML. Empty by default. -->
        <param type="hidden" name="uglyTestingHack" value="" />
    </inputs>
    <outputs>
        <!-- Single HTML dataset; the command block creates its files_path directory and
             copies an index page onto the dataset itself. -->
        <data name="output"
              format="html"
              label="JBrowse on $on_string - $standalone"/>
    </outputs>
    <tests>
        <!-- Every test enables uglyTestingHack, so the asserted output content is the
             rendered track XML rather than the JBrowse index page. -->

        <!-- History genome, no tracks: the genome metadata elements are present. -->
        <test>
            <param name="reference_genome|genome_type_select" value="history"/>
            <param name="reference_genome|genome" value="merlin.fa"/>
            <param name="standalone" value="minimal"/>
            <param name="uglyTestingHack" value="enabled"/>
            <output name="output">
                <assert_contents>
                    <has_text text="genome path="/>
                    <has_text text="dataset id="/>
                    <has_text text="history id="/>
                    <has_text text="metadata"/>
                    <has_text text="tool_id"/>
                </assert_contents>
            </output>
        </test>

        <!-- History genome plus one gene_calls track fed with two BED files. -->
        <test>
            <param name="reference_genome|genome_type_select" value="history"/>
            <param name="reference_genome|genome" value="merlin.fa"/>
            <param name="standalone" value="minimal"/>
            <repeat name="track_groups">
                <param name="category" value="Default"/>
                <repeat name="data_tracks">
                    <conditional name="data_format">
                        <param name="data_format_select" value="gene_calls"/>
                        <param name="annotation" value="bed/test-3.bed,bed/test-6.bed"/>
                    </conditional>
                </repeat>
            </repeat>
            <param name="uglyTestingHack" value="enabled"/>
            <output name="output">
                <assert_contents>
                    <has_text text="genome path="/>
                    <has_text text="dataset id="/>
                    <has_text text="history id="/>
                    <has_text text="metadata"/>
                    <has_text text="tool_id"/>
                    <has_text text="ext=&quot;bed&quot; label=&quot;test-3.bed&quot;"/>
                </assert_contents>
            </output>
        </test>

        <!-- History genome plus one BAM pileup track: the BAM index is listed. -->
        <test>
            <conditional name="reference_genome">
                <param name="genome_type_select" value="history"/>
                <param name="genome" value="merlin.fa"/>
            </conditional>
            <param name="standalone" value="minimal"/>
            <repeat name="track_groups">
                <param name="category" value="Auto Coloured"/>
                <repeat name="data_tracks">
                    <conditional name="data_format">
                        <param name="data_format_select" value="pileup"/>
                        <param name="annotation" value="bam/merlin-sample.bam"/>
                    </conditional>
                </repeat>
            </repeat>
            <param name="uglyTestingHack" value="enabled"/>
            <output name="output">
                <assert_contents>
                    <has_text text="merlin-sample.bam"/>
                    <has_text text="dname=&quot;merlin.fa&quot;"/>
                    <has_text text="bam_index"/>
                </assert_contents>
            </output>
        </test>

        <!-- History genome, no tracks: the genome name appears in the track XML. -->
        <test>
            <param name="reference_genome|genome_type_select" value="history"/>
            <param name="reference_genome|genome" value="merlin.fa"/>
            <param name="standalone" value="minimal"/>
            <param name="uglyTestingHack" value="enabled"/>
            <output name="output">
                <assert_contents>
                    <has_text text="merlin.fa"/>
                </assert_contents>
            </output>
        </test>

    </tests>
    <help><![CDATA[

JBrowse2-in-Galaxy
==================

JBrowse2-in-Galaxy offers a highly configurable, workflow-compatible
alternative to JBrowse1-in-Galaxy and Trackster.

Compared to JBrowse1-in-Galaxy, there is no support for alternative codons for unusual genomes,
and detailed track styling is not yet implemented. Send code.
JBrowse1 development has now ceased in favour of JBrowse2.


Overview
--------

JBrowse is a fast, embeddable genome browser built completely with
JavaScript and HTML5.

The JBrowse-in-Galaxy (JiG) tool was written to help build complex
JBrowse installations straight from Galaxy, taking advantage of the
latest Galaxy features such as dataset collections, sections, and colour
pickers. It allows you to build up a JBrowse instance without worrying
about how to run the command line tools to format your data, and which
options need to be supplied and where. Additionally it comes with many
javascript functions to handle colouring of features which would be
nearly impossible to write without the assistance of this tool.

The JBrowse-in-Galaxy tool is maintained by `the Galaxy IUC
<https://github.com/galaxyproject/tools-iuc/issues>`__, who can help you
with missing features or bugs in the tool.

Options
-------

The first option you encounter is the **Fasta Sequence(s)**. This option
now accepts multiple fasta files, allowing you to build JBrowse
instances that contain data for multiple genomes or chromosomes
(generally known as "landmark features" in gff3 terminology.) Up to 30
will be shown from the dropdown selector within JBrowse; this is a known
issue.

**Track Groups** represent a set of tracks in a single category. These
can be used to let your users understand relationships between large
groups of tracks.

.. image:: sections.png

Annotation Tracks
-----------------

Within Track Groups, you have one or more **Annotation Tracks**. Each
Annotation Track is a group of datasets which have similar styling.
This allows you to rapidly build up JBrowse instances without having to
configure tracks individually. A massive improvement over previous
versions. For example, if you have five different GFF3 files from
various gene callers that you wish to display, you can take advantage of
this feature to style all of them similarly.

There are a few different types of tracks supported, each with their own
set of options:

GFF3/BED
~~~~~~~~

These are your standard feature tracks. They usually highlight genes,
mRNAs and other features of interest along a genomic region. The
underlying tool and this help documentation focus primarily on GFF3
data, and have not been tested extensively with other formats. Automatic
min/max detection will fail under BED datasets.

BAM Pileups
~~~~~~~~~~~

We support BAM files and can automatically generate SNP tracks based on
that bam data.

.. image:: bam.png

Generating SNP tracks is *strongly discouraged* for high coverage density datasets.
Unfortunately there are no other configuration options exposed for bam
files.

BlastXML
~~~~~~~~

.. image:: blast.png

JiG now supports both blastn and blastp datasets. JiG internally uses a
blastXML to gapped GFF3 tool to convert your blastxml datasets into a
format amenable to visualization in JBrowse. This tool is also
available separately from the IUC on the toolshed.

**Minimum Gap Size** reflects how long a gap must be before it becomes a
real gap in the processed gff3 file. In the picture above, various sizes
of gaps can be seen. If the minimum gap size was set much higher, say
100nt, many of the smaller gaps would disappear, and the features on
both sides would be merged into one, longer feature. This setting is
inversely proportional to runtime and output file size. *Do not set this
to a low value for large datasets*. By setting this number lower, you
will have extremely large outputs and extremely long runtimes. The
default was configured based off of the author's experience, but the
author only works on small viruses. It is *strongly* recommended that
you filter your blast results before display, e.g. picking out the top
10 hits or so.

**Protein blast search** option merely informs underlying tools that
they should adjust feature locations by 3x.

Bigwig XY
~~~~~~~~~

.. image:: bigwig.png

**XYPlot**

BigWig tracks can be displayed as a "density" plot which is a continuous
line which varies in colour, or as an "XYplot." XYplots are preferable
for users to visually identify specific features in a bigwig track,
however density tracks are more visually compact.

VCFs/SNPs
~~~~~~~~~

These tracks do not support any special configuration.

@ATTRIBUTION@
]]></help>
    <expand macro="citations"/>
</tool>