changeset 31:301316cb596a draft

planemo upload for repository https://github.com/bardin-lab/readtagger/tree/master/galaxy commit 6e01a2e472ebbb07ce5181b836bae8bc5c7ecf36-dirty
author mvdbeek
date Wed, 21 Jun 2017 09:38:25 -0400
parents db5c766503dd
children bf3d04937095
files add_matesequence.xml allow_dovetailing.xml bam_readtagger.xml bwa_mem_index.loc.sample findcluster.xml macros.xml test-data/three_cluster_out.gff tool_data_table_conf.xml.sample update_mapq.xml write_supplementary_fastq.xml
diffstat 10 files changed, 117 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/add_matesequence.xml	Thu May 11 05:33:27 2017 -0400
+++ b/add_matesequence.xml	Wed Jun 21 09:38:25 2017 -0400
@@ -1,7 +1,7 @@
-<tool id="add_matesequence" name="Add matesequence" version="0.3.24">
+<tool id="add_matesequence" name="Add matesequence" version="0.3.25">
     <description>into tag field</description>
     <requirements>
-        <requirement type="package" version="0.3.24">readtagger</requirement>
+        <requirement type="package" version="0.3.25">readtagger</requirement>
     </requirements>
     <version_command>add_matesequence --version</version_command>
     <command detect_errors="aggressive"><![CDATA[
--- a/allow_dovetailing.xml	Thu May 11 05:33:27 2017 -0400
+++ b/allow_dovetailing.xml	Wed Jun 21 09:38:25 2017 -0400
@@ -1,7 +1,7 @@
-<tool id="allow_dovetailing" name="Allow dovetailing" version="0.3.24">
+<tool id="allow_dovetailing" name="Allow dovetailing" version="0.3.25">
     <description>modifies proper_pair flag in bam files</description>
     <requirements>
-        <requirement type="package" version="0.3.24">readtagger</requirement>
+        <requirement type="package" version="0.3.25">readtagger</requirement>
     </requirements>
     <command detect_errors="aggressive"><![CDATA[
         allow_dovetailing -i '$input' -o '$output'
--- a/bam_readtagger.xml	Thu May 11 05:33:27 2017 -0400
+++ b/bam_readtagger.xml	Wed Jun 21 09:38:25 2017 -0400
@@ -1,10 +1,10 @@
-<tool id="bam_readtagger" name="Tag alignment files" version="0.3.24">
+<tool id="bam_readtagger" name="Tag alignment files" version="0.3.25">
     <description>from multiple bam files</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <requirements>
-        <requirement type="package" version="0.3.24">readtagger</requirement>
+        <requirement type="package" version="0.3.25">readtagger</requirement>
     </requirements>
     <command detect_errors="aggressive"><![CDATA[
         readtagger -t '$tag_file' -s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa_mem_index.loc.sample	Wed Jun 21 09:38:25 2017 -0400
@@ -0,0 +1,38 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of BWA-indexed sequence data files. You will need
+#to create these data files and then create a bwa_mem_index.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bwa_mem_index.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_path>
+#
+#So, for example, if you had a phiX index stored in
+#/depot/data2/galaxy/phiX/base/,
+#then the bwa_mem_index.loc entry would look like this:
+#
+#phiX174   phiX   phiX Pretty   /depot/data2/galaxy/phiX/base/phiX.fa
+#
+#and your /depot/data2/galaxy/phiX/base/ directory
+#would contain phiX.fa.* files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 phiX.fa.amb
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 phiX.fa.ann
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 phiX.fa.bwt
+#...etc...
+#
+#Your bwa_mem_index.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files.  For example:
+#
+#phiX174				phiX	phiX174			/depot/data2/galaxy/phiX/base/phiX.fa
+#hg18canon				hg18	hg18 Canonical	/depot/data2/galaxy/hg18/base/hg18canon.fa
+#hg18full				hg18	hg18 Full		/depot/data2/galaxy/hg18/base/hg18full.fa
+#/orig/path/hg19.fa		hg19	hg19			/depot/data2/galaxy/hg19/base/hg19.fa
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- a/findcluster.xml	Thu May 11 05:33:27 2017 -0400
+++ b/findcluster.xml	Wed Jun 21 09:38:25 2017 -0400
@@ -1,7 +1,10 @@
-<tool id="findcluster" name="Find clusters of reads" version="0.3.24">
+<tool id="findcluster" name="Find clusters of reads" version="0.3.25">
     <description>in bam files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     <requirements>
-        <requirement type="package" version="0.3.24">readtagger</requirement>
+        <requirement type="package" version="0.3.25">readtagger</requirement>
     </requirements>
     <version_command>findcluster --version</version_command>
     <command detect_errors="aggressive"><![CDATA[
@@ -9,8 +12,19 @@
         ln -f -s $input.metadata.bam_index input.bam.bai &&
         findcluster
         --input_path input.bam
-        #if $reference_fasta:
-            --reference_fasta '$reference_fasta'
+        #if $transposon_source.ref_file:
+            #if str($transposon_source.reference_source_selector) == "history":
+                --transposon_reference_fasta '$transposon_source.ref_file'
+            #else
+                --transposon_bwa_index '$transposon_source.ref_file.fields.path'
+            #end if
+        #end if
+        #if $genome_source.ref_file:
+            #if str($genome_source.reference_source_selector) == "history":
+                --genome_reference_fasta '$genome_source.ref_file'
+            #else
+                --genome_bwa_index '$genome_source.ref_file.fields.path'
+            #end if
         #end if
         --output_bam '$output_bam'
         --output_gff '$output_gff'
@@ -20,7 +34,9 @@
     ]]></command>
     <inputs>
         <param name="input" argument="--input_path" type="data" format="bam"/>
-        <param argument="--reference_fasta" label="Reference Fasta" help="Reconstructed contigs at clusters will be blasted against this sequence." type="data" format="fasta" optional="True"/>
+
+        <expand macro="reference_source_conditional" reference_type="transposon"/>
+        <expand macro="reference_source_conditional" reference_type="genome"/>
     </inputs>
     <outputs>
         <data name="output_bam" format="bam" label="findcluster BAM on $on_string"/>
@@ -35,7 +51,8 @@
         </test>
         <test>
             <param name="input" value="extended_and_annotated_roi.bam" ftype="bam"/>
-            <param name="reference_fasta" value="reference.fasta" ftype="fasta"/>
+            <param name="transposon_source|reference_source_selector" value="history"/>
+            <param name="transposon_source|ref_file" value="reference.fasta" ftype="fasta"/>
             <output name="output_bam" file="three_cluster_out.bam" ftype="bam" lines_diff="2"/>
             <output name="output_gff">
                 <assert_contents>
@@ -51,24 +68,35 @@
 
       Find clusters of reads that support a TE insertion.
 
-        Options:
+    Options:
       --input_path PATH               Find cluster in this BAM file.
+      --region TEXT                   Find clusters in this Region (Format is
+                                      chrX:1000-2000).
+      --max_proper_pair_size INTEGER  Maximum proper pairs size. If not given will
+                                      be inferred from the data.
       --output_bam PATH               Write out BAM file with cluster information
                                       to this path. Reads will have an additional
                                       "CD" tag to indicate the cluster number
       --output_gff PATH               Write out GFF file with cluster information
                                       to this path.
+      --output_fasta PATH             Write out supporting evidence for clusters
+                                      to this path.
       --sample_name TEXT              Sample name to use when writing out clusters
                                       in GFF file. Default is to infer the name
                                       from the input filename.
       --include_duplicates / --no-include_duplicates
                                       Include reads marked as duplicates when
                                       finding clusters.
-      --reference_fasta TEXT          Blast cluster contigs against this fasta
-                                      file
-      --blastdb TEXT                  Blast cluster contigs against this blast
-                                      database
+      --transposon_reference_fasta TEXT
+                                      Transposon fasta to align clipped reads to.
+                                      Not necessary if BWA index is provided.
+      --transposon_bwa_index TEXT     Transposon BWA index to align clipped reads
+                                      to
+      --genome_reference_fasta TEXT   Genome fasta to align clipped reads to. Not
+                                      necessary if BWA index is provided.
+      --genome_bwa_index TEXT         Genome BWA index to align clipped reads to
       --threads INTEGER RANGE         Threads to use for cap3 assembly step
+      --shm_dir PATH                  Path to shared memory folder
       --version                       Show the version and exit.
       --help                          Show this message and exit.
 
--- a/macros.xml	Thu May 11 05:33:27 2017 -0400
+++ b/macros.xml	Wed Jun 21 09:38:25 2017 -0400
@@ -26,4 +26,24 @@
         <option value="X">X</option>
         <option value="Y">Y</option>
     </xml>
+    <xml name="reference_source_conditional" token_reference_type="reference_type">
+        <conditional name="@REFERENCE_TYPE@_source">
+            <param name="reference_source_selector" type="select" label="Will you select a @REFERENCE_TYPE@ reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below">
+                <option value="cached">Use a built-in @REFERENCE_TYPE@ genome index</option>
+                <option value="history">Use a genome from history and build index</option>
+            </param>
+            <when value="cached">
+                <param name="ref_file" type="select" label="Using @REFERENCE_TYPE@ reference genome" help="Select @REFERENCE_TYPE@ genome from the list" optional="True">
+                    <options from_data_table="bwa_mem_indexes">
+                        <filter type="sort_by" column="2" />
+                        <validator type="no_options" message="No indexes are available" />
+                    </options>
+                    <validator type="no_options" message="No built-in @REFERENCE_TYPE@ index is available; use a reference from your history instead"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" optional="True"/>
+            </when>
+        </conditional>
+    </xml>
 </macros>
--- a/test-data/three_cluster_out.gff	Thu May 11 05:33:27 2017 -0400
+++ b/test-data/three_cluster_out.gff	Wed Jun 21 09:38:25 2017 -0400
@@ -1,4 +1,3 @@
 ##gff-version 3
-3R	findcluster	TE	13373515	13373524	22	+	.	ID=extended_and_annotated_roi.bam_0;genotype=homozygous;genotype_likelihoods=1.8828617809e-31,9.53673406912e-07,0.999999046327;left_insert=0,CTCGGAATGTATCTAACTAACAAACTCATATCAAATATAAGCAAGTGCGCCAATTCGTATGCATATGGACATATGGACATATACATATAGTAACATAATATGCTTCTCATATTACGTTTACATACTTACACTAATTGTACATACAATCTTGCACATGCATAAACACATCAAACCAGCTTACATTTTTACTTACACTTAAGCGCATGATTTGTTGTGCATCCATACCGTTATTTTTCC;left_support=11;non_support=0;right_insert=0,GACAACGAAGAAATAAAGATCCAAACTAAAAAAATACCTCGTGTTGATTCTGAAACTTCTTTAAAGGCGTTGATCTTAGTCAAACGACGGATCATTTGTTCGACTCGAATAGTAAAATACGTAAGTATATAGATAGTCTATATTAATTTTAAAAGCTCAAAGGGGCGCAAGTCTCTCTCATCAGTTGTGTCTTTAGTTTTTTTGATTTGGTTTGGTG;right_support=11;valid_TSD=False
-3R	findcluster	TE	13374595	13374595	2	+	.	ID=extended_and_annotated_roi.bam_1;genotype=homozygous;genotype_likelihoods=0.0106846062525,0.329771797916,0.659543595832;left_insert=0,GTTCACCCGCGTCCGAGTTCCTGCTCCACTACTCCCTGGCTGCTGACTCACTGTTGTTATAGGGGTGGCTTCCCCTCTGTTCTTCCTGGGGGAATGCTGCATCTTCCCCAGCTCCAAAATGGCGG;left_support=2;non_support=0;right_insert=;right_support=0;valid_TSD=False
-3R	findcluster	TE	13374677	13374677	1	+	.	ID=extended_and_annotated_roi.bam_2;genotype=heterozygous;genotype_likelihoods=0.212598425197,0.393700787402;left_insert=;left_support=0;non_support=0;right_insert=;right_support=1;valid_TSD=False
+3R	findcluster	TE	13373515	13373524	22	+	.	ID=extended_and_annotated_roi.bam_0;genotype=homozygous;genotype_likelihoods=1.8828617809e-31,9.53673406912e-07,0.999999046327;left_insert=0,CTCGGAATGTATCTAACTAACAAACTCATATCAAATATAAGCAAGTGCGCCAATTCGTATGCATATGGACATATGGACATATACATATAGTAACATAATATGCTTCTCATATTACGTTTACATACTTACACTAATTGTACATACAATCTTGCACATGCATAAACACATCAAACCAGCTTACATTTTTACTTACACTTAAGCGCATGATTTGTTGTGCATCCATACCGTTATTTTTCC;left_mate_support=8;left_support=11;non_support=0;right_insert=0,GACAACGAAGAAATAAAGATCCAAACTAAAAAAATACCTCGTGTTGATTCTGAAACTTCTTTAAAGGCGTTGATCTTAGTCAAACGACGGATCATTTGTTCGACTCGAATAGTAAAATACGTAAGTATATAGATAGTCTATATTAATTTTAAAAGCTCAAAGGGGCGCAAGTCTCTCTCATCAGTTGTGTCTTTAGTTTTTTTGATTTGGTTTGGTG;right_mate_support=6;right_support=11;valid_TSD=False
+3R	findcluster	TE	13374595	13374676	3	+	.	ID=extended_and_annotated_roi.bam_1;genotype=homozygous;genotype_likelihoods=0.00038864889331,0.199922270221,0.799689080885;left_insert=0,GTTCACCCGCGTCCGAGTTCCTGCTCCACTACTCCCTGGCTGCTGACTCACTGTTGTTATAGGGGTGGCTTCCCCTCTGTTCTTCCTGGGGGAATGCTGCATCTTCCCCAGCTCCAAAATGGCGG;left_mate_support=2;left_support=2;non_support=0;right_insert=;right_mate_support=1;right_support=1;valid_TSD=False
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Jun 21 09:38:25 2017 -0400
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of indexes in the BWA mapper format for BWA versions 0.6 and higher including BWA MEM and ALN-->
+    <table name="bwa_mem_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bwa_mem_index.loc" />
+    </table>
+</tables>
--- a/update_mapq.xml	Thu May 11 05:33:27 2017 -0400
+++ b/update_mapq.xml	Wed Jun 21 09:38:25 2017 -0400
@@ -1,7 +1,7 @@
-<tool id="update_mapq" name="Update MAPQ score" version="0.3.24">
+<tool id="update_mapq" name="Update MAPQ score" version="0.3.25">
     <description>of supplementary alignments</description>
     <requirements>
-        <requirement type="package" version="0.3.24">readtagger</requirement>
+        <requirement type="package" version="0.3.25">readtagger</requirement>
     </requirements>
     <version_command>update_mapq --version</version_command>
     <command detect_errors="aggressive"><![CDATA[
--- a/write_supplementary_fastq.xml	Thu May 11 05:33:27 2017 -0400
+++ b/write_supplementary_fastq.xml	Wed Jun 21 09:38:25 2017 -0400
@@ -1,7 +1,7 @@
-<tool id="write_supplementary_fastq" name="Extract supplementary alignments" version="0.3.24">
+<tool id="write_supplementary_fastq" name="Extract supplementary alignments" version="0.3.25">
     <description>from SAM/BAM alignment files as FASTQ</description>
     <requirements>
-        <requirement type="package" version="0.3.24">readtagger</requirement>
+        <requirement type="package" version="0.3.25">readtagger</requirement>
     </requirements>
     <version_command>write_supplementary_fastq --version</version_command>
     <command detect_errors="aggressive"><![CDATA[