diff sopra_wpc.xml @ 0:988d5a82291a draft

Uploaded
author crs4
date Thu, 24 Oct 2013 14:02:10 -0400
parents
children 87ffe493b6c1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sopra_wpc.xml	Thu Oct 24 14:02:10 2013 -0400
@@ -0,0 +1,69 @@
+<tool id="sopra_wpc" name="SOPRA with prebuilt contigs" version="0.1">
+  <description>for Illumina workflow</description>
+  <requirements> <!-- external packages this tool declares as dependencies, pinned to exact versions -->
+    <requirement version="1.4.6" type="package">sopra</requirement>
+    <requirement version="1.0.0" type="package">bowtie</requirement>
+  </requirements>
+  <command interpreter="python">
+    sopra_wpc.py
+    #for $cr in $contigs_repeat
+      --contigs '${cr.contigs_file}'
+    #end for
+    #for $mr in $mate_repeat
+      --mate '${mr.mate_file}'
+      -d ${mr.insert_size}
+    #end for
+    -v $max_mismatches -c $c_option -w $w_option -L $L_option --h_option $h_option --scaffolds '$scaffolds_file' --logfile '$logfile'
+  </command> <!-- dataset paths are single-quoted so the shell passes each one as a single argument; numeric params are typed integer/float in the inputs section -->
+  <inputs> <!-- both repeats require at least one entry (min="1"); the command template loops over each of them -->
+    <repeat name="contigs_repeat" min="1" title="Contigs file">
+      <param name="contigs_file" type="data" label="Contigs" help="FASTA format" format="fasta" />
+    </repeat>
+    <repeat name="mate_repeat" min="1" title="Paired-end Illumina library">
+      <param name="mate_file" type="data" label="Paired-end Illumina library" help="FASTA format" format="fasta" />
+      <param name="insert_size" type="integer" label="Insert size" help="Insert size for the library (-d)" value="" />
+    </repeat>
+    <param name="max_mismatches" type="integer" label="Maximum number of mismatches when aligning reads on contigs with Bowtie (-v)" help="May be 0, 1, 2, or 3" value="0" min="0" max="3" />
+    <param name="c_option" type="integer" label="If the number of times a read and its reverse complement appear in the library is equal to or more than this value, the pairing information from that read will be disregarded (-c)" value="5" />
+    <param name="w_option" type="integer" label="Minimum number of links between two contigs (-w)" value="4" />
+    <param name="L_option" type="integer" label="Minimum length of contigs to be used in scaffold assembly (-L)" value="150" />
+    <param name="h_option" type="float" label="h value (-h)" help="High coverage contigs (above mean_coverage + h x std_coverage) are not considered in the scaffold assembly mainly to exclude reads from repetitive regions" value="2.2" />
+  </inputs>
+  <outputs> <!-- the two datasets whose paths the command passes as its scaffolds and logfile options -->
+    <data name="scaffolds_file" format="fasta" label="${tool.name} on ${on_string}: scaffolds_sopra.fasta" />
+    <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" />
+  </outputs>
+  <help>
+**What it does**
+
+SOPRA is an assembly tool for mate pair/paired-end data generated by high-throughput sequencing technologies, e.g. Illumina and SOLiD platforms.
+
+| The input paired-end FASTA file can be obtained with:
+| FR reads -> *FASTQ interlacer on paired end reads* followed by *FASTQ to FASTA* converter
+| RF reads -> *Reverse-Complement*, *FASTQ interlacer on paired end reads* followed by *FASTQ to FASTA* converter
+
+.. class:: infomark
+
+**TIP:** Try trimming the ends of short reads before feeding them to the assembler to remove the error-prone bases (e.g. the last 10 to 20 bp) and check whether it improves the assembly.
+
+-----
+
+**License and citation**
+
+This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.
+
+.. _CRS4 Srl.: http://www.crs4.it/
+.. _MIT license: http://opensource.org/licenses/MIT
+
+If you use this tool in Galaxy, please cite |Cuccuru2013|_.
+
+.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
+.. _Cuccuru2013: http://orione.crs4.it/
+
+This tool uses `SOPRA`_, which is licensed separately. Please cite |Dayarian2010|_.
+
+.. _SOPRA: http://www.physics.rutgers.edu/~anirvans/SOPRA/
+.. |Dayarian2010| replace:: Dayarian, A., Michael, T. P., Sengupta, A. M. (2010) SOPRA: Scaffolding algorithm for paired reads via statistical optimization. *BMC Bioinformatics* 11, 345
+.. _Dayarian2010: http://www.biomedcentral.com/1471-2105/11/345/
+  </help>
+</tool>