diff tgsgapcloser.xml @ 0:86fa46d3ce2e draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/tgsgapcloser commit dcc6bd722244004ed2d5ac49d53a4e1d71366b1a"
author bgruening
date Sun, 14 Nov 2021 21:28:36 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tgsgapcloser.xml	Sun Nov 14 21:28:36 2021 +0000
@@ -0,0 +1,246 @@
+<tool id="tgsgapcloser" name="TGS-GapCloser" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
+    <description>fills the N-gap of error-prone long reads</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="xrefs"/>
+    <version_command>tgsgapcloser --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        PILON=\$(which pilon)
+        PILON_JAR=\$(readlink -f \$PILON).jar
+        && tgsgapcloser    
+        --scaff $scaff 
+        --reads $reads
+        --output output
+        #if $error_conditional.error_options == 'pilon'
+            --pilon \$PILON_JAR
+            --ngs $error_conditional.ngs
+            --samtools `which samtools`
+            --java `which java`
+            --pilon_mem \${GALAXY_MEMORY_MB:-4096}M
+        #elif $error_conditional.error_options == 'racon'
+            --racon `which racon`
+        #else
+            --ne
+        #end if
+        --tgstype $tgstype_conditional.tgstype
+        --min_idy $tgstype_conditional.min_idy
+        --min_match $tgstype_conditional.min_match
+        --chunk $chunk
+        $g_check
+        --thread  \${GALAXY_SLOTS:-16}
+    ]]></command>
+    <inputs>
+        <param argument="--scaff" type="data" format="fasta" multiple="true" label="Scaffold file" />
+        <param argument="--reads" type="data" format="fasta" multiple="true" label="Input reads" />
+        <conditional name="error_conditional">
+            <param name="error_options" type="select" label="Error correction">
+                <option value="ne">Do not error correct</option>
+                <option value="racon">Racon</option>
+                <option value="pilon">Pilon</option>
+            </param>
+            <when value="ne"/>
+            <when value="racon">
+                <param argument="--r_round" type="integer" min="0" max="10" value="1" label="Number of Racon error-correction rounds" 
+                    help="Although multiple rounds of racon can increase the quality of an assembly there are indications that it also 
+                        fragments the assembly and may decrease quality by removing structural variants and SNPs. Published assembly workflows 
+                        differ in the number of rounds but rarely apply more than 4 rounds of racon" />
+            </when>
+            <when value="pilon">
+                <param argument="--ngs" type="data" format="fastq,fastq.gz" label="Illumina reads" 
+                    help="Pilon can utilize Illumina short reads mapped to the draft assembly to 
+                        improve the local accuracy of the sequence by correcting sequence errors, 
+                        fixing misassemblies, and filling gaps"/>
+                <param argument="--p_round" type="integer" min="0" max="10" value="3" label="Number of Pilon error-correction rounds" />
+            </when>
+        </conditional>
+        <conditional name="tgstype_conditional">
+            <param argument="--tgstype" type="select" label="Type of third generation reads">
+                <option value="ont" selected="true">Oxford Nanopore Technologies (ONT)</option>
+                <option value="pb">PacBio (pb)</option>
+            </param>
+            <when value="ont">
+                <param argument="--min_idy" type="float" min="0" max="1" value="0.3" label="Minimum identity for filtering candidate sequences"/>
+                <param argument="--min_match" type="integer" min="0" max="1000" value="300" label="Minimum matched length for filtering candidate sequences"/>
+            </when>
+            <when value="pb">
+                <param argument="--min_idy" type="float" min="0" max="1" value="0.2" label="Minimum identity for filtering candidate sequences"/>
+                <param argument="--min_match" type="integer" min="0" max="1000" value="200" label="Minimum matched length for filtering candidate sequences"/>
+            </when>
+        </conditional>
+        <param argument="--chunk" type="integer" min="0" max="20" value="3" label="Chunks for error correction" help="Split candidates into # of chunks to separately correct errors" />
+        <param argument="--g_check" type="boolean" truevalue="--g_check" falsevalue="" label="Gap-size diff check"/>
+        <param name="output_options" type="select" multiple="true" optional="true" display="checkboxes" label="Output files">
+            <option value="log_file">General log file</option>
+            <option value="gapfill_log">Gapfill log file</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="log" format="txt" from_work_dir="output.fill.log" label="${tool.name} on ${on_string}: log">
+            <filter>output_options and 'log_file' in output_options</filter>
+        </data>
+        <data name="final_assembly" format="fasta" from_work_dir="output.scaff_seqs" label="${tool.name} on ${on_string}: final assembly"/>
+
+        <data name="fill_details" format="txt" from_work_dir="output.gap_fill_detail" label="${tool.name} on ${on_string}: gap fill details">
+            <filter>output_options and 'gapfill_log' in output_options</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!--Test 01: no correction-->
+        <test expect_num_outputs="3">
+            <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
+            <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
+            <conditional name="error_conditional">
+                <param name="error_options" value="ne"/>
+            </conditional>
+            <conditional name="tgstype_conditional">
+                <param name="tgstype" value="ont"/>
+                <param name="min_idy" value="0.3"/>
+                <param name="min_match" value="300"/>
+            </conditional>
+            <param name="chunk" value="3"/>
+            <param name="g_check" value="false"/>
+            <param name="output_options" value="log_file,gapfill_log"/>
+            <output name="final_assembly" file="test_01_final_assembly.fasta" ftype="fasta"/>
+            <output name="log" ftype="txt">
+                <assert_contents>
+                    <has_text text="TGSGapCloser start now"/>
+                    <has_text text="the one read provide filler choose count freq for a gap"/>
+                </assert_contents>
+            </output>
+            <output name="fill_details" file="test_01_gapfill.log" ftype="txt"/>
+        </test>
+        <!--Test 02: correction with racon-->
+        <test expect_num_outputs="1">
+            <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
+            <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
+            <conditional name="error_conditional">
+                <param name="error_options" value="racon"/>
+                <param name="r_round" value="2"/>
+            </conditional>
+            <conditional name="tgstype_conditional">
+                <param name="tgstype" value="ont"/>
+                <param name="min_idy" value="0.3"/>
+                <param name="min_match" value="300"/>
+            </conditional>
+            <param name="chunk" value="3"/>
+            <param name="g_check" value="false"/>
+            <output name="final_assembly" file="test_02_final_assembly.fasta" ftype="fasta"/>
+        </test>
+        <!--Test 03: correction pilon-->
+        <test expect_num_outputs="1">
+            <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
+            <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
+            <conditional name="error_conditional">
+                <param name="error_options" value="pilon"/>
+                <param name="ngs" value="ngs_reads.fastq" ftype="fastq"/>
+                <param name="r_round" value="1"/>
+            </conditional>
+            <conditional name="tgstype_conditional">
+                <param name="tgstype" value="ont"/>
+                <param name="min_idy" value="0"/>
+                <param name="min_match" value="0"/>
+            </conditional>
+            <param name="chunk" value="1"/>
+            <param name="g_check" value="true"/>
+            <output name="final_assembly" file="test_03_final_assembly.fasta" ftype="fasta"/>
+        </test>
+        <!--Test 04: correction with racon and pacbio-->
+        <test expect_num_outputs="1">
+            <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
+            <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
+            <conditional name="error_conditional">
+                <param name="error_options" value="racon"/>
+                <param name="r_round" value="2"/>
+            </conditional>
+            <conditional name="tgstype_conditional">
+                <param name="tgstype" value="pacbio"/>
+                <param name="min_idy" value="0.2"/>
+                <param name="min_match" value="200"/>
+            </conditional>
+            <param name="chunk" value="2"/>
+            <param name="g_check" value="false"/>
+            <output name="final_assembly" file="test_04_final_assembly.fasta" ftype="fasta"/>
+        </test>
+        <!--Test 05: fastq.gz files-->
+        <test expect_num_outputs="1">
+            <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
+            <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
+            <conditional name="error_conditional">
+                <param name="error_options" value="pilon"/>
+                <param name="ngs" value="ngs_reads.fastq.gz" ftype="fastq.gz"/>
+                <param name="r_round" value="1"/>
+            </conditional>
+            <conditional name="tgstype_conditional">
+                <param name="tgstype" value="ont"/>
+                <param name="min_idy" value="0"/>
+                <param name="min_match" value="0"/>
+            </conditional>
+            <param name="chunk" value="1"/>
+            <param name="g_check" value="false"/>
+            <output name="final_assembly" file="test_05_final_assembly.fasta" ftype="fasta"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**Purpose**
+
+TGS-GapCloser is a gap-closing software tool that uses error-prone long reads generated by third-generation-sequence techniques (Pacbio,
+Oxford Nanopore, etc.) or preassembled contigs to fill N-gap in the genome assembly. This tool can close gaps in large genome assemblies
+using raw long reads quickly and cost-effectively. The final assemblies generated by TGS-GapCloser have improved contiguity and 
+completeness while maintaining high accuracy.
+
+----
+
+.. class:: infomark
+
+**Quick usage**
+
+Input reads can only be in FASTA format. Both raw reads and pre-error-corrected reads are acceptable as input. If only raw long reads are 
+provided, it polishes raw TGS reads by calling Racon.If additional NGS short reads are available, it polishes raw TGS reads by calling Pilon.
+
+----
+
+.. class:: infomark
+
+**Gap fill details**
+
+
+Format of a detailed information of gap fill report:
+
+- Each scaffold name is followed by its data lines.
+- A data line consists of 3 or 5 columns and describes the source of each segment in the final sequence:
+- Column 1 is the segment's first bp position in the final sequence.
+- Column 2 is the segment's last bp position in the final sequence.
+- Column 3 is the segment's type , 'S' , 'N' or 'F'.
+- 'S' means this segment is a segment of the input sequence and this line includes other two more columns:
+- Column 4 is the segment's first bp position in the input sequence.
+- Column 5 is the segment's last bp position in the input sequence.
+- 'N' means this segment is a N area.
+- 'F' means this segment is a filled sequence from TGS reads.
+
+----
+
+.. class:: infomark
+
+**Algorithm and implementation of TGS-GapCloser**
+
+This is a brief description of the TGS-GapCloser algorithm. Please refer to the manuscript for more detailed information.
+
+TGS-GapCloser is coded in the C++ programing language (requires GCC 4.4+). It uses minimap2 to obtain alignments, and Pilon (requires Java runtime 1.7+)
+or Racon (requires GCC 4.8+) to correct candidate fragments. The algorithm automatically identifies gaps and tries to find the best matched long-read 
+fragments to close gaps or merge adjacent scaftigs. To accelerate the gap closure without losing efficiency and accuracy, TGS-GapCloser only selects a 
+limited number of fragmented long reads as candidates for subsequent error correction and competition.
+
+TGS-GapCloser can accept as input any type of TGS long reads or other pre-assembled contigs to fill gaps in a draft assembly in the 4 steps :
+	(i) Identification of gap regions in the draft assembly; 
+	(ii) Acquisition of candidates from the alignments of long reads against gaps; 
+	(iii) Base-level error correction of alternative sub-long reads; and 
+	(iv) Gap closure using the error-corrected candidates with the highest score for each gap or linkage of the neighboring scaftigs with overlaps.
+
+  ]]></help>
+    <expand macro="citations" />
+</tool>