diff Tasmanian.xml @ 0:bc0b40dec7d2 draft

"planemo upload for repository https://github.com/nebiolabs/tasmanian-mismatch commit a64a371ca1ed117c9bd8af743d847128fbce461c"
author iuc
date Wed, 20 May 2020 17:23:42 -0400
parents
children b15fbf90db53
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tasmanian.xml	Wed May 20 17:23:42 2020 -0400
@@ -0,0 +1,158 @@
+<tool id="tasmanian_mismatch" name="Analysis of artifacts with Tasmanian" version="0.1.3" profile="18.09">
+  <requirements>
+    <requirement type="package" version="0.1.3">tasmanian-mismatch</requirement> 
+    <requirement type="package" version="1.9">samtools</requirement>
+  </requirements>
+  <command detect_errors="exit_code">
+    <![CDATA[
+   
+     #set $reference_fasta_filename = "localref.fa"
+
+     #if str( $reference_source.reference_source_selector ) == "history":
+        ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' &&
+     #else:
+        #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+     #end if
+
+    samtools view '${bam_input}' | 
+   
+     #if $bed_filename
+        run_intersections -b '$bed_filename' | 
+     #end if  
+ 	   
+     run_tasmanian
+        -q '${basequality}'
+        -s '${softclips}'
+        -m '${mapquality}'
+        -c '${confidence}'
+        -r '${reference_fasta_filename}' > '${output_table}'
+
+  ]]></command>
+  <inputs>
+    <!-- Bam alignment file -->
+    <param type="data" name="bam_input" label="Bam/Sam alignemnt file" format="bam" help="Specify BAM/SAM dataset. If not using a bed file, this file MUST BE SORTED"/>
+    <!-- reference genome upload -->
+    <conditional name="reference_source">
+      <param name="reference_source_selector" type="select" label="Reference genome" help="You can select a reference genome from your history or use a built-in index (Local cache)">
+        <option value="cached">Local cache</option>
+        <option value="history">History</option>
+      </param>
+      <when value="cached">
+        <param name="ref_file" type="select" label="Select the reference genome from the list">
+          <options from_data_table="all_fasta">
+            <filter type="sort_by" column="2" />
+            <validator type="no_options" message="No indexes are available" />
+          </options>
+        </param>
+      </when>
+      <when value="history">
+        <param name="ref_file" type="data" format="fasta" label="Use reference genome from history" help="You can first upload a FASTA sequence to the history" />
+      </when>
+    </conditional>
+            
+    <!-- bed file -->       
+    <param name="bed_filename" type="data" format="bed" optional="true" label="Select a bed file" help="The bed file should contain at least: &quot;chrN&quot;, &quot;start&quot; and &quot;stop&quot;, and is tab separated."/>
+
+    <!-- Additional parameters -->
+    <param name="confidence" label="Boundary" type="integer" value="20" min="0" max="100"
+        help="Number of bases in boundary region, from 0 to length of the read (read help below). Default=20"/> 
+    <param name="softclips" label="Choose an action with softclips" type="select" display="radio"
+        help="How sofclips whould be treated. Values include 0,1 or 2 (read the help below). Default=0">
+      <option value="1">Never use softcliped bases</option>
+      <option value="2">Always use softcliped bases</option>
+      <option value="0" selected="True">Automatic desicion (Default)</option>
+    </param>
+    <param name="mapquality" label="Map quality" type="integer" min="0" max="70" value="20" help="Exclude reads with lower mapQ than this number. Default=20"/>
+    <param name="basequality" label="Base quality" type="integer" min="0" max="70" value="20" help="Exclude bases with lower Base quality than this number. Default=20"/>
+    <param name="keepHTML_conditional" type="select" label="keep HTML output file?">
+      <option value="yes">Yes</option>
+      <option value="no">No</option>
+    </param>    
+  </inputs>
+
+  <outputs>
+    <data name="output_table" format="txt" />
+    <data format="html" name="html_file" from_work_dir="Tasmanian_artifact_report.html" label="tasmanian-mismatch results table">
+      <filter>keepHTML_conditional == "yes"</filter>
+    </data>
+  </outputs>
+
+  <tests>
+    <!-- test when reference from history with bed-->
+    <test>
+      <param name="bam_input" value="test2.bam" ftype="bam"/>
+      <param name="reference_source_selector" value="history"/>
+      <param name="ref_file" value="small_region.fa"/>
+      <param name="bed_filename" value="test2.bed" ftype="bed"/>
+      <output name="output_table" file="test2-bed.output" lines_diff="4"/>
+    </test>
+    <!-- test when reference from history without bed-->
+    <test>
+      <param name="bam_input" value="test2.bam" ftype="bam"/>
+      <param name="reference_source_selector" value="history"/>
+      <param name="ref_file" value="small_region.fa"/>
+      <output name="output_table" file="test2-nobed.output" lines_diff="4"/>
+    </test>
+    <!-- test when reference from cached-->
+    <test>
+      <param name="bam_input" value="test2.bam" ftype="bam" dbkey="hg38"/>
+      <param name="reference_source_selector" value="cached"/>
+      <param name="ref_file" value="hg38"/>
+      <output name="output_table" file="test2-nobed.output" lines_diff="4"/>
+    </test>
+  </tests>
+
+  <help>
+    <![CDATA[
+
+**What it does**
+
+This tool counts the number/proportion of mismatches per position along the read, 
+for each read (see figure below).
+
+.. image:: ${static_path}/images/snapshot_good.jpg
+    :height: 350                                                            
+    :width: 650   
+
+-----
+
+**What is special**
+
+By providing a bed file, tasmanian-mismatch will count mismatches from all regions depicted in the figure below, 
+and will report them separately. Also, a parameter defined as *"confidence"* allows including reads with >=
+bases in the boundary region in a separate group. This is useful when the bed refers to repeat regions. Since these 
+regions might not have been correctly placed in the assembly or are not the same in different individuals, we can 
+include this *confidence* repeat regions where we have high confidence on the reference genome to which we mapped the reads.
+
+.. image:: ${static_path}/images/intersections_tasmanian.jpg                
+    :height: 150                                                            
+    :width: 650 
+
+Softclips are critical in FFPE (Formalin-fixed paraffin-embedded) experiments as mismatches tend to accumulate at the ends of the reads. Most often, softclips 
+are all accepted during the analysis and many real mismatches are indirectly excluded from the analysis. Hence, this tool 
+provides different ways to deal with this: 
+
+The *softclips* field allows for 3 different ways at treating softclips:    
+0) Exclude these region if there is less than 2/3 identity with the reference genome  
+1) Exclude all softclipped bases  
+2) Include all softclipped bases  
+
+.. class:: warningmark   
+
+BAM/SAM file must be **sorted** if not using a bed file.
+  
+    ]]>
+  </help>
+  <citations>
+    <citation type="bibtex">
+      @misc{githubtasmanian,
+        author = {Langhorst B., Others, Erijman A.},
+        year = {2020},
+        title = {TBD},
+        publisher = {GitHub},
+        journal = {GitHub repository},
+        url = {https://github.com/nebiolabs/tasmanian-mismatch},
+      }
+    </citation>
+  </citations>
+</tool>