view mothur/tools/mothur/screen.seqs.xml @ 31:a3eed59297ea

Patches courtesy of Peter Briggs, Bioinformatics Core Facility University of Manchester make.contigs.xml.patch:# make.contigs.xml.patch make.contigs.xml.patch:# make.contigs.xml.patch:# 1. Fix cosmetic typo in <description> (forard -> forward) make.contigs.xml.patch:# 2. Address error due to having 'mismatch' as the name for both an input and an output parameter: make.contigs.xml.patch:# rename output parameter to 'cmismatch' make.contigs.xml.patch:# 3. Remove 'threshold' parameter: make.contigs in mothur doesn't support a 'threshold' parameter metagenomics.py.patch:# metagenomics.py.patch metagenomics.py.patch:# metagenomics.py.patch:# 1. Groups class: names were being taken from the wrong field (affected shhh.flows tool) metagenomics.py.patch:# 2. Axes class: make 'sniff' method more sensitive to try and restrict arbitrary tabular metagenomics.py.patch:# data uploads being sniffed as this type mothur_wrapper.py.patch:# mothur_wrapper.py.patch mothur_wrapper.py.patch:# mothur_wrapper.py.patch:# 1. Update 'cmd_dict' settings for shhh.flows and shhh.seqs (otherwise these functions will mothur_wrapper.py.patch:# fail on execution) mothur_wrapper.py.patch:# 2. Fix add_option calls defining '--match' and '--mismatch' command line options (otherwise mothur_wrapper.py.patch:# syntax error causes immediate failure) screen.seqs.xml.patch:# screen.seqs.xml.patch screen.seqs.xml.patch:# screen.seqs.xml.patch:# Replace pattern for align.report output file in definition of 'results' parameter in screen.seqs.xml.patch:# <command> section (otherwise output_alignreport data item is empty). shhh.flows.xml.patch:# shhh.flows.xml.patch shhh.flows.xml.patch:# shhh.flows.xml.patch:# Replace 'format_source' with 'format' for output parameters (otherwise formats are not shhh.flows.xml.patch:# correctly assigned to output datasets) shhh.seqs.xml.patch:# shhh.seqs.xml.patch shhh.seqs.xml.patch:# shhh.seqs.xml.patch:# 1. 
Fix patterns in --result (in <command> section) for shhh_seqs.fasta and shhh_seqs.names shhh.seqs.xml.patch:# output files (otherwise files are not collected and associated data items are empty) shhh.seqs.xml.patch:# 2. Replace 'format_source' with 'format' for output parameters (otherwise formats are not shhh.seqs.xml.patch:# correctly assigned to output datasets) trim.flows.xml.patch:# trim.flows.xml.patch trim.flows.xml.patch:# trim.flows.xml.patch:# Remove erroneous space from --result definition in <command> section (otherwise causes tool trim.flows.xml.patch:# failure) trim.seqs.xml.patch:# trim.seqs.xml.patch trim.seqs.xml.patch:# trim.seqs.xml.patch:# 1. Remove reference to undefined 'oligo.allvalues' variable in <command> section (otherwise trim.seqs.xml.patch:# causes failure on execution) trim.seqs.xml.patch:# 2. Fix format for input parameter 'names' (format should be 'names' not 'name') trim.seqs.xml.patch:# 3. Add output parameter 'scrap_names' (to ensure consistent collection of all outputs) trim.seqs.xml.patch:# 4. Update --result definition in <command> section to collect both trim.names and scrap.names
author Jim Johnson <jj@umn.edu>
date Tue, 30 Jul 2013 09:26:31 -0500
parents 49058b1f8d3f
children 95d75b35e4d2
line wrap: on
line source

<tool id="mothur_screen_seqs" name="Screen.seqs" version="1.23.0">
 <description>Screen sequences</description>
 <command interpreter="python">
  ## Cheetah template that builds the mothur_wrapper.py command line for the
  ## mothur screen.seqs command.
  ##
  ## The 'results' list collects one quoted 'filename-regex:dataset-path' entry
  ## per expected output; the entries are comma-joined into the --result option
  ## at the end. How the wrapper consumes these pairs is defined in
  ## mothur_wrapper.py, not here (presumably it copies each file matching the
  ## regex in the output directory to the dataset path; verify against the wrapper).
  mothur_wrapper.py 
  #import re, os.path
  --cmd='screen.seqs'
  ## Outputs that are always produced: the log file, the retained ("good")
  ## sequences and the accnos list of rejected ("bad") sequences. The expected
  ## names are derived from the basename of the input fasta by inserting
  ## ".good" before the extension, or by replacing the extension with ".bad.accnos".
  #set results = ["'^mothur.\S+\.logfile$:'" + $logfile.__str__]
  #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.good.\2',$os.path.basename($input_fasta.__str__)) + ":'" + $out_file.__str__]
  #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.bad.accnos',$os.path.basename($input_fasta.__str__)) + ":'" + $bad_accnos.__str__]
  --outputdir='$logfile.extra_files_path'
  --tmpdir='${logfile.extra_files_path}/input'
  --fasta=$input_fasta
  ## Numeric screening thresholds: a negative value (the form default is -1)
  ## means "not set", so the option is omitted from the command line.
  #if int($start) >= 0:
   --start=$start
  #end if
  #if int($end) >= 0:
   --end=$end
  #end if
  #if int($minlength) >= 0:
   --minlength=$minlength
  #end if
  #if int($maxlength) >= 0:
   --maxlength=$maxlength
  #end if
  #if int($maxambig) >= 0:
   --maxambig=$maxambig
  #end if
  #if int($maxhomop) >= 0:
   --maxhomop=$maxhomop
  #end if
  #if int($criteria) >= 0:
   --criteria=$criteria
  #end if
  ## Optional selections and datasets: passed only when a value was supplied.
  ## Each optional companion file also registers the matching ".good" output.
  #if $optimize != None and $optimize.__str__ != "None":
   --optimize=$optimize
  #end if
  #if $input_qfile != None and $input_qfile.__str__ != "None":
   --qfile=$input_qfile
   #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.good.\2',$os.path.basename($input_qfile.__str__)) + ":'" + $output_qfile.__str__]
  #end if
  #if $input_names != None and $input_names.__str__ != "None":
   --name=$input_names
   #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.good.\2',$os.path.basename($input_names.__str__)) + ":'" + $output_names.__str__]
  #end if
  #if $input_groups != None and $input_groups.__str__ != "None":
   --group=$input_groups
   #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.good.\2',$os.path.basename($input_groups.__str__)) + ":'" + $output_groups.__str__]
  #end if
  #if $input_alignreport != None and $input_alignreport.__str__ != "None":
   --alignreport=$input_alignreport
   ## The align.report output is matched with a fixed regex instead of a name
   ## derived from the input basename; the derived pattern (kept below,
   ## commented out) left the output_alignreport dataset empty.
   ###set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.good.\2',$os.path.basename($input_alignreport.__str__)) + ":'" + $output_alignreport.__str__]
   #set results = $results + ["'^\S+\.good\.align\.report$:'" + $output_alignreport.__str__]
  #end if
  #if $input_taxonomy != None and $input_taxonomy.__str__ != "None":
   --taxonomy=$input_taxonomy
   #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.good.\2',$os.path.basename($input_taxonomy.__str__)) + ":'" + $output_taxonomy.__str__]
  #end if
  --result=#echo ','.join($results)
  ## Use the number of cores Galaxy allocated to the job (GALAXY_SLOTS) instead
  ## of a fixed count. The backslash keeps Cheetah from expanding the variable,
  ## so the shell resolves it at run time; it falls back to the previous value
  ## of 8 when GALAXY_SLOTS is not set.
  --processors=\${GALAXY_SLOTS:-8}
 </command>
 <inputs>
  <!-- Sequences to screen; the only required dataset. -->
  <param name="input_fasta" type="data" format="fasta,align" label="fasta - Fasta to screen"/>
  <!-- Numeric screening thresholds. The default of -1 means "not set": the command
       template only passes a threshold to mothur when its value is zero or greater. -->
  <param name="start" type="integer" value="-1" label="start - Remove sequences that start after position (ignored when negative)"/>
  <param name="end" type="integer" value="-1" label="end - Remove sequences that end before position (ignored when negative)"/>
  <param name="minlength" type="integer" value="-1" label="minlength - Remove sequences shorter than (ignored when negative)"/>
  <param name="maxlength" type="integer" value="-1" label="maxlength - Remove sequences longer than (ignored when negative)"/>
  <param name="maxambig" type="integer" value="-1" label="maxambig - Remove sequences with ambiguous bases greater than (ignored when negative)"/>
  <param name="maxhomop" type="integer" value="-1" label="maxhomop - Remove sequences with homopolymers greater than (ignored when negative)"/>
  <param name="criteria" type="integer" value="-1" label="criteria - Percent of sequences that an optimize value must match to be retained (ignored when negative)"/>
  <!-- Thresholds that should be optimized; any number of boxes may be checked. -->
  <param name="optimize" type="select" multiple="true" display="checkboxes" label="optimize - Optimize selected parameters">
   <option value="start">start</option>
   <option value="end">end</option>
   <option value="minlength">minlength</option>
   <option value="maxlength">maxlength</option>
   <option value="maxambig">maxambig</option>
   <option value="maxhomop">maxhomop</option>
  </param>
  <!-- Optional companion files. Each one supplied is passed to screen.seqs and
       has a corresponding screened dataset declared in the outputs section. -->
  <param name="input_qfile" type="data" format="qual" optional="true" label="qfile - Sequence Quality file to screen"/>
  <param name="input_names" type="data" format="names" optional="true" label="name - Sequence Names to screen"/>
  <param name="input_groups" type="data" format="groups" optional="true" label="group - Groups to screen"/>
  <param name="input_alignreport" type="data" format="align.report" optional="true" label="alignreport - Align Report to screen"/>
  <param name="input_taxonomy" type="data" format="taxonomy" optional="true" label="taxonomy - Taxonomy to screen"/>
 </inputs>
 <outputs>
  <!-- Always created: the mothur log, the retained sequences (same datatype as
       the input fasta) and the accnos list of rejected sequences. -->
  <data name="logfile" format="html" label="${tool.name} on ${on_string}: logfile"/>
  <data name="out_file" format_source="input_fasta" label="${tool.name} on ${on_string}: good.${input_fasta.datatype.file_ext}"/>
  <data name="bad_accnos" format="accnos" label="${tool.name} on ${on_string}: bad.accnos"/>
  <!-- Conditional outputs: each is created only when the matching optional
       input dataset was supplied (see the filter expressions). -->
  <data name="output_qfile" format_source="input_qfile" label="${tool.name} on ${on_string}: qfile">
   <filter>input_qfile != None</filter>
  </data>
  <data name="output_names" format="names" label="${tool.name} on ${on_string}: names">
   <filter>input_names != None</filter>
  </data>
  <data name="output_groups" format="groups" label="${tool.name} on ${on_string}: groups">
   <filter>input_groups != None</filter>
  </data>
  <data name="output_alignreport" format="align.report" label="${tool.name} on ${on_string}: align.report">
   <filter>input_alignreport != None</filter>
  </data>
  <data name="output_taxonomy" format="taxonomy" label="${tool.name} on ${on_string}: taxonomy">
   <filter>input_taxonomy != None</filter>
  </data>
 </outputs>
 <!-- External package dependency resolved by Galaxy for this tool.
      NOTE(review): the pinned mothur package version (1.27) differs from the
      tool's own version attribute (1.23.0); confirm whether that is intentional. -->
 <requirements>
  <requirement type="package" version="1.27">mothur</requirement>
 </requirements>
 <!-- No functional tests are defined for this tool. -->
 <tests>
 </tests>
 <help>
**Mothur Overview**

Mothur_, initiated by Dr. Patrick Schloss and his software development team
in the Department of Microbiology and Immunology at The University of Michigan,
provides bioinformatics for the microbial ecology community.

.. _Mothur: http://www.mothur.org/wiki/Main_Page

**Command Documentation**

The screen.seqs_ command enables you to keep sequences that fulfill certain user defined criteria. Furthermore, it enables you to cull those sequences not meeting the criteria from a name_, group_, or align.report_ file. 

.. _name: http://www.mothur.org/wiki/Name_file
.. _group: http://www.mothur.org/wiki/Group_file
.. _align.report: http://www.mothur.org/wiki/Align.seqs
.. _screen.seqs: http://www.mothur.org/wiki/Screen.seqs


 </help>
</tool>