Mercurial > repos > devteam > fastq_masker_by_quality
changeset 2:eb592e9ec47a draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_masker_by_quality commit f2582539542b33240234e8ea6093e25d0aee9b6a
author | devteam |
---|---|
date | Sat, 30 Sep 2017 14:58:34 -0400 |
parents | 9d234265981e |
children | 9282652e7234 |
files | fastq_masker_by_quality.py fastq_masker_by_quality.xml tool_dependencies.xml |
diffstat | 3 files changed, 53 insertions(+), 144 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_masker_by_quality.py Wed Nov 11 12:41:24 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,83 +0,0 @@ -#Dan Blankenberg -import string -from optparse import OptionParser -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter - - -def get_score_comparer( operator ): - if operator == 'gt': - return compare_gt - elif operator == 'ge': - return compare_ge - elif operator == 'eq': - return compare_eq - elif operator == 'lt': - return compare_lt - elif operator == 'le': - return compare_le - elif operator == 'ne': - return compare_ne - raise 'Invalid operator provided: %s' % operator - -def compare_gt( quality_score, threshold_value ): - return quality_score > threshold_value - -def compare_ge( quality_score, threshold_value ): - return quality_score >= threshold_value - -def compare_eq( quality_score, threshold_value ): - return quality_score == threshold_value - -def compare_ne( quality_score, threshold_value ): - return quality_score != threshold_value - -def compare_lt( quality_score, threshold_value ): - return quality_score < threshold_value - -def compare_le( quality_score, threshold_value ): - return quality_score <= threshold_value - -class BaseReplacer( object ): - def __init__( self, replace_character ): - self.replace_character = replace_character - def __call__( self, base_character ): - return self.replace_character - -def main(): - usage = "usage: %prog [options] input_file output_file" - parser = OptionParser( usage=usage ) - parser.add_option( '-f', '--format', dest='format', type='choice', default='sanger', choices=( 'sanger', 'solexa', 'illumina' ), help='FASTQ variant type' ) - parser.add_option( '-m', '--mask_character', dest='mask_character', default='N', help='Mask Character to use' ) - parser.add_option( '-c', '--score_comparison', type="choice", dest='score_comparison', default='le', choices=('gt','ge','eq','lt', 'le', 'ne' ), help='Mask base when score is' ) - parser.add_option( '-s', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score' ) - parser.add_option( "-l", "--lowercase", action="store_true", dest="lowercase", default=False, help="Use lowercase masking") - ( options, args ) = parser.parse_args() - - if len ( args ) != 2: - parser.error( "Need to specify an input file and an output file" ) - - score_comparer = get_score_comparer( options.score_comparison ) - - if options.lowercase: - base_masker = string.lower - else: - base_masker = BaseReplacer( options.mask_character ) - - out = fastqWriter( open( args[1], 'wb' ), format = options.format ) - - num_reads = None - num_reads_excluded = 0 - for num_reads, fastq_read in enumerate( fastqReader( open( args[0] ), format = options.format ) ): - sequence_list = list( fastq_read.sequence ) - for i, quality_score in enumerate( fastq_read.get_decimal_quality_scores() ): - if score_comparer( quality_score, options.quality_score ): - sequence_list[ i ] = base_masker( sequence_list[ i ] ) - fastq_read.sequence = "".join( sequence_list ) - out.write( fastq_read ) - - if num_reads is not None: - print "Processed %i %s reads." % ( num_reads + 1, options.format ) - else: - print "No valid FASTQ reads were provided." - -if __name__ == "__main__": main()
--- a/fastq_masker_by_quality.xml Wed Nov 11 12:41:24 2015 -0500 +++ b/fastq_masker_by_quality.xml Sat Sep 30 14:58:34 2017 -0400 @@ -1,61 +1,59 @@ -<tool id="fastq_masker_by_quality" name="FASTQ Masker" version="1.0.0"> - <description>by quality score</description> - <requirements> - <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> - </requirements> - <command interpreter="python">fastq_masker_by_quality.py '$input_file' '$output_file' -f '${input_file.extension[len( 'fastq' ):]}' -s '${quality_score}' -c '${score_comparison}' - #if $mask_type.value == 'lowercase' - --lowercase - #else - -m '${mask_type}' - #end if - </command> - <inputs> - <param name="input_file" type="data" format="fastqsanger" label="File to mask" /> - <param name="mask_type" type="select" label="Mask input with"> - <option value="N">N's</option> - <option value="lowercase">Lowercase</option> - </param> - <param name="score_comparison" type="select" label="When score is"> - <option value="le" selected="True">Less than or equal</option> - <option value="lt">Less than</option> - <option value="eq">Equal to</option> - <option value="ne">Not Equal to</option> - <option value="ge">Greater than</option> - <option value="gt">Greater than or equal</option> - </param> - <param name="quality_score" type="integer" value="0" label="Quality score"/> - </inputs> - <outputs> - <data name="output_file" format="fastqsanger" /> - </outputs> - <tests> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="mask_type" value="N" /> - <param name="score_comparison" value="le" /> - <param name="quality_score" value="20" /> - <output name="output_file" file="sanger_full_range_masked_N.fastqsanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="mask_type" value="lowercase" /> - <param name="score_comparison" value="le" /> - <param name="quality_score" value="20" /> - <output name="output_file" file="sanger_full_range_masked_lowercase.fastqsanger" /> - </test> - </tests> - <help> +<tool id="fastq_masker_by_quality" name="FASTQ Masker" version="1.1.1"> + <description>by quality score</description> + <requirements> + <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement> + </requirements> + <command><![CDATA[ +gx-fastq-masker-by-quality '$input_file' '$output_file' -f '${input_file.extension[len( 'fastq' ):]}' -s ${quality_score} -c ${score_comparison} +#if $mask_type.value == 'lowercase' + --lowercase +#else + -m ${mask_type} +#end if + ]]></command> + <inputs> + <param name="input_file" type="data" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="File to mask" /> + <param name="mask_type" type="select" label="Mask input with"> + <option value="N">N's</option> + <option value="lowercase">Lowercase</option> + </param> + <param name="score_comparison" type="select" label="When score is"> + <option value="le" selected="true">Less than or equal</option> + <option value="lt">Less than</option> + <option value="eq">Equal to</option> + <option value="ne">Not Equal to</option> + <option value="ge">Greater than</option> + <option value="gt">Greater than or equal</option> + </param> + <param name="quality_score" type="integer" value="0" label="Quality score"/> + </inputs> + <outputs> + <data name="output_file" format_source="input_file" /> + </outputs> + <tests> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="mask_type" value="N" /> + <param name="score_comparison" value="le" /> + <param name="quality_score" value="20" /> + <output name="output_file" file="sanger_full_range_masked_N.fastqsanger" ftype="fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="mask_type" value="lowercase" /> + <param name="score_comparison" value="le" /> + <param name="quality_score" value="20" /> + <output name="output_file" file="sanger_full_range_masked_lowercase.fastqsanger" ftype="fastqsanger" /> + </test> + </tests> + <help><![CDATA[ **What it does** This tool allows masking base characters in FASTQ format files dependent upon user specified quality score value and comparison method. This tool is not available for use on color space (csSanger) formats. - - </help> - - <citations> - <citation type="doi">10.1093/bioinformatics/btq281</citation> - </citations> - + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btq281</citation> + </citations> </tool>
--- a/tool_dependencies.xml Wed Nov 11 12:41:24 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="galaxy_sequence_utils" version="1.0.0"> - <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>