Mercurial > repos > devteam > fastq_masker_by_quality
comparison fastq_masker_by_quality.py @ 0:5a7b5751617b draft
Imported from capsule None
| author | devteam |
|---|---|
| date | Mon, 27 Jan 2014 09:25:39 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5a7b5751617b |
|---|---|
| 1 #Dan Blankenberg | |
| 2 import string | |
| 3 from optparse import OptionParser | |
| 4 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter | |
| 5 | |
| 6 | |
def get_score_comparer(operator):
    """Return the comparison function selected by an operator code.

    Args:
        operator: One of 'gt', 'ge', 'eq', 'lt', 'le' or 'ne'.

    Returns:
        The matching module-level ``compare_*`` function, callable as
        ``comparer(quality_score, threshold_value)``.

    Raises:
        ValueError: If ``operator`` is not one of the recognised codes.
    """
    if operator == 'gt':
        return compare_gt
    elif operator == 'ge':
        return compare_ge
    elif operator == 'eq':
        return compare_eq
    elif operator == 'lt':
        return compare_lt
    elif operator == 'le':
        return compare_le
    elif operator == 'ne':
        return compare_ne
    # Raising a plain string is rejected by the interpreter with a TypeError,
    # which hides this message; raise a real exception instead.
    raise ValueError('Invalid operator provided: %s' % operator)
| 21 | |
def compare_gt(quality_score, threshold_value):
    """Return whether quality_score is strictly greater than threshold_value."""
    is_greater = quality_score > threshold_value
    return is_greater
| 24 | |
def compare_ge(quality_score, threshold_value):
    """Return whether quality_score is greater than or equal to threshold_value."""
    is_at_least = quality_score >= threshold_value
    return is_at_least
| 27 | |
def compare_eq(quality_score, threshold_value):
    """Return whether quality_score equals threshold_value."""
    is_equal = quality_score == threshold_value
    return is_equal
| 30 | |
def compare_ne(quality_score, threshold_value):
    """Return whether quality_score differs from threshold_value."""
    is_different = quality_score != threshold_value
    return is_different
| 33 | |
def compare_lt(quality_score, threshold_value):
    """Return whether quality_score is strictly less than threshold_value."""
    is_less = quality_score < threshold_value
    return is_less
| 36 | |
def compare_le(quality_score, threshold_value):
    """Return whether quality_score is less than or equal to threshold_value."""
    is_at_most = quality_score <= threshold_value
    return is_at_most
| 39 | |
class BaseReplacer(object):
    """Callable that substitutes a fixed character for any base it is given."""

    def __init__(self, replace_character):
        """Remember the character that every call will return."""
        self.replace_character = replace_character

    def __call__(self, base_character):
        """Return the stored replacement; base_character itself is not used."""
        replacement = self.replace_character
        return replacement
| 45 | |
def _lowercase_base(base_character):
    """Return base_character converted to lowercase."""
    return base_character.lower()


def _mask_sequence(sequence, quality_scores, score_comparer, threshold, base_masker):
    """Return sequence with each base whose score satisfies the comparison masked.

    A base at position i is passed through base_masker when
    score_comparer(quality_scores[i], threshold) is true; all other bases are
    kept unchanged.
    """
    bases = list(sequence)
    for position, quality_score in enumerate(quality_scores):
        if score_comparer(quality_score, threshold):
            bases[position] = base_masker(bases[position])
    return "".join(bases)


def main():
    """Mask bases of a FASTQ file according to their quality scores.

    Command line: ``[options] input_file output_file``. Every read of the
    input file is copied to the output file; bases whose decimal quality
    score satisfies the chosen comparison against the threshold are either
    replaced by the mask character or lowercased (``--lowercase``). A summary
    line with the number of processed reads is printed at the end.
    """
    usage = "usage: %prog [options] input_file output_file"
    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--format', dest='format', type='choice', default='sanger', choices=('sanger', 'solexa', 'illumina'), help='FASTQ variant type')
    parser.add_option('-m', '--mask_character', dest='mask_character', default='N', help='Mask Character to use')
    parser.add_option('-c', '--score_comparison', type="choice", dest='score_comparison', default='le', choices=('gt', 'ge', 'eq', 'lt', 'le', 'ne'), help='Mask base when score is')
    parser.add_option('-s', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score')
    parser.add_option("-l", "--lowercase", action="store_true", dest="lowercase", default=False, help="Use lowercase masking")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("Need to specify an input file and an output file")

    score_comparer = get_score_comparer(options.score_comparison)

    if options.lowercase:
        base_masker = _lowercase_base
    else:
        base_masker = BaseReplacer(options.mask_character)

    # Stays None when the reader yields nothing, which selects the
    # "no valid reads" message below.
    num_reads = None

    # Close both handles explicitly instead of relying on interpreter exit.
    # NOTE(review): assumes fastqWriter writes straight through to the handle
    # it is given and needs no separate flush - confirm against galaxy_utils.
    with open(args[1], 'wb') as output_handle:
        out = fastqWriter(output_handle, format=options.format)
        with open(args[0]) as input_handle:
            for num_reads, fastq_read in enumerate(fastqReader(input_handle, format=options.format)):
                fastq_read.sequence = _mask_sequence(
                    fastq_read.sequence,
                    fastq_read.get_decimal_quality_scores(),
                    score_comparer,
                    options.quality_score,
                    base_masker,
                )
                out.write(fastq_read)

    # Single-argument parenthesised print gives the same output whether print
    # is a statement (Python 2) or a function (Python 3).
    if num_reads is not None:
        print("Processed %i %s reads." % (num_reads + 1, options.format))
    else:
        print("No valid FASTQ reads were provided.")
| 82 | |
# Run the masker only when this file is executed as a script.
if __name__ == "__main__":
    main()
