annotate tools/fastq/fastq_masker_by_quality.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #Dan Blankenberg
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 import string
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 from optparse import OptionParser
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def get_score_comparer( operator ):
    """Return the comparison function selected by a two-letter operator code.

    operator -- one of 'gt', 'ge', 'eq', 'lt', 'le' or 'ne'.

    Returns the matching module-level compare_* function, which is called as
    compare( quality_score, threshold_value ).

    Raises ValueError when operator is not one of the codes listed above.
    """
    if operator == 'gt':
        return compare_gt
    elif operator == 'ge':
        return compare_ge
    elif operator == 'eq':
        return compare_eq
    elif operator == 'lt':
        return compare_lt
    elif operator == 'le':
        return compare_le
    elif operator == 'ne':
        return compare_ne
    # A bare string is not a valid exception object; raising one only yields
    # a TypeError about the raise itself.  Raise a real exception so the
    # message is reported.  The one-element tuple keeps the formatting
    # correct even if operator is itself a tuple.
    raise ValueError( 'Invalid operator provided: %s' % ( operator, ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def compare_gt( quality_score, threshold_value ):
    """Return the result of quality_score > threshold_value."""
    is_above = quality_score > threshold_value
    return is_above
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def compare_ge( quality_score, threshold_value ):
    """Return the result of quality_score >= threshold_value."""
    is_at_or_above = quality_score >= threshold_value
    return is_at_or_above
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def compare_eq( quality_score, threshold_value ):
    """Return the result of quality_score == threshold_value."""
    is_equal = quality_score == threshold_value
    return is_equal
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def compare_ne( quality_score, threshold_value ):
    """Return the result of quality_score != threshold_value."""
    is_different = quality_score != threshold_value
    return is_different
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def compare_lt( quality_score, threshold_value ):
    """Return the result of quality_score < threshold_value."""
    is_below = quality_score < threshold_value
    return is_below
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def compare_le( quality_score, threshold_value ):
    """Return the result of quality_score <= threshold_value."""
    is_at_or_below = quality_score <= threshold_value
    return is_at_or_below
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
class BaseReplacer( object ):
    """Callable that substitutes a fixed character for whatever it is given."""

    def __init__( self, replace_character ):
        """Remember replace_character as the value every call will return."""
        self.replace_character = replace_character

    def __call__( self, base_character ):
        """Return the stored replacement; base_character itself is ignored."""
        return self.replace_character
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def _lowercase_base( base_character ):
    """Return base_character converted to lowercase (lowercase masking)."""
    return base_character.lower()


def main():
    """Mask bases in a FASTQ file based on their quality scores.

    Command line: [options] input_file output_file

    Each read of input_file is parsed with fastqReader; every base whose
    decimal quality score satisfies the chosen comparison against the
    --quality_score threshold is masked, and the read is written to
    output_file with fastqWriter.  A masked base is either replaced by the
    --mask_character or, with --lowercase, converted to lowercase.

    Prints a one-line summary of how many reads were processed.  Exits via
    parser.error() when the two positional arguments are not supplied.
    """
    usage = "usage: %prog [options] input_file output_file"
    parser = OptionParser( usage=usage )
    parser.add_option( '-f', '--format', dest='format', type='choice', default='sanger', choices=( 'sanger', 'solexa', 'illumina' ), help='FASTQ variant type' )
    parser.add_option( '-m', '--mask_character', dest='mask_character', default='N', help='Mask Character to use' )
    parser.add_option( '-c', '--score_comparison', type="choice", dest='score_comparison', default='le', choices=('gt','ge','eq','lt', 'le', 'ne' ), help='Mask base when score is' )
    parser.add_option( '-s', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score' )
    parser.add_option( "-l", "--lowercase", action="store_true", dest="lowercase", default=False, help="Use lowercase masking")
    ( options, args ) = parser.parse_args()

    if len ( args ) != 2:
        parser.error( "Need to specify an input file and an output file" )

    score_comparer = get_score_comparer( options.score_comparison )

    if options.lowercase:
        # Call the string method rather than the deprecated string.lower
        base_masker = _lowercase_base
    else:
        base_masker = BaseReplacer( options.mask_character )

    # Stays None when the reader yields no reads at all
    num_reads = None
    # Keep hold of the raw handles so both files are closed (and the output
    # flushed) even when reading or writing raises part-way through.  The
    # output is opened first, as before.
    with open( args[1], 'wb' ) as output_handle:
        out = fastqWriter( output_handle, format = options.format )
        with open( args[0] ) as input_handle:
            for num_reads, fastq_read in enumerate( fastqReader( input_handle, format = options.format ) ):
                sequence_list = list( fastq_read.sequence )
                for i, quality_score in enumerate( fastq_read.get_decimal_quality_scores() ):
                    if score_comparer( quality_score, options.quality_score ):
                        sequence_list[ i ] = base_masker( sequence_list[ i ] )
                fastq_read.sequence = "".join( sequence_list )
                out.write( fastq_read )

    if num_reads is not None:
        # num_reads is the zero-based index of the last read
        print( "Processed %i %s reads." % ( num_reads + 1, options.format ) )
    else:
        print( "No valid FASTQ reads were provided." )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the masker only when this file is executed as a script.
if __name__ == "__main__":
    main()