comparison tools/fastq/fastq_paired_end_deinterlacer.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 #Florent Angly
2 import sys
3 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner
4
def main():
    """De-interlace a paired-end FASTQ file into mate and singleton files.

    Command-line arguments (read from ``sys.argv``):
        1. input FASTQ file holding the interlaced reads
        2. FASTQ format name; an empty string falls back to ``'sanger'``
        3. output file receiving the first mate of every complete pair
        4. output file receiving the second mate of every complete pair
        5. output file receiving first mates whose partner was not found
        6. output file receiving second mates whose partner was not found

    A short summary (number of pairs, number of reads without a mate) is
    printed to standard output once the input has been processed.
    """
    input_filename = sys.argv[1]
    input_type = sys.argv[2] or 'sanger'
    mate1_filename = sys.argv[3]
    mate2_filename = sys.argv[4]
    single1_filename = sys.argv[5]
    single2_filename = sys.argv[6]

    # The input is opened twice: the named reader looks mates up by
    # identifier, while the sequential handle drives the main loop.
    named_reader = fastqNamedReader(open(input_filename, 'rb'), format=input_type)
    mate1_out = fastqWriter(open(mate1_filename, 'wb'), format=input_type)
    mate2_out = fastqWriter(open(mate2_filename, 'wb'), format=input_type)
    single1_out = fastqWriter(open(single1_filename, 'wb'), format=input_type)
    single2_out = fastqWriter(open(single2_filename, 'wb'), format=input_type)
    joiner = fastqJoiner(input_type)
    sequential_handle = open(input_filename, 'rb')

    last_index = None  # stays None when the input yields no reads at all
    skip_count = 0
    # Identifiers of mates already written as part of a pair; they must be
    # skipped when the sequential reader reaches them later.
    already_paired = set()
    try:
        sequential_reader = fastqReader(sequential_handle, format=input_type)
        for last_index, read in enumerate(sequential_reader):
            if read.identifier in already_paired:
                already_paired.remove(read.identifier)
                continue

            partner = named_reader.get(joiner.get_paired_identifier(read))

            if partner:
                # Complete pair: remember the partner so it is not emitted
                # a second time, then route each read to its mate file.
                already_paired.add(partner.identifier)
                if joiner.is_first_mate(read):
                    mate1_out.write(read)
                    mate2_out.write(partner)
                else:
                    mate1_out.write(partner)
                    mate2_out.write(read)
            else:
                # No partner in the input: the read is a singleton.
                skip_count += 1
                if joiner.is_first_mate(read):
                    single1_out.write(read)
                else:
                    single2_out.write(read)
    finally:
        # Release every handle even if reading or writing failed part-way,
        # including the sequential input handle.
        sequential_handle.close()
        named_reader.close()
        for writer in (mate1_out, mate2_out, single1_out, single2_out):
            writer.close()

    # Single-argument print(...) yields the same output under the print
    # statement and the print function.
    if last_index is None:
        print("Your input file contained no valid FASTQ sequences.")
    else:
        if skip_count:
            print('There were %i reads with no mate.' % skip_count)
        # last_index + 1 is the total number of reads; floor division keeps
        # the pair count an integer regardless of interpreter version.
        print('De-interlaced %s pairs of sequences.' % ((last_index - skip_count + 1) // 2))
61
62
# Entry point: run the de-interlacer only when this file is executed as a
# script, so that importing it does not trigger any processing.
if __name__ == "__main__":
    main()