comparison fastq_paired_end_deinterlacer.py @ 0:f0949bc49926 draft

Imported from capsule None
author devteam
date Mon, 27 Jan 2014 09:27:16 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f0949bc49926
1 #Florent Angly
2 import sys
3 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner
4
5 def main():
6 input_filename = sys.argv[1]
7 input_type = sys.argv[2] or 'sanger'
8 mate1_filename = sys.argv[3]
9 mate2_filename = sys.argv[4]
10 single1_filename = sys.argv[5]
11 single2_filename = sys.argv[6]
12
13 type = input_type
14 input = fastqNamedReader( open( input_filename, 'rb' ), format = type )
15 mate1_out = fastqWriter( open( mate1_filename, 'wb' ), format = type )
16 mate2_out = fastqWriter( open( mate2_filename, 'wb' ), format = type )
17 single1_out = fastqWriter( open( single1_filename, 'wb' ), format = type )
18 single2_out = fastqWriter( open( single2_filename, 'wb' ), format = type )
19 joiner = fastqJoiner( type )
20
21 i = None
22 skip_count = 0
23 found = {}
24 for i, read in enumerate( fastqReader( open( input_filename, 'rb' ), format = type ) ):
25
26 if read.identifier in found:
27 del found[read.identifier]
28 continue
29
30 mate1 = input.get( read.identifier )
31
32 mate2 = input.get( joiner.get_paired_identifier( mate1 ) )
33
34 if mate2:
35 # This is a mate pair
36 found[mate2.identifier] = None
37 if joiner.is_first_mate( mate1 ):
38 mate1_out.write( mate1 )
39 mate2_out.write( mate2 )
40 else:
41 mate1_out.write( mate2 )
42 mate2_out.write( mate1 )
43 else:
44 # This is a single
45 skip_count += 1
46 if joiner.is_first_mate( mate1 ):
47 single1_out.write( mate1 )
48 else:
49 single2_out.write( mate1 )
50
51 if i is None:
52 print "Your input file contained no valid FASTQ sequences."
53 else:
54 if skip_count:
55 print 'There were %i reads with no mate.' % skip_count
56 print 'De-interlaced %s pairs of sequences.' % ( (i - skip_count + 1)/2 )
57
58 input.close()
59 mate1_out.close()
60 mate2_out.close()
61 single1_out.close()
62 single2_out.close()
63
64
65 if __name__ == "__main__":
66 main()