0
|
1 #Florent Angly
|
|
2 import sys
|
|
3 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner
|
|
4
|
|
def main():
    """De-interlace a FASTQ file of paired-end reads.

    Command-line arguments (read from ``sys.argv``):
        1. input FASTQ filename
        2. FASTQ format handed to the readers/writers; an empty value
           falls back to 'sanger'
        3. output filename for the first mates of complete pairs
        4. output filename for the second mates of complete pairs
        5. output filename for unpaired first mates
        6. output filename for unpaired second mates

    Every read of the input is visited once, in file order.  If its mate
    can be looked up in the same file, both reads are written to the two
    "mate" outputs; otherwise the read goes to the matching "single"
    output.  A short summary is printed to stdout at the end.
    """
    input_filename = sys.argv[1]
    fastq_format = sys.argv[2] or 'sanger'
    mate1_filename = sys.argv[3]
    mate2_filename = sys.argv[4]
    single1_filename = sys.argv[5]
    single2_filename = sys.argv[6]

    # Everything that must be closed, in creation order.  Filled as we go
    # so that a failure half-way through still releases what was opened.
    to_close = []
    try:
        # Lookup-by-identifier reader, used to fetch the mate of a read.
        mate_lookup = fastqNamedReader(open(input_filename, 'rb'), format=fastq_format)
        to_close.append(mate_lookup)

        mate1_out = fastqWriter(open(mate1_filename, 'wb'), format=fastq_format)
        to_close.append(mate1_out)
        mate2_out = fastqWriter(open(mate2_filename, 'wb'), format=fastq_format)
        to_close.append(mate2_out)
        single1_out = fastqWriter(open(single1_filename, 'wb'), format=fastq_format)
        to_close.append(single1_out)
        single2_out = fastqWriter(open(single2_filename, 'wb'), format=fastq_format)
        to_close.append(single2_out)

        joiner = fastqJoiner(fastq_format)

        # Second, independent handle on the input for the sequential pass.
        # Kept in a variable so it can be closed afterwards.
        reads_file = open(input_filename, 'rb')
        to_close.append(reads_file)

        i = None  # stays None when the input yields no reads at all
        skip_count = 0  # reads for which no mate was found
        # Identifiers of mates already written as part of a pair; they are
        # skipped when the sequential pass reaches them.
        found = set()
        for i, mate1 in enumerate(fastqReader(reads_file, format=fastq_format)):

            if mate1.identifier in found:
                found.remove(mate1.identifier)
                continue

            mate2 = mate_lookup.get(joiner.get_paired_identifier(mate1))

            if mate2:
                # This is a mate pair
                found.add(mate2.identifier)
                if joiner.is_first_mate(mate1):
                    mate1_out.write(mate1)
                    mate2_out.write(mate2)
                else:
                    mate1_out.write(mate2)
                    mate2_out.write(mate1)
            else:
                # This is a single
                skip_count += 1
                if joiner.is_first_mate(mate1):
                    single1_out.write(mate1)
                else:
                    single2_out.write(mate1)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        if i is None:
            print("Your input file contained no valid FASTQ sequences.")
        else:
            if skip_count:
                print('There were %i reads with no mate.' % skip_count)
            # i is the last 0-based index, so i + 1 reads were seen; floor
            # division keeps the pair count an integer on Python 3 as well.
            print('De-interlaced %s pairs of sequences.' % ((i - skip_count + 1) // 2))
    finally:
        for resource in reversed(to_close):
            resource.close()
|
|
61
|
|
62
|
|
# Run the de-interlacer only when executed as a script, not on import.
if "__main__" == __name__:
    main()
|