Mercurial > repos > xuebing > sharplabtool
comparison tools/fastq/fastq_paired_end_deinterlacer.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #Florent Angly | |
2 import sys | |
3 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner | |
4 | |
def main():
    """De-interlace a paired-end FASTQ file into mate and singleton files.

    Command-line arguments (read from ``sys.argv``):
        1: path of the interlaced FASTQ input file
        2: FASTQ format name; an empty value falls back to 'sanger'
        3: output path for first mates of complete pairs
        4: output path for second mates of complete pairs
        5: output path for first mates whose partner is missing
        6: output path for second mates whose partner is missing

    Each read of the input is visited once, in file order. Its partner is
    looked up by the identifier that ``fastqJoiner.get_paired_identifier``
    derives from it. When a partner exists, both reads are written to the
    mate files and the partner is remembered so it is skipped when the
    sequential pass reaches it. Otherwise the read goes to the matching
    singleton file. A short summary is printed to stdout at the end.
    """
    input_filename = sys.argv[1]
    input_type = sys.argv[2] or 'sanger'
    mate1_filename = sys.argv[3]
    mate2_filename = sys.argv[4]
    single1_filename = sys.argv[5]
    single2_filename = sys.argv[6]

    # Everything that needs closing, in creation order, so the ``finally``
    # clause can release whatever was successfully opened even when a later
    # step fails.
    opened = []

    def _track(resource):
        opened.append(resource)
        return resource

    try:
        # Lookup-by-identifier reader used to fetch the partner of a read.
        named_reader = _track(fastqNamedReader(open(input_filename, 'rb'), format=input_type))
        mate1_out = _track(fastqWriter(open(mate1_filename, 'wb'), format=input_type))
        mate2_out = _track(fastqWriter(open(mate2_filename, 'wb'), format=input_type))
        single1_out = _track(fastqWriter(open(single1_filename, 'wb'), format=input_type))
        single2_out = _track(fastqWriter(open(single2_filename, 'wb'), format=input_type))
        joiner = fastqJoiner(input_type)

        # Second, independent handle on the input for the sequential pass.
        # It is tracked as a raw file object so it is closed explicitly
        # (the original code leaked this handle).
        sequential_handle = _track(open(input_filename, 'rb'))

        read_count = 0
        pair_count = 0
        single_count = 0
        # Identifiers of partners already written as part of a pair.
        already_written = set()

        for read in fastqReader(sequential_handle, format=input_type):
            read_count += 1

            if read.identifier in already_written:
                # This read was emitted together with its partner earlier.
                already_written.remove(read.identifier)
                continue

            partner = named_reader.get(joiner.get_paired_identifier(read))

            if partner:
                # This is a mate pair
                already_written.add(partner.identifier)
                pair_count += 1
                if joiner.is_first_mate(read):
                    mate1_out.write(read)
                    mate2_out.write(partner)
                else:
                    mate1_out.write(partner)
                    mate2_out.write(read)
            else:
                # This is a single
                single_count += 1
                if joiner.is_first_mate(read):
                    single1_out.write(read)
                else:
                    single2_out.write(read)

        # Single-argument print(...) produces the same output whether it is
        # parsed as a Python 2 statement or a Python 3 function call.
        if read_count == 0:
            print("Your input file contained no valid FASTQ sequences.")
        else:
            if single_count:
                print('There were %i reads with no mate.' % single_count)
            print('De-interlaced %s pairs of sequences.' % pair_count)
    finally:
        # Close in reverse order of creation.
        for resource in reversed(opened):
            resource.close()
61 | |
62 | |
# Script entry point: run the de-interlacer only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()