Mercurial > repos > devteam > fastq_paired_end_deinterlacer
comparison fastq_paired_end_deinterlacer.py @ 0:f0949bc49926 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 27 Jan 2014 09:27:16 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f0949bc49926 |
---|---|
1 #Florent Angly | |
2 import sys | |
3 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner | |
4 | |
5 def main(): | |
6 input_filename = sys.argv[1] | |
7 input_type = sys.argv[2] or 'sanger' | |
8 mate1_filename = sys.argv[3] | |
9 mate2_filename = sys.argv[4] | |
10 single1_filename = sys.argv[5] | |
11 single2_filename = sys.argv[6] | |
12 | |
13 type = input_type | |
14 input = fastqNamedReader( open( input_filename, 'rb' ), format = type ) | |
15 mate1_out = fastqWriter( open( mate1_filename, 'wb' ), format = type ) | |
16 mate2_out = fastqWriter( open( mate2_filename, 'wb' ), format = type ) | |
17 single1_out = fastqWriter( open( single1_filename, 'wb' ), format = type ) | |
18 single2_out = fastqWriter( open( single2_filename, 'wb' ), format = type ) | |
19 joiner = fastqJoiner( type ) | |
20 | |
21 i = None | |
22 skip_count = 0 | |
23 found = {} | |
24 for i, read in enumerate( fastqReader( open( input_filename, 'rb' ), format = type ) ): | |
25 | |
26 if read.identifier in found: | |
27 del found[read.identifier] | |
28 continue | |
29 | |
30 mate1 = input.get( read.identifier ) | |
31 | |
32 mate2 = input.get( joiner.get_paired_identifier( mate1 ) ) | |
33 | |
34 if mate2: | |
35 # This is a mate pair | |
36 found[mate2.identifier] = None | |
37 if joiner.is_first_mate( mate1 ): | |
38 mate1_out.write( mate1 ) | |
39 mate2_out.write( mate2 ) | |
40 else: | |
41 mate1_out.write( mate2 ) | |
42 mate2_out.write( mate1 ) | |
43 else: | |
44 # This is a single | |
45 skip_count += 1 | |
46 if joiner.is_first_mate( mate1 ): | |
47 single1_out.write( mate1 ) | |
48 else: | |
49 single2_out.write( mate1 ) | |
50 | |
51 if i is None: | |
52 print "Your input file contained no valid FASTQ sequences." | |
53 else: | |
54 if skip_count: | |
55 print 'There were %i reads with no mate.' % skip_count | |
56 print 'De-interlaced %s pairs of sequences.' % ( (i - skip_count + 1)/2 ) | |
57 | |
58 input.close() | |
59 mate1_out.close() | |
60 mate2_out.close() | |
61 single1_out.close() | |
62 single2_out.close() | |
63 | |
64 | |
65 if __name__ == "__main__": | |
66 main() |