Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
annotate scripts/fastq_positional_quality_trimming.py @ 2:6837f733b4aa draft
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Wed, 22 Jan 2020 09:10:12 -0500 |
parents | 965517909457 |
children | 0cbed1c0a762 |
rev | line source |
---|---|
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
1 # -*- coding: utf-8 -*- |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
2 """ |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
3 This tool trims paired-end FASTQ files on the basis of quality score or left/right position, retaining mate integrity. |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
4 Reads without mate after filtering are saved in a separate output file. |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
5 """ |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
6 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
7 import math |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
8 import optparse |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
9 import sys |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
10 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
11 def average(values): |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
12 """ Arithmetic mean of a list of values """ |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
13 return math.fsum(values) / len(values) if len(values) else float('nan') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
14 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
15 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
16 def phred2sanger(phred_scores): |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
17 """ Convert an array of Phred quality scores (integers in [0, 93]) to a Sanger-encoded quality string""" |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
18 return ''.join([chr(score + 33) for score in phred_scores]) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
19 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
20 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
21 def sanger2phred(sanger_string): |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
22 """ Convert a Sanger-encoded quality string to an array of Phred quality scores""" |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
23 return [ord(ch) - 33 for ch in sanger_string] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
24 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
25 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
26 def trimming(sequ, qual, maxlengthtrim, lefttrim, righttrim, minqualtrim, avgqualtrim): |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
27 """ Trimming of sequence and quality of a read""" |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
28 # Maximum length trimming |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
29 if maxlengthtrim != -1: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
30 if len(sequ) > maxlengthtrim: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
31 sequ = sequ[: maxlengthtrim] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
32 qual = qual[: maxlengthtrim] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
33 # Left- and right-side trimming |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
34 if righttrim == 0: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
35 sequ = sequ[lefttrim :] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
36 qual = qual[lefttrim :] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
37 else: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
38 sequ = sequ[lefttrim : -righttrim] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
39 qual = qual[lefttrim : -righttrim] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
40 # Minimum quality right-side trimming |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
41 while len(sequ) and qual[-1] < minqualtrim: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
42 qual = qual[:-1] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
43 sequ = sequ[:-1] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
44 # Average quality right-side trimming |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
45 while len(sequ) and average(qual) < avgqualtrim: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
46 qual = qual[:-1] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
47 sequ = sequ[:-1] |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
48 return sequ, qual |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
49 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
50 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
51 def __main__(): |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
52 parser = optparse.OptionParser() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
53 parser.add_option('-1', '--input1', dest='input1', help='forward or single-end reads file in Sanger FASTQ format') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
54 parser.add_option('-2', '--input2', dest='input2', help='reverse reads file in Sanger FASTQ format') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
55 parser.add_option('--maxlt', dest='maxlengthtrim', type='int', default=400, help='maximum length trimming') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
56 parser.add_option('--lt', dest='lefttrim', type='int', default=0, help='left-side trimming') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
57 parser.add_option('--rt', dest='righttrim', type='int', default=0, help='right-side trimming') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
58 parser.add_option('--minqt', dest='minqualtrim', type='int', default=15, help='minimum quality right-side trimming') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
59 parser.add_option('--avgqt', dest='avgqualtrim', type='float', default=20, help='average quality right-side trimming') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
60 parser.add_option('--minlf', dest='minlen', type='int', default=25, help='minimum length filtering') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
61 parser.add_option('--trimmed1', dest='trimmed1', help='trimmed forward FASTQ file') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
62 parser.add_option('--trimmed2', dest='trimmed2', help='trimmed reverse FASTQ file') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
63 parser.add_option('--trimmedunpaired', dest='trimmedunpaired', help='trimmed unpaired FASTQ file') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
64 parser.add_option('--log', dest='logfile', help='log file') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
65 (options, args) = parser.parse_args() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
66 if len(args) > 0: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
67 parser.error('Wrong number of arguments') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
68 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
69 maxlengthtrim = options.maxlengthtrim |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
70 lefttrim = options.lefttrim |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
71 righttrim = options.righttrim |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
72 minqualtrim = options.minqualtrim |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
73 avgqualtrim = options.avgqualtrim |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
74 minlen = options.minlen |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
75 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
76 total_reads = 0 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
77 discarded_reads = 0 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
78 forward = open(options.input1) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
79 if options.input2: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
80 paired = True |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
81 passing_paired_reads = 0 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
82 passing_unpaired_reads = 0 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
83 reverse = open(options.input2) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
84 trimmed_reverse = open(options.trimmed2, 'w') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
85 trimmed_unpaired = open(options.trimmedunpaired, 'w') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
86 else: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
87 paired = False |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
88 passing_reads = 0 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
89 trimmed_forward = open(options.trimmed1, 'w') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
90 log = open(options.logfile, 'w') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
91 try: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
92 while True: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
93 headL = forward.next().rstrip() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
94 sequL = forward.next().rstrip() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
95 commL = forward.next().rstrip() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
96 sangL = forward.next().rstrip() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
97 qualL = sanger2phred(sangL) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
98 trimmed_sequL, trimmed_qualL = trimming(sequL, qualL, maxlengthtrim, lefttrim, righttrim, minqualtrim, avgqualtrim) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
99 if paired: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
100 try: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
101 headR = reverse.next().rstrip() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
102 sequR = reverse.next().rstrip() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
103 commR = reverse.next().rstrip() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
104 sangR = reverse.next().rstrip() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
105 except StopIteration: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
106 sys.exit('Reverse FASTQ file contain less reads than forward FASTQ file.') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
107 qualR = sanger2phred(sangR) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
108 trimmed_sequR, trimmed_qualR = trimming(sequR, qualR, maxlengthtrim, lefttrim, righttrim, minqualtrim, avgqualtrim) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
109 # Filter by residual length |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
110 if paired: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
111 if len(trimmed_sequL) >= minlen and len(trimmed_sequR) >= minlen: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
112 trimmed_forward.write(headL + '\n' + trimmed_sequL + '\n' + commL + '\n' + phred2sanger(trimmed_qualL) + '\n') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
113 trimmed_reverse.write(headR + '\n' + trimmed_sequR + '\n' + commR + '\n' + phred2sanger(trimmed_qualR) + '\n') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
114 passing_paired_reads += 1 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
115 elif len(trimmed_sequL) >= minlen and len(trimmed_sequR) < minlen: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
116 trimmed_unpaired.write(headL + '\n' + trimmed_sequL + '\n' + commL + '\n' + phred2sanger(trimmed_qualL) + '\n') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
117 passing_unpaired_reads += 1 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
118 elif len(trimmed_sequL) < minlen and len(trimmed_sequR) >= minlen: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
119 trimmed_unpaired.write(headR + '\n' + trimmed_sequR + '\n' + commR + '\n' + phred2sanger(trimmed_qualR) + '\n') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
120 passing_unpaired_reads += 1 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
121 else: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
122 discarded_reads += 1 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
123 else: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
124 if len(trimmed_sequL) >= minlen: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
125 trimmed_forward.write(headL + '\n' + trimmed_sequL + '\n' + commL + '\n' + phred2sanger(trimmed_qualL) + '\n') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
126 passing_reads += 1 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
127 else: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
128 discarded_reads += 1 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
129 total_reads += 1 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
130 except StopIteration: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
131 if paired: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
132 try: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
133 reverse.next() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
134 except StopIteration: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
135 log.write("Total paired reads : %d\n" % total_reads) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
136 log.write("Passing paired reads : %d\n" % passing_paired_reads) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
137 log.write("Passing unpaired reads : %d\n" % passing_unpaired_reads) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
138 log.write("Discarded paired reads : %d\n" % discarded_reads) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
139 else: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
140 sys.exit('Forward FASTQ file contain less reads than reverse FASTQ file.') |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
141 else: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
142 log.write("Total reads : %d\n" % total_reads) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
143 log.write("Passing reads : %d\n" % passing_reads) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
144 log.write("Discarded reads : %d\n" % discarded_reads) |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
145 finally: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
146 forward.close() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
147 trimmed_forward.close() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
148 log.close() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
149 if paired: |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
150 reverse.close() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
151 trimmed_reverse.close() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
152 trimmed_unpaired.close() |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
153 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
154 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
155 if __name__ == "__main__": |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
156 __main__() |