Mercurial > repos > artbio > yac_clipper
comparison yac.py @ 3:94d67b195acd draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit 6884c90d521932ae0981532929db9f5f44c8b4a2
author | artbio |
---|---|
date | Mon, 21 Jan 2019 18:46:04 -0500 |
parents | da08e89abd18 |
children | f7947c5a18b8 |
comparison
equal
deleted
inserted
replaced
2:da08e89abd18 | 3:94d67b195acd |
---|---|
44 self.output_format = output_format | 44 self.output_format = output_format |
45 self.adapter = adapter | 45 self.adapter = adapter |
46 self.minsize = int(minsize) | 46 self.minsize = int(minsize) |
47 self.maxsize = int(maxsize) | 47 self.maxsize = int(maxsize) |
48 self.Nmode = Nmode | 48 self.Nmode = Nmode |
49 for line in open(inputfile): | |
50 if line[0] == "@": | |
51 self.inputformat = "fastq" | |
52 break | |
53 elif line[0] == ">": | |
54 self.inputformat = "fasta" | |
49 | 55 |
50 def motives(sequence): | 56 def motives(sequence): |
51 ''' | 57 ''' |
52 return a list of motives for perfect (6nt) or | 58 return a list of motives for perfect (6nt) or |
53 imperfect (7nt with one mismatch) search on import string module | 59 imperfect (7nt with one mismatch) search on import string module |
63 self.adaptmotifs = motives(self.adapter) | 69 self.adaptmotifs = motives(self.adapter) |
64 | 70 |
65 def scanadapt(self, adaptmotives=[], sequence="", qscore=""): | 71 def scanadapt(self, adaptmotives=[], sequence="", qscore=""): |
66 '''scans sequence for adapter motives''' | 72 '''scans sequence for adapter motives''' |
67 match_position = sequence.rfind(adaptmotives[0]) | 73 match_position = sequence.rfind(adaptmotives[0]) |
68 if match_position != -1: | 74 if qscore: |
69 return sequence[:match_position], qscore[:match_position] | |
70 for motif in adaptmotives[1:]: | |
71 match_position = sequence.rfind(motif) | |
72 if match_position != -1: | 75 if match_position != -1: |
73 return sequence[:match_position], qscore[:match_position] | 76 return sequence[:match_position], qscore[:match_position] |
74 return sequence, qscore | 77 for motif in adaptmotives[1:]: |
78 match_position = sequence.rfind(motif) | |
79 if match_position != -1: | |
80 return sequence[:match_position], qscore[:match_position] | |
81 return sequence, qscore | |
82 else: | |
83 if match_position != -1: | |
84 return sequence[:match_position] | |
85 for motif in adaptmotives[1:]: | |
86 match_position = sequence.rfind(motif) | |
87 if match_position != -1: | |
88 return sequence[:match_position] | |
89 return sequence | |
75 | 90 |
76 def write_output(self, id, read, qscore, output): | 91 def write_output(self, id, read, qscore, output): |
77 if self.output_format == "fasta": | 92 if self.output_format == "fasta": |
78 block = ">{0}\n{1}\n".format(id, read) | 93 block = ">{0}\n{1}\n".format(id, read) |
79 else: | 94 else: |
80 block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore) | 95 block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore) |
81 output.write(block) | 96 output.write(block) |
82 | 97 |
83 def handle_io(self): | 98 def fasta_in_write_output(self, id, read, output): |
84 '''Open input file, pass read sequence and read qscore to clipping function. | 99 output.write(">{0}\n{1}\n".format(id, read)) |
85 Pass clipped read and qscore to output function.''' | 100 |
101 def handle_io_fastq(self): | |
102 '''Open input fastq file, pass read sequence and read qscore to | |
103 scanadapt function. Pass clipped read and qscore to output function.''' | |
86 id = 0 | 104 id = 0 |
87 output = open(self.outputfile, "a") | 105 output = open(self.outputfile, "a") |
88 with open(self.inputfile, "r") as input: | 106 with open(self.inputfile, "r") as input: |
89 block_gen = islice(input, 1, None, 2) | 107 block_gen = islice(input, 1, None, 2) |
90 for i, line in enumerate(block_gen): | 108 for i, line in enumerate(block_gen): |
98 if self.minsize <= len(trimmed_read) <= self.maxsize: | 116 if self.minsize <= len(trimmed_read) <= self.maxsize: |
99 if (self.Nmode == "reject") and ("N" in trimmed_read): | 117 if (self.Nmode == "reject") and ("N" in trimmed_read): |
100 continue | 118 continue |
101 id += 1 | 119 id += 1 |
102 self.write_output(id, trimmed_read, trimmed_qscore, output) | 120 self.write_output(id, trimmed_read, trimmed_qscore, output) |
103 output.close() | 121 output.close() |
122 | |
123 def handle_io_fasta(self): | |
124 '''Open input fasta file, pass header and read sequence to scanadapt | |
125 function. Pass clipped read and qscore to output function.''' | |
126 id = 0 | |
127 output = open(self.outputfile, "a") | |
128 with open(self.inputfile, "r") as input: | |
129 block_gen = islice(input, 1, None, 2) | |
130 for i, line in enumerate(block_gen): | |
131 read = line.rstrip() | |
132 trimmed_read = self.scanadapt(self.adaptmotifs, read) | |
133 if self.minsize <= len(trimmed_read) <= self.maxsize: | |
134 if (self.Nmode == "reject") and ("N" in trimmed_read): | |
135 continue | |
136 id += 1 | |
137 self.fasta_in_write_output(id, trimmed_read, output) | |
138 output.close() | |
104 | 139 |
105 | 140 |
106 def main(*argv): | 141 def main(*argv): |
107 instanceClip = Clip(*argv) | 142 instanceClip = Clip(*argv) |
108 instanceClip.handle_io() | 143 if instanceClip.inputformat == "fasta": |
144 instanceClip.handle_io_fasta() | |
145 else: | |
146 instanceClip.handle_io_fastq() | |
109 | 147 |
110 | 148 |
111 if __name__ == "__main__": | 149 if __name__ == "__main__": |
112 args = Parser() | 150 args = Parser() |
113 for inputfile in args.input: | 151 for inputfile in args.input: |