Mercurial > repos > artbio > yac_clipper
comparison yac.py @ 3:94d67b195acd draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit 6884c90d521932ae0981532929db9f5f44c8b4a2
| author | artbio |
|---|---|
| date | Mon, 21 Jan 2019 18:46:04 -0500 |
| parents | da08e89abd18 |
| children | f7947c5a18b8 |
comparison
equal
deleted
inserted
replaced
| 2:da08e89abd18 | 3:94d67b195acd |
|---|---|
| 44 self.output_format = output_format | 44 self.output_format = output_format |
| 45 self.adapter = adapter | 45 self.adapter = adapter |
| 46 self.minsize = int(minsize) | 46 self.minsize = int(minsize) |
| 47 self.maxsize = int(maxsize) | 47 self.maxsize = int(maxsize) |
| 48 self.Nmode = Nmode | 48 self.Nmode = Nmode |
| 49 for line in open(inputfile): | |
| 50 if line[0] == "@": | |
| 51 self.inputformat = "fastq" | |
| 52 break | |
| 53 elif line[0] == ">": | |
| 54 self.inputformat = "fasta" | |
| 49 | 55 |
| 50 def motives(sequence): | 56 def motives(sequence): |
| 51 ''' | 57 ''' |
| 52 return a list of motives for perfect (6nt) or | 58 return a list of motives for perfect (6nt) or |
| 53 imperfect (7nt with one mismatch) search on import string module | 59 imperfect (7nt with one mismatch) search on import string module |
| 63 self.adaptmotifs = motives(self.adapter) | 69 self.adaptmotifs = motives(self.adapter) |
| 64 | 70 |
| 65 def scanadapt(self, adaptmotives=[], sequence="", qscore=""): | 71 def scanadapt(self, adaptmotives=[], sequence="", qscore=""): |
| 66 '''scans sequence for adapter motives''' | 72 '''scans sequence for adapter motives''' |
| 67 match_position = sequence.rfind(adaptmotives[0]) | 73 match_position = sequence.rfind(adaptmotives[0]) |
| 68 if match_position != -1: | 74 if qscore: |
| 69 return sequence[:match_position], qscore[:match_position] | |
| 70 for motif in adaptmotives[1:]: | |
| 71 match_position = sequence.rfind(motif) | |
| 72 if match_position != -1: | 75 if match_position != -1: |
| 73 return sequence[:match_position], qscore[:match_position] | 76 return sequence[:match_position], qscore[:match_position] |
| 74 return sequence, qscore | 77 for motif in adaptmotives[1:]: |
| 78 match_position = sequence.rfind(motif) | |
| 79 if match_position != -1: | |
| 80 return sequence[:match_position], qscore[:match_position] | |
| 81 return sequence, qscore | |
| 82 else: | |
| 83 if match_position != -1: | |
| 84 return sequence[:match_position] | |
| 85 for motif in adaptmotives[1:]: | |
| 86 match_position = sequence.rfind(motif) | |
| 87 if match_position != -1: | |
| 88 return sequence[:match_position] | |
| 89 return sequence | |
| 75 | 90 |
| 76 def write_output(self, id, read, qscore, output): | 91 def write_output(self, id, read, qscore, output): |
| 77 if self.output_format == "fasta": | 92 if self.output_format == "fasta": |
| 78 block = ">{0}\n{1}\n".format(id, read) | 93 block = ">{0}\n{1}\n".format(id, read) |
| 79 else: | 94 else: |
| 80 block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore) | 95 block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore) |
| 81 output.write(block) | 96 output.write(block) |
| 82 | 97 |
| 83 def handle_io(self): | 98 def fasta_in_write_output(self, id, read, output): |
| 84 '''Open input file, pass read sequence and read qscore to clipping function. | 99 output.write(">{0}\n{1}\n".format(id, read)) |
| 85 Pass clipped read and qscore to output function.''' | 100 |
| 101 def handle_io_fastq(self): | |
| 102 '''Open input fastq file, pass read sequence and read qscore to | |
| 103 scanadapt function. Pass clipped read and qscore to output function.''' | |
| 86 id = 0 | 104 id = 0 |
| 87 output = open(self.outputfile, "a") | 105 output = open(self.outputfile, "a") |
| 88 with open(self.inputfile, "r") as input: | 106 with open(self.inputfile, "r") as input: |
| 89 block_gen = islice(input, 1, None, 2) | 107 block_gen = islice(input, 1, None, 2) |
| 90 for i, line in enumerate(block_gen): | 108 for i, line in enumerate(block_gen): |
| 98 if self.minsize <= len(trimmed_read) <= self.maxsize: | 116 if self.minsize <= len(trimmed_read) <= self.maxsize: |
| 99 if (self.Nmode == "reject") and ("N" in trimmed_read): | 117 if (self.Nmode == "reject") and ("N" in trimmed_read): |
| 100 continue | 118 continue |
| 101 id += 1 | 119 id += 1 |
| 102 self.write_output(id, trimmed_read, trimmed_qscore, output) | 120 self.write_output(id, trimmed_read, trimmed_qscore, output) |
| 103 output.close() | 121 output.close() |
| 122 | |
| 123 def handle_io_fasta(self): | |
| 124 '''Open input fasta file, pass header and read sequence to scanadapt | |
| 125 function. Pass clipped read and qscore to output function.''' | |
| 126 id = 0 | |
| 127 output = open(self.outputfile, "a") | |
| 128 with open(self.inputfile, "r") as input: | |
| 129 block_gen = islice(input, 1, None, 2) | |
| 130 for i, line in enumerate(block_gen): | |
| 131 read = line.rstrip() | |
| 132 trimmed_read = self.scanadapt(self.adaptmotifs, read) | |
| 133 if self.minsize <= len(trimmed_read) <= self.maxsize: | |
| 134 if (self.Nmode == "reject") and ("N" in trimmed_read): | |
| 135 continue | |
| 136 id += 1 | |
| 137 self.fasta_in_write_output(id, trimmed_read, output) | |
| 138 output.close() | |
| 104 | 139 |
| 105 | 140 |
| 106 def main(*argv): | 141 def main(*argv): |
| 107 instanceClip = Clip(*argv) | 142 instanceClip = Clip(*argv) |
| 108 instanceClip.handle_io() | 143 if instanceClip.inputformat == "fasta": |
| 144 instanceClip.handle_io_fasta() | |
| 145 else: | |
| 146 instanceClip.handle_io_fastq() | |
| 109 | 147 |
| 110 | 148 |
| 111 if __name__ == "__main__": | 149 if __name__ == "__main__": |
| 112 args = Parser() | 150 args = Parser() |
| 113 for inputfile in args.input: | 151 for inputfile in args.input: |
