comparison yac.py @ 3:94d67b195acd draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit 6884c90d521932ae0981532929db9f5f44c8b4a2
author artbio
date Mon, 21 Jan 2019 18:46:04 -0500
parents da08e89abd18
children f7947c5a18b8
comparison
equal deleted inserted replaced
2:da08e89abd18 3:94d67b195acd
44 self.output_format = output_format 44 self.output_format = output_format
45 self.adapter = adapter 45 self.adapter = adapter
46 self.minsize = int(minsize) 46 self.minsize = int(minsize)
47 self.maxsize = int(maxsize) 47 self.maxsize = int(maxsize)
48 self.Nmode = Nmode 48 self.Nmode = Nmode
49 for line in open(inputfile):
50 if line[0] == "@":
51 self.inputformat = "fastq"
52 break
53 elif line[0] == ">":
54 self.inputformat = "fasta"
49 55
50 def motives(sequence): 56 def motives(sequence):
51 ''' 57 '''
52 return a list of motives for perfect (6nt) or 58 return a list of motives for perfect (6nt) or
53 imperfect (7nt with one mismatch) search on import string module 59 imperfect (7nt with one mismatch) search on import string module
63 self.adaptmotifs = motives(self.adapter) 69 self.adaptmotifs = motives(self.adapter)
64 70
65 def scanadapt(self, adaptmotives=[], sequence="", qscore=""): 71 def scanadapt(self, adaptmotives=[], sequence="", qscore=""):
66 '''scans sequence for adapter motives''' 72 '''scans sequence for adapter motives'''
67 match_position = sequence.rfind(adaptmotives[0]) 73 match_position = sequence.rfind(adaptmotives[0])
68 if match_position != -1: 74 if qscore:
69 return sequence[:match_position], qscore[:match_position]
70 for motif in adaptmotives[1:]:
71 match_position = sequence.rfind(motif)
72 if match_position != -1: 75 if match_position != -1:
73 return sequence[:match_position], qscore[:match_position] 76 return sequence[:match_position], qscore[:match_position]
74 return sequence, qscore 77 for motif in adaptmotives[1:]:
78 match_position = sequence.rfind(motif)
79 if match_position != -1:
80 return sequence[:match_position], qscore[:match_position]
81 return sequence, qscore
82 else:
83 if match_position != -1:
84 return sequence[:match_position]
85 for motif in adaptmotives[1:]:
86 match_position = sequence.rfind(motif)
87 if match_position != -1:
88 return sequence[:match_position]
89 return sequence
75 90
76 def write_output(self, id, read, qscore, output): 91 def write_output(self, id, read, qscore, output):
77 if self.output_format == "fasta": 92 if self.output_format == "fasta":
78 block = ">{0}\n{1}\n".format(id, read) 93 block = ">{0}\n{1}\n".format(id, read)
79 else: 94 else:
80 block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore) 95 block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore)
81 output.write(block) 96 output.write(block)
82 97
83 def handle_io(self): 98 def fasta_in_write_output(self, id, read, output):
84 '''Open input file, pass read sequence and read qscore to clipping function. 99 output.write(">{0}\n{1}\n".format(id, read))
85 Pass clipped read and qscore to output function.''' 100
101 def handle_io_fastq(self):
102 '''Open input fastq file, pass read sequence and read qscore to
103 scanadapt function. Pass clipped read and qscore to output function.'''
86 id = 0 104 id = 0
87 output = open(self.outputfile, "a") 105 output = open(self.outputfile, "a")
88 with open(self.inputfile, "r") as input: 106 with open(self.inputfile, "r") as input:
89 block_gen = islice(input, 1, None, 2) 107 block_gen = islice(input, 1, None, 2)
90 for i, line in enumerate(block_gen): 108 for i, line in enumerate(block_gen):
98 if self.minsize <= len(trimmed_read) <= self.maxsize: 116 if self.minsize <= len(trimmed_read) <= self.maxsize:
99 if (self.Nmode == "reject") and ("N" in trimmed_read): 117 if (self.Nmode == "reject") and ("N" in trimmed_read):
100 continue 118 continue
101 id += 1 119 id += 1
102 self.write_output(id, trimmed_read, trimmed_qscore, output) 120 self.write_output(id, trimmed_read, trimmed_qscore, output)
103 output.close() 121 output.close()
122
123 def handle_io_fasta(self):
124 '''Open input fasta file, pass header and read sequence to scanadapt
125 function. Pass clipped read and qscore to output function.'''
126 id = 0
127 output = open(self.outputfile, "a")
128 with open(self.inputfile, "r") as input:
129 block_gen = islice(input, 1, None, 2)
130 for i, line in enumerate(block_gen):
131 read = line.rstrip()
132 trimmed_read = self.scanadapt(self.adaptmotifs, read)
133 if self.minsize <= len(trimmed_read) <= self.maxsize:
134 if (self.Nmode == "reject") and ("N" in trimmed_read):
135 continue
136 id += 1
137 self.fasta_in_write_output(id, trimmed_read, output)
138 output.close()
104 139
105 140
106 def main(*argv): 141 def main(*argv):
107 instanceClip = Clip(*argv) 142 instanceClip = Clip(*argv)
108 instanceClip.handle_io() 143 if instanceClip.inputformat == "fasta":
144 instanceClip.handle_io_fasta()
145 else:
146 instanceClip.handle_io_fastq()
109 147
110 148
111 if __name__ == "__main__": 149 if __name__ == "__main__":
112 args = Parser() 150 args = Parser()
113 for inputfile in args.input: 151 for inputfile in args.input: