Mercurial > repos > artbio > yac_clipper
diff yac.py @ 3:94d67b195acd draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit 6884c90d521932ae0981532929db9f5f44c8b4a2
author | artbio |
---|---|
date | Mon, 21 Jan 2019 18:46:04 -0500 |
parents | da08e89abd18 |
children | f7947c5a18b8 |
line wrap: on
line diff
```diff
--- a/yac.py Sat Oct 13 17:09:16 2018 -0400
+++ b/yac.py Mon Jan 21 18:46:04 2019 -0500
@@ -46,6 +46,12 @@
         self.minsize = int(minsize)
         self.maxsize = int(maxsize)
         self.Nmode = Nmode
+        for line in open(inputfile):
+            if line[0] == "@":
+                self.inputformat = "fastq"
+                break
+            elif line[0] == ">":
+                self.inputformat = "fasta"
 
         def motives(sequence):
             '''
@@ -65,13 +71,22 @@
     def scanadapt(self, adaptmotives=[], sequence="", qscore=""):
         '''scans sequence for adapter motives'''
         match_position = sequence.rfind(adaptmotives[0])
-        if match_position != -1:
-            return sequence[:match_position], qscore[:match_position]
-        for motif in adaptmotives[1:]:
-            match_position = sequence.rfind(motif)
+        if qscore:
             if match_position != -1:
                 return sequence[:match_position], qscore[:match_position]
-        return sequence, qscore
+            for motif in adaptmotives[1:]:
+                match_position = sequence.rfind(motif)
+                if match_position != -1:
+                    return sequence[:match_position], qscore[:match_position]
+            return sequence, qscore
+        else:
+            if match_position != -1:
+                return sequence[:match_position]
+            for motif in adaptmotives[1:]:
+                match_position = sequence.rfind(motif)
+                if match_position != -1:
+                    return sequence[:match_position]
+            return sequence
 
     def write_output(self, id, read, qscore, output):
         if self.output_format == "fasta":
@@ -80,9 +95,12 @@
             block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore)
         output.write(block)
 
-    def handle_io(self):
-        '''Open input file, pass read sequence and read qscore to clipping function.
-        Pass clipped read and qscore to output function.'''
+    def fasta_in_write_output(self, id, read, output):
+        output.write(">{0}\n{1}\n".format(id, read))
+
+    def handle_io_fastq(self):
+        '''Open input fastq file, pass read sequence and read qscore to
+        scanadapt function. Pass clipped read and qscore to output function.'''
         id = 0
         output = open(self.outputfile, "a")
         with open(self.inputfile, "r") as input:
@@ -100,12 +118,32 @@
                         continue
                     id += 1
                     self.write_output(id, trimmed_read, trimmed_qscore, output)
-            output.close()
+        output.close()
+
+    def handle_io_fasta(self):
+        '''Open input fasta file, pass header and read sequence to scanadapt
+        function. Pass clipped read and qscore to output function.'''
+        id = 0
+        output = open(self.outputfile, "a")
+        with open(self.inputfile, "r") as input:
+            block_gen = islice(input, 1, None, 2)
+            for i, line in enumerate(block_gen):
+                read = line.rstrip()
+                trimmed_read = self.scanadapt(self.adaptmotifs, read)
+                if self.minsize <= len(trimmed_read) <= self.maxsize:
+                    if (self.Nmode == "reject") and ("N" in trimmed_read):
+                        continue
+                    id += 1
+                    self.fasta_in_write_output(id, trimmed_read, output)
+        output.close()
 
 
 def main(*argv):
     instanceClip = Clip(*argv)
-    instanceClip.handle_io()
+    if instanceClip.inputformat == "fasta":
+        instanceClip.handle_io_fasta()
+    else:
+        instanceClip.handle_io_fastq()
 
 
 if __name__ == "__main__":
```