diff yac.py @ 3:94d67b195acd draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit 6884c90d521932ae0981532929db9f5f44c8b4a2
author artbio
date Mon, 21 Jan 2019 18:46:04 -0500
parents da08e89abd18
children f7947c5a18b8
line wrap: on
line diff
--- a/yac.py	Sat Oct 13 17:09:16 2018 -0400
+++ b/yac.py	Mon Jan 21 18:46:04 2019 -0500
@@ -46,6 +46,12 @@
         self.minsize = int(minsize)
         self.maxsize = int(maxsize)
         self.Nmode = Nmode
+        for line in open(inputfile):
+            if line[0] == "@":
+                self.inputformat = "fastq"
+                break
+            elif line[0] == ">":
+                self.inputformat = "fasta"
 
         def motives(sequence):
             '''
@@ -65,13 +71,22 @@
     def scanadapt(self, adaptmotives=[], sequence="", qscore=""):
-        '''scans sequence for adapter motives'''
-        match_position = sequence.rfind(adaptmotives[0])
-        if match_position != -1:
-            return sequence[:match_position], qscore[:match_position]
-        for motif in adaptmotives[1:]:
-            match_position = sequence.rfind(motif)
-            if match_position != -1:
-                return sequence[:match_position], qscore[:match_position]
-        return sequence, qscore
+        '''Clip sequence at the first adapter motif found in it.
+
+        Motifs are tried in list order and the read is cut at the right-most
+        occurrence (rfind) of the first motif that matches. With a qscore the
+        clipped (sequence, qscore) pair is returned, without one only the
+        clipped sequence. A read with no match, or an empty motif list, is
+        returned unchanged.
+        '''
+        clip_at = None  # [:None] slices keep the whole string
+        for motif in adaptmotives:
+            match_position = sequence.rfind(motif)
+            if match_position != -1:
+                clip_at = match_position
+                break
+        if qscore:
+            return sequence[:clip_at], qscore[:clip_at]
+        # no quality string was given (fasta input): return the read alone
+        return sequence[:clip_at]
 
     def write_output(self, id, read, qscore, output):
         if self.output_format == "fasta":
@@ -80,9 +95,12 @@
             block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore)
         output.write(block)
 
-    def handle_io(self):
-        '''Open input file, pass read sequence and read qscore to clipping function.
-        Pass clipped read and qscore to output function.'''
+    def fasta_in_write_output(self, id, read, output):  # one fasta record
+        output.write(">{0}\n{1}\n".format(id, read))  # ">id" line, then read
+
+    def handle_io_fastq(self):
+        '''Open input fastq file, pass read sequence and read qscore to
+        scanadapt function. Pass clipped read and qscore to output function.'''
         id = 0
         output = open(self.outputfile, "a")
         with open(self.inputfile, "r") as input:
@@ -100,12 +118,32 @@
                         continue
                     id += 1
                     self.write_output(id, trimmed_read, trimmed_qscore, output)
-            output.close()
+        output.close()
+
+    def handle_io_fasta(self):
+        '''Open input fasta file, pass each read sequence to the scanadapt
+        function. Pass each clipped read (no qscore) to output function.'''
+        id = 0
+        # "with" closes both files even if clipping or writing fails
+        with open(self.outputfile, "a") as output, \
+                open(self.inputfile, "r") as input:
+            # reads every 2nd line only: assumes single-line fasta records
+            for line in islice(input, 1, None, 2):
+                trimmed_read = self.scanadapt(self.adaptmotifs, line.rstrip())
+                if not self.minsize <= len(trimmed_read) <= self.maxsize:
+                    continue
+                if (self.Nmode == "reject") and ("N" in trimmed_read):
+                    continue
+                id += 1
+                self.fasta_in_write_output(id, trimmed_read, output)
 
 
 def main(*argv):
     instanceClip = Clip(*argv)
-    instanceClip.handle_io()
+    if getattr(instanceClip, "inputformat", None) == "fasta":
+        instanceClip.handle_io_fasta()
+    else:  # fastq, or no "@"/">" line was seen (inputformat unset)
+        instanceClip.handle_io_fastq()
 
 
 if __name__ == "__main__":