diff SMART/bacteriaRegulatoryRegion_Detection/changeName.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/changeName.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,99 @@
+#! /usr/bin/env python
+
+import optparse, os, sys, subprocess, tempfile, shutil
+from optparse import OptionParser
+
+def stop_err(msg):
+    """Report a fatal error and terminate the script.
+
+    Writes msg followed by a newline to standard error, then raises
+    SystemExit with exit status 1.
+
+    msg -- the error text (inserted with '%s', so any object is accepted).
+    """
+    sys.stderr.write('%s\n' % msg)
+    # A bare sys.exit() ends the process with status 0 (success), so a calling
+    # shell or pipeline could not tell that an error had been reported.
+    sys.exit(1)
+    
+def _readOldName(fileName, format):
+    """Return the stripped sequence name of the first record of fileName."""
+    # For the tabular formats: prefixes of the lines to skip, and the index of
+    # the tab-separated column that holds the sequence name.
+    layouts = {"gff": (('#',), 0), "sam": (('@', '#'), 2)}
+    if format != "fasta" and format not in layouts:
+        raise ValueError("ERROR, unknown format '%s' (expected fasta, gff or sam)" % format)
+    with open(fileName, 'r') as handle:
+        # Iterating over the file stops at EOF, so a file without any record
+        # ends the loop instead of spinning forever on empty reads.
+        for line in handle:
+            if format == "fasta":
+                if line.startswith('>'):
+                    return line[1:].strip()
+                continue
+            skipped, column = layouts[format]
+            if not line.strip() or line.startswith(skipped):
+                continue
+            fields = line.split('\t')
+            if column >= len(fields):
+                raise Exception("ERROR, cannot find column %d in the first %s record of '%s'" % (column + 1, format, fileName))
+            return fields[column].strip()
+    raise Exception("ERROR, no %s record found in '%s'" % (format, fileName))
+
+def changeName(fileName, format, name, outputName):
+    """Copy fileName to outputName, renaming the sequence it describes.
+
+    The current name is read from the first record of the file: the whole
+    header line after '>' for "fasta", the first tab-separated column of the
+    first non-comment line for "gff", the third one for "sam".  Every
+    occurrence of that name, on every line, is replaced by name.
+
+    fileName   -- path of the file to read
+    format     -- "fasta", "gff" or "sam"
+    name       -- new sequence name (surrounding whitespace is ignored)
+    outputName -- path of the file to write; must differ from fileName
+
+    Raises ValueError for an unknown format, and Exception when no usable
+    name can be read or when input and output are the same file.
+    """
+    old_name = _readOldName(fileName, format)
+    if not old_name:
+        # Replacing the empty string would insert the new name between
+        # every pair of characters.
+        raise Exception("ERROR, the sequence name read from '%s' is empty" % fileName)
+    if os.path.exists(outputName) and os.path.samefile(fileName, outputName):
+        # Opening the output truncates it, which would wipe the input
+        # before a single line has been read.
+        raise Exception("ERROR, input and output are the same file: '%s'" % fileName)
+    new_name = name.strip()
+    # The replacement is done here rather than through a shell "sed" command:
+    # the name is matched literally (no regex wildcards) and names or paths
+    # holding '/', '&', quotes or spaces can neither break nor inject anything.
+    with open(fileName, 'r') as source:
+        with open(outputName, 'w') as target:
+            for line in source:
+                target.write(line.replace(old_name, new_name))
+    
+def getName(fileName, format):
+    """Return the sequence name used by the first record of a gff or sam file.
+
+    Blank lines and header/comment lines ('#' for "gff"; '@' or '#' for
+    "sam") are skipped.  The name is the first tab-separated column of the
+    first remaining line for "gff" and the third one for "sam"; it is
+    returned without surrounding whitespace.
+
+    fileName -- path of the file to read
+    format   -- "gff" or "sam"
+
+    Raises ValueError for any other format, and Exception when the file
+    holds no record or when the record is too short to carry the name.
+    """
+    if format == "gff":
+        skipped, column = ('#',), 0
+    elif format == "sam":
+        skipped, column = ('@', '#'), 2
+    else:
+        # Without this check the function would fail on an unbound local.
+        raise ValueError("ERROR, cannot read a name from format '%s' (expected gff or sam)" % format)
+    with open(fileName, 'r') as handle:
+        for line in handle:
+            if not line.strip() or line.startswith(skipped):
+                continue
+            fields = line.split('\t')
+            if column >= len(fields):
+                raise Exception("ERROR, cannot find column %d in the first %s record of '%s'" % (column + 1, format, fileName))
+            return fields[column].strip()
+    # Reaching EOF means the file only holds headers: returning '' here would
+    # later make every file lose its sequence name.
+    raise Exception("ERROR, no %s record found in '%s'" % (format, fileName))
+
+def __main__():
+    """Rename the sequence in the fasta, gff and/or sam files given as options.
+
+    With --name, every given input is rewritten with that name.  Without
+    --name, at least two inputs are required: the name is read from the gff
+    file when one is given, otherwise from the sam file, and applied to all
+    inputs so that they agree on a single sequence name.
+
+    Raises Exception when no input is given, when an input has no matching
+    output option, or when a single input is given without --name.
+    """
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option("", "--input1", dest="inputFile1", default=None, help="Choose a fasta file.")
+    parser.add_option("", "--input2", dest="inputFile2", default=None, help="Choose a gff file.")
+    parser.add_option("", "--input3", dest="inputFile3", default=None, help="Choose a sam file.")
+    parser.add_option("", "--name", dest="name", default=None, help="Change to a new name.[compulsory] if there is only one input.")
+    parser.add_option("", "--output1", dest="outputFile1", default=None, help="OutputFile1")
+    parser.add_option("", "--output2", dest="outputFile2", default=None, help="OutputFile2")
+    parser.add_option("", "--output3", dest="outputFile3", default=None, help="OutputFile3")
+    (options, args) = parser.parse_args()
+
+    # One entry per file slot, kept in fasta, gff, sam order.
+    candidates = [
+        (options.inputFile1, 'fasta', options.outputFile1, 1),
+        (options.inputFile2, 'gff', options.outputFile2, 2),
+        (options.inputFile3, 'sam', options.outputFile3, 3),
+    ]
+    jobs = []
+    for inputFile, fileFormat, outputFile, number in candidates:
+        if inputFile is None:
+            continue
+        if outputFile is None:
+            # Previously such an input was silently skipped, or (for the
+            # fasta file) written to a file literally named "None".
+            raise Exception("ERROR, --input%d is given without --output%d." % (number, number))
+        jobs.append((inputFile, fileFormat, outputFile))
+    if not jobs:
+        raise Exception("ERROR, no input file given.")
+
+    new_name = options.name
+    if new_name is None:
+        #find a default_name to unify the name for all input files
+        if len(jobs) == 1:
+            raise Exception("ERROR, only one input, you should identify a new name to modify.")
+        # With two or more jobs at least one is not fasta; since jobs are in
+        # fasta, gff, sam order, the first such job is the gff file when it
+        # is given and the sam file otherwise.
+        for inputFile, fileFormat, outputFile in jobs:
+            if fileFormat != 'fasta':
+                new_name = getName(inputFile, fileFormat)
+                break
+
+    for inputFile, fileFormat, outputFile in jobs:
+        changeName(inputFile, fileFormat, new_name, outputFile)
+
+if __name__ == '__main__':
+    __main__()
+
+
+#test commands: 
+#only one input:
+#python changeName.py --input1 NC_011744.fna --name NC_test --output1 out.fna
+#several inputs:
+#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --output1 out.fna --output2 out.gff
+#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --name NC_test --output1 out.fna --output2 out.gff
+#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff  --input3 NC_011744.sam --name NC_test2 --output1 out.fna --output2 out.gff --output3 out.sam
+#python changeName.py --input1 NC_011744.fna --input3 NC_011744.sam --output1 out.fna --output3 out.sam
+
+
+    
\ No newline at end of file