annotate SMART/bacteriaRegulatoryRegion_Detection/changeName.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #! /usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 import optparse, os, sys, subprocess, tempfile, shutil
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 from optparse import OptionParser
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the program.

    Writes ``msg`` followed by a newline to standard error, then exits with
    status 1 so that callers inspecting the exit code see the failure
    (a bare ``sys.exit()`` would exit with status 0).
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _findOldName(fileName, format):
    """Return the stripped sequence name found on the first record of fileName.

    fasta: everything after '>' on the first line starting with '>'.
    gff:   first tab-separated column of the first line not starting with '#'.
    sam:   third tab-separated column of the first line not starting with '@'.

    Raises ValueError when the format is unsupported or no name can be found.
    """
    with open(fileName, 'r') as handle:
        if format == "fasta":
            oldName = None
            for line in handle:
                if line.startswith('>'):
                    oldName = line[1:]
                    break
            if oldName is None:
                # The previous readline() loop never terminated in this case.
                raise ValueError("No fasta header line (starting with '>') found in '%s'" % fileName)
        elif format == "gff" or format == "sam":
            if format == "gff":
                prefix, column = '#', 0
            else:
                prefix, column = '@', 2
            record = ''
            for line in handle:
                if not line.startswith(prefix):
                    record = line
                    break
            fields = record.split('\t')
            if len(fields) <= column:
                raise ValueError("No %s record with a sequence name found in '%s'" % (format, fileName))
            oldName = fields[column]
        else:
            raise ValueError("Unsupported format '%s' (expected fasta, gff or sam)" % format)
    oldName = oldName.strip()
    if not oldName:
        # An empty pattern would otherwise match between every character.
        raise ValueError("Could not find a sequence name to replace in '%s'" % fileName)
    return oldName


def changeName(fileName, format, name, outputName):
    """Replace the sequence name of a fasta, gff or sam file by a new name.

    The name to replace is taken from the first record of ``fileName``
    (see ``_findOldName``); every occurrence of that text, on every line,
    is replaced by ``name`` (stripped of surrounding whitespace) and the
    result is written to ``outputName``.

    The replacement is a literal text substitution done in Python: names
    containing characters such as '/', '.', '$' or spaces are handled as
    plain text, and no shell command is built from the file content.

    Parameters:
        fileName   -- path of the file to read.
        format     -- "fasta", "gff" or "sam".
        name       -- the new sequence name.
        outputName -- path of the file to write; may be the same file as
                      ``fileName``.

    Raises ValueError if the format is unsupported or no sequence name can
    be found; I/O errors from opening or writing the files propagate.
    """
    oldName = _findOldName(fileName, format)
    newName = name.strip()
    # Opening the output for writing truncates it, so when input and output
    # are the same file the content has to be read completely beforehand.
    inPlace = os.path.exists(outputName) and os.path.samefile(fileName, outputName)
    with open(fileName, 'r') as source:
        if inPlace:
            lines = source.readlines()
        else:
            lines = source
        with open(outputName, 'w') as target:
            for line in lines:
                target.write(line.replace(oldName, newName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def getName(fileName, format):
    """Return the sequence name used by the first record of a gff or sam file.

    gff: first tab-separated column of the first line not starting with '#'.
    sam: third tab-separated column of the first line starting with neither
         '@' nor '#'.

    The field is returned as found in the file (not stripped).

    Raises ValueError if ``format`` is neither "gff" nor "sam", or if the
    file contains no record carrying a non-empty name in that column.
    """
    if format == "gff":
        skipped, column = ('#',), 0
    elif format == "sam":
        skipped, column = ('@', '#'), 2
    else:
        raise ValueError("Unsupported format '%s' (expected gff or sam)" % format)
    record = ''
    with open(fileName, 'r') as handle:
        for line in handle:
            if not line.startswith(skipped):
                record = line
                break
    fields = record.split('\t')
    if len(fields) <= column or not fields[column].strip():
        raise ValueError("No %s record with a sequence name found in '%s'" % (format, fileName))
    return fields[column]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
45
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def __main__():
    """Parse the command line and rename the sequence in each input file.

    With --name, every input that has a matching output is rewritten with
    that name (fasta, then gff, then sam).

    Without --name, the fasta input is required and the name is taken from
    the gff input (preferred) or the sam input, so that all files end up
    using the same name.

    Raises Exception when the given options do not describe any renaming
    that can be performed, instead of silently doing nothing.
    """
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option("", "--input1", dest="inputFile1", default=None, help="Choose a fasta file.")
    parser.add_option("", "--input2", dest="inputFile2", default=None, help="Choose a gff file.")
    parser.add_option("", "--input3", dest="inputFile3", default=None, help="Choose a sam file.")
    parser.add_option("", "--name", dest="name", default=None, help="Change to a new name.[compulsory] if there is only one input.")
    parser.add_option("", "--output1", dest="outputFile1", default=None, help="OutputFile1")
    parser.add_option("", "--output2", dest="outputFile2", default=None, help="OutputFile2")
    parser.add_option("", "--output3", dest="outputFile3", default=None, help="OutputFile3")
    (options, args) = parser.parse_args()

    # One (input, format, output) triple per supported file type.
    fasta = (options.inputFile1, 'fasta', options.outputFile1)
    gff = (options.inputFile2, 'gff', options.outputFile2)
    sam = (options.inputFile3, 'sam', options.outputFile3)

    def is_usable(job):
        # A file can only be rewritten when both its input and output are given.
        return job[0] is not None and job[2] is not None

    #An explicit name is applied to every usable input
    if options.name is not None:
        for job in (fasta, gff, sam):
            if is_usable(job):
                changeName(job[0], job[1], options.name, job[2])
        return

    #No name given: find a default_name to unify the name for all input files
    if options.inputFile1 is None:
        raise Exception("ERROR, no new name given and no fasta input to unify with, nothing to do.")
    if options.inputFile2 is None and options.inputFile3 is None:
        raise Exception("ERROR, only one input, you should identify a new name to modify.")
    if options.outputFile1 is None:
        # Without this check the fasta result was written to a file named "None".
        raise Exception("ERROR, --output1 is required when --input1 is given without --name.")

    if is_usable(gff):
        default_name = getName(options.inputFile2, 'gff')
        jobs = [fasta, gff]
        if is_usable(sam):
            jobs.append(sam)
    elif is_usable(sam):
        default_name = getName(options.inputFile3, 'sam')
        jobs = [sam, fasta]
    else:
        raise Exception("ERROR, a gff or sam input needs its output file to be given.")

    for job in jobs:
        changeName(job[0], job[1], default_name, job[2])
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
85
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    __main__()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
87
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
88
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
89 #test commands:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
90 #only one input:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
91 #python changeName.py --input1 NC_011744.fna --name NC_test --output1 out.fna
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
92 #several inputs:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
93 #python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --output1 out.fna --output2 out.gff
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
94 #python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --name NC_test --output1 out.fna --output2 out.gff
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
95 #python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --input3 NC_011744.sam --name NC_test2 --output1 out.fna --output2 out.gff --output3 out.sam
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
96 #python changeName.py --input1 NC_011744.fna --input3 out.sam --output1 out.fna --output3 out.sam
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
97
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
98
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
99