view SMART/bacteriaRegulatoryRegion_Detection/changeName.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line source

#! /usr/bin/env python

import optparse, os, sys, subprocess, tempfile, shutil
from optparse import OptionParser

def stop_err(msg):
    """Write an error message to standard error and abort the program.

    msg -- text of the message; a newline is appended before writing.

    Raises SystemExit with exit status 1.
    """
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() ends the process with status 0, which tells the
    # calling shell or workflow engine that the run succeeded.
    sys.exit(1)
    
def changeName(fileName, format, name, outputName):
    """Copy a file while replacing its sequence name with a new one.

    The name to replace is taken from the first record of the input:
    the whole first header line (without '>') for "fasta", the first
    tab-separated column of the first non-'#' line for "gff", and the
    third column of the first non-'@' line for "sam".  Every literal
    occurrence of that text, on every line, is replaced by the new name.

    fileName   -- path of the file to read
    format     -- "fasta", "gff" or "sam"
    name       -- new sequence name (surrounding whitespace is ignored)
    outputName -- path of the file to write; may be the same file as
                  fileName

    Raises ValueError if the format is unknown or if no sequence name can
    be found in the input file.
    """
    # format -> (prefix of header lines to skip, column holding the name)
    layouts = {"gff": (b'#', 0), "sam": (b'@', 2)}
    if format != "fasta" and format not in layouts:
        raise ValueError("ERROR, unknown format '%s'" % format)

    # The file is handled as bytes so that unusual characters in
    # descriptions or qualities are copied through untouched.
    old_name = b''
    with open(fileName, 'rb') as handle:
        # Iterating stops at end of file, so a file without any record
        # cannot make this loop spin forever.
        for line in handle:
            if format == "fasta":
                if not line.startswith(b'>'):
                    continue
                old_name = line[1:].strip()
            else:
                headerPrefix, column = layouts[format]
                if line.startswith(headerPrefix) or not line.strip():
                    continue
                fields = line.split(b'\t')
                if len(fields) > column:
                    old_name = fields[column].strip()
            break
    if not old_name:
        raise ValueError("ERROR, no sequence name found in '%s'" % fileName)

    new_name = name.strip()
    if not isinstance(new_name, bytes):
        new_name = new_name.encode('utf-8')

    # The replacement is literal: characters such as '.', '/', '&' or '|'
    # in either name have no special meaning, and no shell is involved.
    in_place = os.path.exists(outputName) and os.path.samefile(fileName, outputName)
    with open(fileName, 'rb') as source:
        if in_place:
            # Opening the output for writing would truncate the input
            # before it is read, so the result is spooled first.
            with tempfile.TemporaryFile() as spool:
                for line in source:
                    spool.write(line.replace(old_name, new_name))
                spool.seek(0)
                with open(outputName, 'wb') as target:
                    shutil.copyfileobj(spool, target)
        else:
            with open(outputName, 'wb') as target:
                for line in source:
                    target.write(line.replace(old_name, new_name))
    
def getName(fileName, format):
    """Return the sequence name used by the first record of a file.

    For "gff" this is the first tab-separated column of the first line
    that does not start with '#'; for "sam" it is the third column of the
    first line that starts with neither '@' nor '#'.  Blank lines are
    skipped and the name is returned without surrounding whitespace.

    fileName -- path of the file to read
    format   -- "gff" or "sam"

    Raises ValueError if the format is not supported or if the file holds
    no record with a sequence name.
    """
    # format -> (prefixes of header lines to skip, column holding the name)
    layouts = {"gff": (('#',), 0), "sam": (('@', '#'), 2)}
    if format not in layouts:
        raise ValueError("ERROR, cannot read a sequence name from format '%s'" % format)
    headerPrefixes, column = layouts[format]

    with open(fileName, 'r') as handle:
        for line in handle:
            if line.startswith(headerPrefixes) or not line.strip():
                continue
            fields = line.split('\t')
            if len(fields) > column and fields[column].strip():
                return fields[column].strip()
            # Only the first record is inspected; a malformed one is an error.
            break
    raise ValueError("ERROR, no sequence name found in '%s'" % fileName)

def __main__():
    """Parse the command line and rename the sequence of every input file.

    An input is processed only when its matching output option is given
    too.  With --name, every such input is rewritten to use that name.
    Without --name, at least two inputs are required and they are unified
    on the name read from the gff file, or from the sam file when no gff
    file is given.

    Raises Exception if there is nothing to process, or if there is a
    single input and no --name.
    """
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option("", "--input1", dest="inputFile1", default=None, help="Choose a fasta file.")
    parser.add_option("", "--input2", dest="inputFile2", default=None, help="Choose a gff file.")
    parser.add_option("", "--input3", dest="inputFile3", default=None, help="Choose a sam file.")
    parser.add_option("", "--name", dest="name", default=None, help="Change to a new name.[compulsory] if there is only one input.")
    parser.add_option("", "--output1", dest="outputFile1", default=None, help="OutputFile1")
    parser.add_option("", "--output2", dest="outputFile2", default=None, help="OutputFile2")
    parser.add_option("", "--output3", dest="outputFile3", default=None, help="OutputFile3")
    (options, args) = parser.parse_args()

    # One (format, input, output) job per input that has an output file.
    candidates = (
        ('fasta', options.inputFile1, options.outputFile1),
        ('gff', options.inputFile2, options.outputFile2),
        ('sam', options.inputFile3, options.outputFile3),
    )
    jobs = [job for job in candidates if job[1] is not None and job[2] is not None]
    if not jobs:
        raise Exception("ERROR, no input file with a matching output file was given.")

    newName = options.name
    if newName is None:
        #find a default_name to unify the name for all input files
        if len(jobs) < 2:
            raise Exception("ERROR, only one input, you should identify a new name to modify.")
        # getName reads gff and sam only; with two or more jobs at least
        # one of them is not fasta, and gff comes before sam in the list.
        for fileFormat, inputFile, outputFile in jobs:
            if fileFormat != 'fasta':
                newName = getName(inputFile, fileFormat)
                break

    for fileFormat, inputFile, outputFile in jobs:
        changeName(inputFile, fileFormat, newName, outputFile)
       
# Run the command-line tool only when this file is executed as a script.
if __name__ == '__main__':
    __main__()


#test commands: 
#only one input:
#python changeName.py --input1 NC_011744.fna --name NC_test --output1 out.fna
#several inputs:
#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --output1 out.fna --output2 out.gff
#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --name NC_test --output1 out.fna --output2 out.gff
#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff  --input3 NC_011744.sam --name NC_test2 --output1 out.fna --output2 out.gff --output3 out.sam
#python changeName.py --input1 NC_011744.fna --input3 out.sam --output1 out.fna --output3 out.sam