Mercurial > repos > drosofff > blastx_to_scaffold
view blastx_to_scaffold.py @ 1:940c0c669e96 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit b6de14061c479f0418cd89e26d6f5ac26e565a07
author | drosofff |
---|---|
date | Wed, 09 Nov 2016 11:39:20 -0500 |
parents | a2e034f1638e |
children |
line wrap: on
line source
#!/usr/bin/python import sys import argparse def insert_newlines(string, every=60): lines = [] for i in xrange(0, len(string), every): lines.append(string[i:i+every]) return '\n'.join(lines) def Parser(): the_parser = argparse.ArgumentParser( description="Generate DNA scaffold from blastx alignment of Contigs") the_parser.add_argument( '--sequences', action="store", type=str, help="input sequence file in fasta format") the_parser.add_argument( '--blastx-tab', dest="blastx_tab", action="store", type=str, help="13-columns tabular blastx output") the_parser.add_argument( '--output', action="store", type=str, help="output file path, fasta format") args = the_parser.parse_args() return args def __main__(): args = Parser() protLenght = int (open (args.blastx_tab, "r").readline().split("\t")[12]) BlastxOutput = open (args.blastx_tab, "r") Contigs = open (args.sequences, "r") ContigsDict = {} protScaffold = {} for line in Contigs: if line[0] == ">": header = line[1:-1] ContigsDict[header] = "" else: ContigsDict[header] += line[:-1] protScaffold = dict ( [(i,"NNN") for i in range (1, protLenght+1)] ) for line in BlastxOutput: fields = line[:-1].split("\t") queryStart = int(fields[6]) queryStop = int(fields[7]) subjectStart = int(fields[8]) subjectStop = int(fields[9]) seqHeader = fields[0] sequence = ContigsDict[seqHeader] for i in range (subjectStart, subjectStop): del protScaffold[i] protScaffold[subjectStop] = ContigsDict[seqHeader][queryStart -1: queryStop] finalSeqList = [] for i in sorted (protScaffold): finalSeqList.append(protScaffold[i]) finalSequence = insert_newlines("".join(finalSeqList)) Out = open (args.output, "w") print >> Out, ">Scaffold" print >> Out, finalSequence BlastxOutput.close() Contigs.close() Out.close() if __name__ == "__main__": __main__()