Mercurial > repos > drosofff > blastx_to_scaffold
comparison blastx_to_scaffold.py @ 0:a2e034f1638e draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Sun, 21 Jun 2015 14:40:10 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a2e034f1638e |
---|---|
1 #!/usr/bin/python | |
2 import sys | |
3 import argparse | |
4 | |
5 def insert_newlines(string, every=60): | |
6 lines = [] | |
7 for i in xrange(0, len(string), every): | |
8 lines.append(string[i:i+every]) | |
9 return '\n'.join(lines) | |
10 | |
11 def Parser(): | |
12 the_parser = argparse.ArgumentParser( | |
13 description="Generate DNA scaffold from blastx alignment of Contigs") | |
14 the_parser.add_argument( | |
15 '--sequences', action="store", type=str, help="input sequence file in fasta format") | |
16 the_parser.add_argument( | |
17 '--blastx-tab', dest="blastx_tab", action="store", type=str, help="13-columns tabular blastx output") | |
18 the_parser.add_argument( | |
19 '--output', action="store", type=str, help="output file path, fasta format") | |
20 args = the_parser.parse_args() | |
21 return args | |
22 | |
23 def __main__(): | |
24 args = Parser() | |
25 protLenght = int (open (args.blastx_tab, "r").readline().split("\t")[12]) | |
26 BlastxOutput = open (args.blastx_tab, "r") | |
27 Contigs = open (args.sequences, "r") | |
28 ContigsDict = {} | |
29 protScaffold = {} | |
30 | |
31 for line in Contigs: | |
32 if line[0] == ">": | |
33 header = line[1:-1] | |
34 ContigsDict[header] = "" | |
35 else: | |
36 ContigsDict[header] += line[:-1] | |
37 | |
38 protScaffold = dict ( [(i,"NNN") for i in range (1, protLenght+1)] ) | |
39 | |
40 for line in BlastxOutput: | |
41 fields = line[:-1].split("\t") | |
42 queryStart = int(fields[6]) | |
43 queryStop = int(fields[7]) | |
44 subjectStart = int(fields[8]) | |
45 subjectStop = int(fields[9]) | |
46 seqHeader = fields[0] | |
47 sequence = ContigsDict[seqHeader] | |
48 for i in range (subjectStart, subjectStop): | |
49 del protScaffold[i] | |
50 protScaffold[subjectStop] = ContigsDict[seqHeader][queryStart -1: queryStop] | |
51 | |
52 finalSeqList = [] | |
53 for i in sorted (protScaffold): | |
54 finalSeqList.append(protScaffold[i]) | |
55 finalSequence = insert_newlines("".join(finalSeqList)) | |
56 | |
57 Out = open (args.output, "w") | |
58 print >> Out, ">Scaffold" | |
59 print >> Out, finalSequence | |
60 | |
61 BlastxOutput.close() | |
62 Contigs.close() | |
63 Out.close() | |
64 | |
65 if __name__ == "__main__": | |
66 __main__() |