annotate blastx_to_scaffold.py @ 0:a2e034f1638e draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author drosofff
date Sun, 21 Jun 2015 14:40:10 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
1 #!/usr/bin/python
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
2 import sys
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
3 import argparse
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
4
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def insert_newlines(string, every=60):
    """Wrap *string* by inserting a newline after every *every* characters.

    Args:
        string: the text to wrap (here, a nucleotide sequence).
        every: maximum number of characters per output line.

    Returns:
        The wrapped text, without a trailing newline. An empty input
        yields an empty string.

    Raises:
        ValueError: if *every* is not a positive integer.
    """
    if every <= 0:
        # A negative step would produce an empty range and silently drop
        # the whole sequence; refuse it explicitly instead.
        raise ValueError("every must be a positive integer")
    # range() (rather than xrange()) keeps this runnable on Python 2 and 3.
    return '\n'.join(
        string[start:start + every] for start in range(0, len(string), every))
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
10
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def Parser(argv=None):
    """Parse the command-line options of the scaffold generator.

    Args:
        argv: optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``sequences``,
        ``blastx_tab`` and ``output``.

    All three options are required: the main routine opens each of the
    three paths unconditionally, so a missing one is reported here with a
    usage message instead of failing later inside ``open()``.
    """
    the_parser = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastx alignment of Contigs")
    the_parser.add_argument(
        '--sequences', action="store", type=str, required=True,
        help="input sequence file in fasta format")
    the_parser.add_argument(
        '--blastx-tab', dest="blastx_tab", action="store", type=str,
        required=True, help="13-columns tabular blastx output")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="output file path, fasta format")
    return the_parser.parse_args(argv)
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
22
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def _read_fasta(handle):
    """Read FASTA lines from *handle* into a {header: sequence} dict."""
    chunks = {}
    header = None
    for line in handle:
        # Strip only the line terminator, so a final line without a
        # trailing newline does not lose its last character.
        line = line.rstrip("\r\n")
        if not line:
            continue
        if line.startswith(">"):
            header = line[1:]
            chunks[header] = []
        elif header is None:
            raise ValueError("sequence data found before the first FASTA header")
        else:
            chunks[header].append(line)
    return {name: "".join(parts) for name, parts in chunks.items()}


def _build_scaffold(contigs, hits, protein_length):
    """Assemble the scaffold string from split blastx rows (*hits*)."""
    # Every protein position starts as an unknown codon placeholder.
    scaffold = {position: "NNN" for position in range(1, protein_length + 1)}
    for fields in hits:
        query_start = int(fields[6])
        query_stop = int(fields[7])
        subject_start = int(fields[8])
        subject_stop = int(fields[9])
        # Drop the placeholders covered by this hit. A position may already
        # have been removed by an earlier overlapping hit, so pop with a
        # default instead of `del`, which would raise KeyError.
        for position in range(subject_start, subject_stop):
            scaffold.pop(position, None)
        # NOTE(review): when query_start > query_stop this slice is empty;
        # presumably such rows are reverse-frame hits -- confirm whether
        # they need reverse-complementing.
        scaffold[subject_stop] = contigs[fields[0]][query_start - 1:query_stop]
    return "".join(scaffold[position] for position in sorted(scaffold))


def __main__():
    """Build a DNA scaffold from blastx hits and write it as FASTA.

    Reads the tabular blastx file and the contig FASTA file named on the
    command line. Column 13 of the first blastx row is taken as the
    protein length (in residues). Each protein position starts as "NNN";
    every hit replaces the subject positions it covers with the matching
    slice of the query contig. The result is written to the output path
    under the header ">Scaffold", wrapped by insert_newlines().

    Raises:
        ValueError: if the blastx file contains no rows, or the FASTA file
            has sequence data before its first header.
        KeyError: if a blastx query id is not a header of the FASTA file.
    """
    args = Parser()

    # Read the hits in a single pass; blank lines are ignored.
    with open(args.blastx_tab, "r") as blastx_handle:
        hits = [line.rstrip("\r\n").split("\t")
                for line in blastx_handle if line.strip()]
    if not hits:
        raise ValueError("no blastx hits found in %s" % args.blastx_tab)
    protein_length = int(hits[0][12])

    with open(args.sequences, "r") as contigs_handle:
        contigs = _read_fasta(contigs_handle)

    final_sequence = insert_newlines(
        _build_scaffold(contigs, hits, protein_length))

    # write() instead of the Python 2-only `print >> file` statement.
    with open(args.output, "w") as out_handle:
        out_handle.write(">Scaffold\n")
        out_handle.write(final_sequence + "\n")
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
64
a2e034f1638e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
if __name__ == "__main__":
    # Run the command-line tool only when executed as a script, not on import.
    __main__()