Mercurial > repos > drosofff > blastx_to_scaffold
annotate blastx_to_scaffold.py @ 0:a2e034f1638e draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Sun, 21 Jun 2015 14:40:10 -0400 |
parents | |
children |
rev | line source |
---|---|
0
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/python |
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
2 import sys |
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
3 import argparse |
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
4 |
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def insert_newlines(string, every=60):
    """Wrap *string* into lines of at most *every* characters.

    Args:
        string: the text to wrap (a nucleotide sequence in this script).
        every: maximum number of characters per output line.

    Returns:
        The successive chunks of *string* joined with newline characters.
        No trailing newline is appended; an empty input gives an empty
        string.

    Raises:
        ValueError: if *every* is 0 (``range`` rejects a zero step).
    """
    # ``range`` instead of ``xrange``: the latter does not exist on Python 3,
    # while ``range`` behaves the same here on both major versions.
    return '\n'.join(
        [string[start:start + every]
         for start in range(0, len(string), every)])
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
10 |
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def Parser():
    """Parse the command line of the scaffold builder.

    Three options are declared: ``--sequences``, ``--blastx-tab`` (stored
    under the attribute ``blastx_tab``) and ``--output``. None of them is
    marked as required, so an omitted option is returned as ``None``.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastx alignment of Contigs")
    # (flag, keyword arguments) pairs, registered in declaration order.
    option_table = (
        ('--sequences',
         dict(action="store", type=str,
              help="input sequence file in fasta format")),
        ('--blastx-tab',
         dict(dest="blastx_tab", action="store", type=str,
              help="13-columns tabular blastx output")),
        ('--output',
         dict(action="store", type=str,
              help="output file path, fasta format")),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
22 |
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def _reverse_complement(dna):
    """Return the reverse complement of *dna*; other symbols are kept as is."""
    pairs = {
        "A": "T", "T": "A", "C": "G", "G": "C",
        "a": "t", "t": "a", "c": "g", "g": "c",
    }
    return "".join([pairs.get(base, base) for base in reversed(dna)])


def _read_fasta(path):
    """Load the fasta file at *path* into a ``{header: sequence}`` dict."""
    chunks_by_header = {}
    header = None
    with open(path, "r") as handle:
        for raw_line in handle:
            # Strip only the line terminator: slicing off the last character
            # would eat a real base when the file has no final newline.
            line = raw_line.rstrip("\r\n")
            if line.startswith(">"):
                header = line[1:]
                chunks_by_header[header] = []
            elif header is not None:
                chunks_by_header[header].append(line)
    return dict(
        (name, "".join(chunks)) for name, chunks in chunks_by_header.items())


def _build_scaffold(hit_lines, contigs, prot_length):
    """Return the scaffold as a ``{subject position: DNA fragment}`` dict."""
    # Every subject position starts as an unknown codon.
    scaffold = dict((pos, "NNN") for pos in range(1, prot_length + 1))
    for hit in hit_lines:
        fields = hit.split("\t")
        contig = contigs[fields[0]]
        query_start, query_stop = int(fields[6]), int(fields[7])
        subject_start, subject_stop = int(fields[8]), int(fields[9])
        if query_start <= query_stop:
            fragment = contig[query_start - 1:query_stop]
        else:
            # Query coordinates are reversed: take the same span of the
            # contig and flip it, instead of slicing an empty string.
            fragment = _reverse_complement(contig[query_stop - 1:query_start])
        # The aligned fragment replaces the placeholders of the positions it
        # covers. ``pop`` with a default tolerates positions already consumed
        # by a previous, overlapping hit.
        for pos in range(subject_start, subject_stop):
            scaffold.pop(pos, None)
        scaffold[subject_stop] = fragment
    return scaffold


def __main__():
    """Build a DNA scaffold from blastx hits and write it as fasta.

    The options come from ``Parser()``: ``sequences`` is the fasta file of
    contigs, ``blastx_tab`` the tabular blastx output, and ``output`` the
    file to write. The subject length is read from the 13th column of the
    first hit. The scaffold is written under the header ``>Scaffold``,
    wrapped by ``insert_newlines``.

    Raises:
        SystemExit: if the blastx file has no hit, or its first hit has
            fewer than 13 columns.
        KeyError: if a hit names a contig absent from the fasta file.
    """
    args = Parser()

    with open(args.blastx_tab, "r") as blastx_output:
        hit_lines = [line.rstrip("\r\n") for line in blastx_output]
    hit_lines = [line for line in hit_lines if line]  # ignore blank lines
    if not hit_lines:
        sys.exit("No blastx hit found in %s" % args.blastx_tab)

    first_fields = hit_lines[0].split("\t")
    if len(first_fields) < 13:
        sys.exit("Expected a 13-columns tabular blastx output in %s"
                 % args.blastx_tab)
    prot_length = int(first_fields[12])

    contigs = _read_fasta(args.sequences)
    scaffold = _build_scaffold(hit_lines, contigs, prot_length)

    # Concatenate the fragments in subject-position order.
    final_sequence = insert_newlines(
        "".join([scaffold[pos] for pos in sorted(scaffold)]))

    with open(args.output, "w") as out:
        out.write(">Scaffold\n")
        out.write(final_sequence + "\n")
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
64 |
a2e034f1638e
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
# Run the scaffold builder only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()