Mercurial > repos > artbio > blast_to_scaffold
annotate blast_to_scaffold.py @ 2:3041f611636f draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/blast_to_scaffold commit 22c413141878d86ec10e80ef43ba0da792232cb0
author | artbio |
---|---|
date | Wed, 11 Oct 2023 10:34:08 +0000 |
parents | 7d96b28eec49 |
children |
rev | line source |
---|---|
0
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
1 #!/usr/bin/env python |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
2 import argparse |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
3 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
4 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
def insert_newlines(string, every=60):
    """Wrap ``string`` into lines of at most ``every`` characters.

    Parameters:
        string: the text (here, a nucleotide sequence) to wrap.
        every: maximum number of characters per output line (default 60).

    Returns:
        The consecutive chunks of ``string`` joined by newline characters.
        No trailing newline is appended; an empty input gives an empty string.

    Raises:
        ValueError: if ``every`` is smaller than 1.
    """
    if every < 1:
        # A zero step makes range() fail with an unrelated-looking message and
        # a negative step yields no chunk at all, i.e. the sequence would be
        # silently replaced by an empty string.
        raise ValueError("every must be a positive integer, got %r" % (every,))
    return '\n'.join(string[start:start + every]
                     for start in range(0, len(string), every))
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
10 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
11 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
def getseq(fastadict, transcript, up, down, orientation="direct"):
    """Extract a sub-sequence, optionally as its reverse complement.

    Parameters:
        fastadict: mapping of sequence name -> sequence string.
        transcript: key of the sequence to slice in ``fastadict``.
        up: 1-based coordinate of the first nucleotide to keep.
        down: 1-based coordinate of the last nucleotide to keep (inclusive).
        orientation: "direct" returns the slice as is; any other value
            returns the reverse complement of the slice.

    Returns:
        The selected sequence as a string.

    Raises:
        KeyError: if ``transcript`` is not a key of ``fastadict``.
    """
    # Complement table covering the IUPAC nucleotide codes. The lowercase
    # entries are derived below so that soft-masked sequences keep their case.
    complement = {
        "A": "T", "T": "A", "G": "C", "C": "G", "N": "N", "U": "A",
        "R": "Y", "Y": "R", "K": "M", "M": "K", "S": "S", "W": "W",
        "B": "V", "V": "B", "D": "H", "H": "D",
    }
    complement.update(
        [(base.lower(), comp.lower()) for base, comp in complement.items()])

    pickseq = fastadict[transcript][up - 1:down]
    if orientation == "direct":
        return pickseq
    # Symbols missing from the table (gaps, '*', ...) are passed through
    # unchanged instead of aborting the whole run with a KeyError.
    return "".join(complement.get(base, base) for base in reversed(pickseq))
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
22 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
23 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
def Parser():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, with the
        attributes ``sequences``, ``guideSequence``, ``blast_tab``,
        ``output``, ``scaffold_prefix`` and ``scaffold_suffix``.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            an unknown option is given.
    """
    # Adjacent string literals are used instead of backslash continuations so
    # that the source indentation does not end up inside the help texts.
    the_parser = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastn or tblastx alignment "
                    "of Contigs")
    # The four file paths are mandatory: without them the script would only
    # fail later, when trying to open ``None``.
    the_parser.add_argument('--sequences', action="store", type=str,
                            required=True,
                            help="input sequence file in fasta format")
    the_parser.add_argument('--guideSequence', action="store", type=str,
                            required=True,
                            help="the reference sequence to guide the "
                                 "scaffold assembly in fasta format")
    the_parser.add_argument('--blast-tab', dest="blast_tab", action="store",
                            type=str, required=True,
                            help="13-columns tabular blastn or tblastx output")
    the_parser.add_argument('--output', action="store", type=str,
                            required=True,
                            help="output file path, fasta format")
    the_parser.add_argument('--scaffold_prefix', action="store", type=str,
                            help="the prefix that will be used for the "
                                 "header of the fasta scaffold")
    the_parser.add_argument('--scaffold_suffix', action="store", type=str,
                            help="the suffix that will be used for the "
                                 "header of the fasta scaffold")
    args = the_parser.parse_args()
    return args
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
46 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
47 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
def blatnInfo(file):
    """Read a tabular BLAST report and return its hits, last subject first.

    Parameters:
        file: path of a whitespace-delimited BLAST table in which column 1 is
            the query name and columns 7-10 are query start, query stop,
            subject start and subject stop.

    Returns:
        A list of ``[query, qstart, qstop, sstart, sstop]`` lists (all items
        are strings), sorted by decreasing numeric subject start.

    Raises:
        ValueError: if a data line has fewer than 10 columns or its subject
            start is not an integer.
    """
    blastlist = []
    with open(file, "r") as f:
        for line_number, line in enumerate(f, 1):
            fields = line.split()
            # Blank lines and '#' comment lines carry no hit.
            if not fields or fields[0].startswith("#"):
                continue
            if len(fields) < 10:
                raise ValueError(
                    "%s, line %d: expected at least 10 columns, found %d"
                    % (file, line_number, len(fields)))
            blastlist.append([fields[0]] + fields[6:10])
    # Compare subject starts as numbers: as strings, "900" sorts after "1000".
    blastlist.sort(key=lambda hit: int(hit[3]), reverse=True)
    return blastlist
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
59 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
60 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
def myContigs(file):
    """Load a FASTA file into a ``{header: sequence}`` dictionary.

    Parameters:
        file: path of the FASTA file to read.

    Returns:
        A dict whose keys are the complete header lines (without the leading
        ">" and without the line terminator) and whose values are the
        concatenated sequence lines of each record.

    Raises:
        ValueError: if sequence data appears before the first header line.
    """
    # NOTE(review): keys are the full header line; presumably headers hold no
    # description after the identifier, otherwise the names found in the
    # BLAST table may not match these keys -- confirm against the inputs.
    chunks = {}
    header = None
    with open(file, "r") as f:
        for line in f:
            # Strip the terminator explicitly: slicing off the last character
            # loses a nucleotide when the file does not end with a newline.
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                header = line[1:]
                chunks[header] = []
            elif header is not None:
                chunks[header].append(line)
            elif line.strip():
                raise ValueError(
                    "%s: sequence data found before the first '>' header"
                    % file)
    # Join once per record rather than growing a string line after line.
    return dict((name, "".join(parts)) for name, parts in chunks.items())
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
71 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
72 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
def myGuide(file):
    """Load the guide FASTA as a ``{coordinate: nucleotide}`` dictionary.

    Header lines (starting with ">") are skipped and every other line is
    appended to a single running sequence, so all records of the file share
    one coordinate system.

    Parameters:
        file: path of the guide sequence in FASTA format.

    Returns:
        A dict mapping 1-based integer positions to lowercased nucleotides.
    """
    Guide = {}
    coordinate = 0
    with open(file, "r") as f:
        for line in f:
            if line.startswith(">"):
                continue
            # Strip the terminator explicitly: slicing off the last character
            # loses the final nucleotide when the file lacks a closing newline.
            for nucleotide in line.rstrip("\r\n"):
                coordinate += 1
                Guide[coordinate] = nucleotide.lower()
    return Guide
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
85 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
86 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
def updateGuide(blastlist, GuideDict, ContigsDict):
    """Overwrite guide positions with the aligned contig sequences.

    ``GuideDict`` is modified in place; nothing is returned.

    Parameters:
        blastlist: list of hits, each a list with
            element [0]: name of the blasted Contig
            element [1]: queryStart of the alignment to the reference
            element [2]: queryStop of the alignment to the reference
            element [3]: subjectStart of the alignment to the reference
            element [4]: subjectStop of the alignment to the reference
            Coordinates may be strings; they are converted with ``int``.
        GuideDict: ``{position: nucleotide}`` mapping of the guide sequence.
        ContigsDict: ``{name: sequence}`` mapping of the contigs.

    Hits are applied in list order, so a later hit overwrites the positions
    it shares with an earlier one.
    """
    for fields in blastlist:
        seqHeader = fields[0]
        queryStart = int(fields[1])
        queryStop = int(fields[2])
        subjectStart = int(fields[3])
        subjectStop = int(fields[4])
        # Each reversed coordinate pair means one strand flip, so the contig
        # slice must be reverse-complemented when exactly one pair is reversed.
        flipped = False
        if queryStart > queryStop:
            # Without this swap the slice taken by getseq() is empty and the
            # guide region would be deleted without any replacement.
            queryStart, queryStop = queryStop, queryStart
            flipped = not flipped
        if subjectStart > subjectStop:
            subjectStart, subjectStop = subjectStop, subjectStart
            flipped = not flipped
        orientation = "reverse" if flipped else "direct"
        sequence = getseq(ContigsDict, seqHeader, queryStart, queryStop,
                          orientation)
        # Clear the aligned guide region first; positions that are already
        # absent are simply ignored.
        for i in range(subjectStart, subjectStop + 1):
            GuideDict.pop(i, None)
        # NOTE: the contig slice is laid down from subjectStart onwards; when
        # it is longer than the subject span it also overwrites the positions
        # that follow subjectStop.
        for i, nucleotide in enumerate(sequence):
            GuideDict[i + subjectStart] = nucleotide
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
116 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
117 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
def finalAssembly(GuideDict, outputfile, prefix, suffix):
    """Write the scaffold held in ``GuideDict`` as a one-record FASTA file.

    Parameters:
        GuideDict: ``{position: nucleotide}`` mapping; the nucleotides are
            concatenated by increasing position.
        outputfile: path of the FASTA file to create (overwritten if present).
        prefix: text inserted after "Scaffold_from_" in the header.
        suffix: text inserted after "_guided_by_" in the header.

    Returns:
        None.
    """
    finalSequence = insert_newlines(
        "".join(GuideDict[position] for position in sorted(GuideDict)))
    # The context manager closes the file even if one of the writes fails.
    with open(outputfile, "w") as Out:
        Out.write(">Scaffold_from_%s_guided_by_%s\n" % (prefix, suffix))
        Out.write("%s\n" % finalSequence)
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
127 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
128 |
7d96b28eec49
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff
changeset
|
def __main__():
    """Command-line entry point.

    Parses the options, loads the contigs, the guide sequence and the BLAST
    hits, patches the guide with the aligned contig sequences and writes the
    resulting scaffold to the requested output file.
    """
    options = Parser()
    contigs = myContigs(options.sequences)
    guide = myGuide(options.guideSequence)
    hits = blatnInfo(options.blast_tab)
    # updateGuide() edits ``guide`` in place.
    updateGuide(hits, guide, contigs)
    finalAssembly(
        guide,
        options.output,
        options.scaffold_prefix,
        options.scaffold_suffix,
    )


if __name__ == "__main__":
    __main__()