Mercurial > repos > artbio > blastparser_and_hits
annotate BlastParser_and_hits.py @ 1:9beb85dba280 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit dfecfb40f245a3cdb09dd1cfe37be4cb164ad2eb
author | artbio |
---|---|
date | Fri, 16 Feb 2018 04:54:52 -0500 |
parents | 9dfb65ebb02e |
children | 36103afa0934 |
rev | line source |
---|---|
0
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
1 #!/usr/bin/python |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
2 import argparse |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
3 from collections import defaultdict |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
4 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
5 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def Parser(argv=None):
    """Build the command-line interface and return the parsed arguments.

    argv: optional list of argument strings. When None (the default),
          argparse reads sys.argv[1:], exactly as before.

    Returns the argparse namespace. --sequences, --blast, --fastaOutput and
    --tabularOutput must all be supplied with non-empty values; otherwise
    the parser reports the problem through the_parser.error(), which exits.
    --flanking defaults to 0.
    """
    the_parser = argparse.ArgumentParser()
    # Help and error texts use implicit string concatenation: a backslash
    # continuation inside a literal would embed the next line's indentation.
    the_parser.add_argument('--blast', action="store", type=str,
                            help="Path to the blast output (tabular format, "
                                 "12 columns plus the subject length as "
                                 "13th column)")
    the_parser.add_argument('--sequences', action="store", type=str,
                            help="Path to the fasta file with blasted "
                                 "sequences")
    the_parser.add_argument('--fastaOutput', action="store", type=str,
                            help="fasta output file of blast hits")
    the_parser.add_argument('--tabularOutput', action="store", type=str,
                            help="tabular output file of blast analysis")
    # default=0 replaces the former post-parse "if not args.flanking" fix-up.
    the_parser.add_argument('--flanking', action="store", type=int,
                            default=0,
                            help="number of flanking nucleotides added to "
                                 "the hit sequences")
    the_parser.add_argument('--mode', action="store",
                            choices=["verbose", "short"], type=str,
                            help="reporting (verbose) or not reporting "
                                 "(short) oases contigs")
    the_parser.add_argument('--filter_relativeCov', action="store",
                            type=float, default=0,
                            help="filter out relative coverages below the "
                                 "specified ratio (float number)")
    the_parser.add_argument('--filter_maxScore', action="store", type=float,
                            default=0,
                            help="filter out best BitScores below the "
                                 "specified float number")
    the_parser.add_argument('--filter_meanScore', action="store", type=float,
                            default=0,
                            help="filter out mean BitScores below the "
                                 "specified float number")
    the_parser.add_argument('--filter_term_in', action="store", type=str,
                            default="",
                            help="select the specified term in the subject "
                                 "list")
    the_parser.add_argument('--filter_term_out', action="store", type=str,
                            default="",
                            help="exclude the specified term from the "
                                 "subject list")
    the_parser.add_argument('--al_sequences', action="store", type=str,
                            help="sequences that have been blast aligned")
    the_parser.add_argument('--un_sequences', action="store", type=str,
                            help="sequences that have not been blast aligned")
    the_parser.add_argument('--dataset_name', action="store", type=str,
                            default="",
                            help="the name of the dataset that has been "
                                 "parsed, to be reported in the output")
    args = the_parser.parse_args(argv)
    # Checked by hand (rather than required=True) so that empty-string
    # values are rejected as well as absent options.
    mandatory = (args.sequences, args.blast, args.fastaOutput,
                 args.tabularOutput)
    if not all(mandatory):
        the_parser.error('argument(s) missing, call the -h option of the '
                         'script')
    if not args.flanking:
        # Only reachable for an explicit "--flanking 0"; kept for parity.
        args.flanking = 0
    return args
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
60 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
61 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def median(lst):
    """Return the median of the values in lst, or None when lst is empty.

    For an odd number of values the middle element is returned unchanged;
    for an even number the two middle elements are averaged and the result
    is a float. The input is not modified (a sorted copy is used).
    """
    ordered = sorted(lst)
    count = len(ordered)
    if count < 1:
        return None
    # Floor division keeps the index an int on Python 3 as well, where "/"
    # would produce a float and indexing would raise TypeError.
    middle = count // 2
    if count % 2 == 1:
        return ordered[middle]
    return float(ordered[middle - 1] + ordered[middle]) / 2.0
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
70 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
71 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def mean(lst):
    """Return the arithmetic mean of the values in lst as a float.

    lst may be any iterable of numbers (list, tuple, generator, ...).
    An empty input yields 0.
    """
    # Materialize once so one-shot iterables can be both counted and summed.
    values = list(lst)
    if not values:
        return 0
    return sum(values) / float(len(values))
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
76 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
77 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def getfasta(fastafile):
    """Read a fasta file into a dictionary {header: sequence}.

    fastafile: path of the fasta file to read.

    The header is the text following ">" on a header line, without the
    trailing newline. The sequence is the concatenation of all following
    lines up to the next header, newlines removed. If a header occurs more
    than once, the last record wins.

    Raises ValueError if non-blank sequence data precedes the first header.
    """
    chunks = {}
    header = None
    # "with" guarantees the handle is closed even if parsing fails.
    with open(fastafile) as handle:
        for line in handle:
            # Strip only the newline: slicing off the last character would
            # truncate a final line that has no trailing newline.
            line = line.rstrip("\n")
            if line.startswith(">"):
                header = line[1:]
                chunks[header] = []
            elif header is not None:
                chunks[header].append(line)
            elif line.strip():
                raise ValueError("sequence data found before the first '>' "
                                 "header in %s" % fastafile)
    # Join once per record instead of growing strings line by line.
    return {name: "".join(parts) for name, parts in chunks.items()}
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
89 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
90 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def insert_newlines(string, every=60):
    """Return string broken into lines of at most `every` characters.

    string: the text (typically a sequence) to wrap.
    every:  maximum line length, a positive integer (default 60).

    The pieces are joined with "\\n"; no trailing newline is added and an
    empty string is returned unchanged.

    Raises ValueError if every is smaller than 1. A negative step would
    otherwise produce an empty result and silently drop the sequence.
    """
    if every < 1:
        raise ValueError("every must be a positive integer, got %r"
                         % (every,))
    return '\n'.join(string[start:start + every]
                     for start in range(0, len(string), every))
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
96 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
97 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def getblast(blastfile):
    '''Parse a 13 column tabular blast output.

    blastfile: path of the tab-separated blast output. Column 1 is the
               query (transcript), column 2 the subject, columns 3-13 are
               stored in blastinfo as described below.

    Returns a defaultdict(dict) such that
    blastdic[subject][transcript] is the list of blastinfo lists (one per
    blast line, in file order), where:

    blastinfo [0]	Percentage of identical matches
    blastinfo [1]	Alignment length
    blastinfo [2]	Number of mismatches
    blastinfo [3]	Number of gap openings
    blastinfo [4]	Start of alignment in query
    blastinfo [5]	End of alignment in query
    blastinfo [6]	Start of alignment in subject (database hit)
    blastinfo [7]	End of alignment in subject (database hit)
    blastinfo [8]	Expectation value (E-value), kept as a string
    blastinfo [9]	Bit score
    blastinfo [10]	Subject length
                    (NEED TO BE SPECIFIED WHEN RUNNING BLAST)

    Blank lines are skipped. Raises IndexError if a line has fewer than
    13 columns, and ValueError if a numeric column cannot be converted.
    '''
    blastdic = defaultdict(dict)
    # "with" guarantees the handle is closed even if parsing fails.
    with open(blastfile) as handle:
        for number, line in enumerate(handle, 1):
            # Strip only the newline: slicing off the last character would
            # eat a digit of the subject length on an unterminated last line.
            line = line.rstrip("\n")
            if not line:
                continue
            fields = line.split("\t")
            if len(fields) < 13:
                raise IndexError("%s, line %d: expected 13 tab-separated "
                                 "columns, found %d"
                                 % (blastfile, number, len(fields)))
            transcript = fields[0]
            subject = fields[1]
            # blastinfo[0]
            blastinfo = [float(fields[2])]
            # blastinfo[1:8] holds the seven integer columns
            blastinfo.extend(int(value) for value in fields[3:10])
            # blastinfo[8] E-value remains as a string type
            blastinfo.append(fields[10])
            # blastinfo[9] Bit score
            blastinfo.append(float(fields[11]))
            # blastinfo[10] Subject length, only present in a 13 column
            # blast output
            blastinfo.append(int(fields[12]))
            # setdefault creates the per-transcript list on first sight,
            # without a catch-all "except" that could hide real errors.
            blastdic[subject].setdefault(transcript, []).append(blastinfo)
    return blastdic
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
132 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
133 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def getseq(fastadict, transcript, up, down, orientation="direct"):
    """Return the stretch of a sequence between two 1-based positions.

    fastadict   -- mapping of sequence name to sequence string
    transcript  -- key of the sequence to slice in fastadict
    up, down    -- 1-based, inclusive bounds of the slice (up-1:down)
    orientation -- "direct" returns the slice unchanged; any other value
                   returns its reverse complement

    Raises KeyError if transcript is not a key of fastadict.
    """
    # Full IUPAC nucleotide complement table. The previous 5-letter,
    # uppercase-only table raised KeyError on soft-masked (lowercase)
    # sequences and on ambiguity codes.
    complement = {
        "A": "T", "T": "A", "G": "C", "C": "G", "U": "A",
        "R": "Y", "Y": "R", "S": "S", "W": "W", "K": "M", "M": "K",
        "B": "V", "V": "B", "D": "H", "H": "D", "N": "N",
    }
    # Mirror the table for lowercase so the case of each base is preserved.
    complement.update(
        {base.lower(): comp.lower() for base, comp in list(complement.items())}
    )

    def reverse(seq):
        # Symbols without a complement (gaps, '*', ...) are kept unchanged
        # rather than aborting the whole run.
        return "".join(complement.get(base, base) for base in reversed(seq))

    pickseq = fastadict[transcript][up-1:down]
    if orientation == "direct":
        return pickseq
    return reverse(pickseq)
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
144 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
145 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def subjectCoverage(fastadict, blastdict, subject,
                    QueriesFlankingNucleotides=0):
    """Summarise all hits recorded against one subject.

    fastadict  -- mapping of query (transcript) name to sequence string,
                  passed through to GetHitSequence
    blastdict  -- blastdict[subject][transcript] is a list of hits; each hit
                  is an indexable record where, as used here, hit[0] and
                  hit[1] are reported as IdMatch and AligLength, hit[4] and
                  hit[5] are the query bounds, hit[6] and hit[7] the subject
                  bounds, hit[8] the E-value, hit[9] the bit score and
                  hit[10] the subject length
    subject    -- key of the subject to summarise
    QueriesFlankingNucleotides -- flank size forwarded to GetHitSequence

    Returns a tuple (HitDic, subjectLength, TotalSubjectCoverage,
    RelativeSubjectCoverage, max bit score, mean(bitScores)), where HitDic
    maps a generated header to the hit sequence extracted from the query.

    NOTE(review): assumes the subject has at least one hit; with none,
    subjectLength is never assigned and the function raises.
    """
    # Distinct subject positions covered by at least one hit. A set is
    # filled directly so memory stays bounded by the covered span instead
    # of growing with every overlapping hit.
    covered_positions = set()
    HitDic = {}
    bitScores = []
    for transcript in blastdict[subject]:
        prefix = "%s--%s_" % (subject, transcript)
        for hitNumber, hit in enumerate(blastdict[subject][transcript], 1):
            suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (
                hitNumber, hit[0], hit[1], hit[8])
            # query coverage by a hit is in hit[4:6]
            HitDic[prefix + suffix] = GetHitSequence(
                fastadict, transcript, hit[4], hit[5],
                QueriesFlankingNucleotides)
            # subject coverage by a hit is in hit[6:8]; the bounds may come
            # in either order, so normalise them first
            low, high = min(hit[6], hit[7]), max(hit[6], hit[7])
            covered_positions.update(range(low, high + 1))
            bitScores.append(hit[9])
            # the original author notes this value is the same for every
            # hit of a given subject, so the last one seen is kept
            subjectLength = hit[10]
    TotalSubjectCoverage = len(covered_positions)
    RelativeSubjectCoverage = TotalSubjectCoverage / float(subjectLength)
    return (HitDic, subjectLength, TotalSubjectCoverage,
            RelativeSubjectCoverage, max(bitScores), mean(bitScores))
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
174 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
175 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def GetHitSequence(fastadict, FastaHeader, leftCoordinate, rightCoordinate,
                   FlankingValue):
    """Extract the sequence of a hit from its query, with a left flank.

    The hit is "direct" when rightCoordinate is strictly greater than
    leftCoordinate; otherwise it is "reverse" and the two bounds are
    swapped so the slice is always taken low-to-high. The lower bound is
    then moved FlankingValue positions to the left, clamped to 1, and the
    work is delegated to getseq.

    NOTE(review): only the lower bound is extended by FlankingValue; the
    upper bound is passed through untouched. Confirm this asymmetry is
    intended.
    """
    is_direct = rightCoordinate > leftCoordinate
    polarity = "direct" if is_direct else "reverse"
    if is_direct:
        start, stop = leftCoordinate, rightCoordinate
    else:
        start, stop = rightCoordinate, leftCoordinate
    # Never start before the first position of the sequence.
    flanked_start = start - FlankingValue
    start = flanked_start if flanked_start > 0 else 1
    return getseq(fastadict, FastaHeader, start, stop, polarity)
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
189 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
190 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
191 def outputParsing(dataset_name, F, Fasta, results, Xblastdict, fastadict, |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
192 filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
193 filter_term_in="", filter_term_out="", mode="verbose"): |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def filter_results(results, filter_relativeCov=0, filter_maxScore=0,
                   filter_meanScore=0, filter_term_in="",
                   filter_term_out=""):
    """Remove, in place, the subjects that fail any of the filters.

    results maps a subject name to a dict holding at least the keys
    "RelativeSubjectCoverage", "maxBitScores" and "meanBitScores".
    A subject is dropped when:
      * its relative coverage is below filter_relativeCov, or
      * its best bit score is below filter_maxScore, or
      * its mean bit score is below filter_meanScore, or
      * filter_term_in is not a substring of its name (an empty term
        matches every name), or
      * filter_term_out is non-empty and is a substring of its name.

    Returns the same (mutated) results dict.
    """
    # Iterate over a snapshot of the keys: deleting entries while walking
    # the live keys() view raises RuntimeError on Python 3.
    for subject in list(results):
        stats = results[subject]
        # Short-circuit evaluation keeps the original test order, so later
        # keys are not looked up once an earlier filter already rejects.
        rejected = (
            stats["RelativeSubjectCoverage"] < filter_relativeCov
            or stats["maxBitScores"] < filter_maxScore
            or stats["meanBitScores"] < filter_meanScore
            or filter_term_in not in subject
            or (filter_term_out and filter_term_out in subject)
        )
        if rejected:
            del results[subject]
    return results
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
217 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
218 F = open(F, "w") |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
219 Fasta = open(Fasta, "w") |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
220 blasted_transcripts = [] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
221 filter_results(results, filter_relativeCov, filter_maxScore, |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
222 filter_meanScore, filter_term_in, filter_term_out) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
223 for subject in results: |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
224 for transcript in Xblastdict[subject]: |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
225 blasted_transcripts.append(transcript) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
226 blasted_transcripts = list(set(blasted_transcripts)) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
227 if mode == "verbose": |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
228 F.write("--- %s ---\n" % dataset_name) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
229 F.write("# %s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ("SeqId", "%Identity", |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
230 "AlignLength", |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
231 "StartSubject", |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
232 "EndSubject", |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
233 "%QueryHitCov", |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
234 "E-value", |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
235 "BitScore")) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
236 for subject in sorted(results, |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
237 key=lambda x: results[x]["meanBitScores"], |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
238 reverse=True): |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
239 F.write(" \n# %s\n" % subject) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
240 F.write("# Suject Length: %s\n" % |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
241 results[subject]["subjectLength"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
242 F.write("# Total Subject Coverage: %s\n" % |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
243 results[subject]["TotalCoverage"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
244 F.write("# Relative Subject Coverage: %s\n" % |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
245 results[subject]["RelativeSubjectCoverage"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
246 F.write("# Best Bit Score: %s\n" % results[subject][ |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
247 "maxBitScores"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
248 F.write("# Mean Bit Score: %s\n" % results[subject][ |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
249 "meanBitScores"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
250 for header in results[subject]["HitDic"]: |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
251 Fasta.write(">%s\n%s\n" % (header, |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
252 insert_newlines(results[subject][ |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
253 "HitDic"][ |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
254 header]))) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
255 Fasta.write("\n") # final carriage return for the sequence |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
256 for transcript in Xblastdict[subject]: |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
257 transcriptSize = float(len(fastadict[transcript])) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
258 for hit in Xblastdict[subject][transcript]: |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
259 percentIdentity = hit[0] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
260 alignLenght = hit[1] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
261 subjectStart = hit[6] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
262 subjectEnd = hit[7] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
263 queryCov = "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
264 Eval, BitScore = hit[8], hit[9] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
265 info = [transcript] + [percentIdentity, alignLenght, |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
266 subjectStart, subjectEnd, queryCov, |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
267 Eval, BitScore] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
268 info = [str(i) for i in info] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
269 info = "\t".join(info) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
270 F.write("%s\n" % info) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
271 else: |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
272 F.write("--- %s ---\n" % dataset_name) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
273 F.write("# subject\tsubject length\tTotal Subject Coverage\tRelative\ |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
274 Subject Coverage\tBest Bit Score\tMean Bit Score\n") |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
275 for subject in sorted(results, |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
276 key=lambda x: results[x]["meanBitScores"], |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
277 reverse=True): |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
278 line = [] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
279 line.append(subject) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
280 line.append(results[subject]["subjectLength"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
281 line.append(results[subject]["TotalCoverage"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
282 line.append(results[subject]["RelativeSubjectCoverage"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
283 line.append(results[subject]["maxBitScores"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
284 line.append(results[subject]["meanBitScores"]) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
285 line = [str(i) for i in line] |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
286 F.write("%s\n" % "\t".join(line)) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
287 for header in results[subject]["HitDic"]: |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
288 Fasta.write(">%s\n%s\n" % (header, |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
289 insert_newlines( |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
290 results[subject][ |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
291 "HitDic"][header]))) |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
292 Fasta.write("\n") # final carriage return for the sequence |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
293 F.close() |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
294 Fasta.close() |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
295 return blasted_transcripts |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
296 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
297 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def dispatch_sequences(fastadict, blasted_transcripts, matched_sequences,
                       unmatched_sequences):
    '''Write the sequences that matched, and those that did not match, in
    the blast to two separate FASTA files.

    fastadict -- mapping of transcript name to sequence; every entry is
        written to exactly one of the two output files
    blasted_transcripts -- container of transcript names, tested with ``in``
    matched_sequences -- path of the file receiving the transcripts found
        in blasted_transcripts (created or truncated)
    unmatched_sequences -- path of the file receiving all other transcripts
        (created or truncated)

    Returns None.
    '''
    # The list of blasted_transcripts is generated by the outputParsing
    # function.
    #
    # Both files are managed by a ``with`` block so they are flushed and
    # closed even if insert_newlines() or a write raises part-way through.
    with open(matched_sequences, "w") as F_matched, \
            open(unmatched_sequences, "w") as F_unmatched:
        for transcript in fastadict:
            if transcript in blasted_transcripts:
                destination = F_matched
            else:
                destination = F_unmatched
            # insert_newlines is defined elsewhere in this file; presumably
            # it wraps the sequence into fixed-width lines -- TODO confirm.
            destination.write(">%s\n%s\n" % (
                transcript, insert_newlines(fastadict[transcript])))
    return
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
315 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
316 |
9dfb65ebb02e
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff
changeset
|
def __main__():
    '''Command-line entry point.

    Reads the arguments returned by Parser(), loads the sequences and the
    blast data, computes one summary per blast subject with
    subjectCoverage(), writes the reports through outputParsing(), and
    finally splits the input sequences into matched / unmatched files with
    dispatch_sequences().
    '''
    args = Parser()
    fastadict = getfasta(args.sequences)
    Xblastdict = getblast(args.blast)

    results = defaultdict(dict)
    for subject in Xblastdict:
        # subjectCoverage returns six values; unpack them first, then store
        # each one under its own key of the per-subject summary.
        (hit_dic, subject_length, total_coverage, relative_coverage,
         max_bit_score, mean_bit_score) = subjectCoverage(
            fastadict, Xblastdict, subject, args.flanking)
        summary = results[subject]
        summary["HitDic"] = hit_dic
        summary["subjectLength"] = subject_length
        summary["TotalCoverage"] = total_coverage
        summary["RelativeSubjectCoverage"] = relative_coverage
        summary["maxBitScores"] = max_bit_score
        summary["meanBitScores"] = mean_bit_score

    # outputParsing hands back the blasted transcripts, which drive the
    # matched / unmatched split below.
    blasted_transcripts = outputParsing(
        args.dataset_name,
        args.tabularOutput,
        args.fastaOutput,
        results,
        Xblastdict,
        fastadict,
        filter_relativeCov=args.filter_relativeCov,
        filter_maxScore=args.filter_maxScore,
        filter_meanScore=args.filter_meanScore,
        filter_term_in=args.filter_term_in,
        filter_term_out=args.filter_term_out,
        mode=args.mode,
    )
    dispatch_sequences(
        fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)


# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()