annotate BlastParser_and_hits.py @ 15:1991c830504a draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
author drosofff
date Wed, 09 Nov 2016 11:32:32 -0500
parents 78c34df2dd8d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
1 #!/usr/bin/python
4
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
2 # blastn tblastn blastx parser revised 14-1-2016.
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
3 # drosofff@gmail.com
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
4
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
5 import argparse
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
6 from collections import defaultdict
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
7
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
8
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def Parser():
    """Build the command-line interface and return the parsed arguments.

    The four file arguments (--sequences, --blast, --fastaOutput and
    --tabularOutput) are mandatory: if any of them is missing or empty the
    parser exits through ``ArgumentParser.error``.

    Returns:
        argparse.Namespace holding every option; ``flanking`` is always an
        int (0 when the option is not given).
    """
    the_parser = argparse.ArgumentParser()
    # The blast table must carry the subject length as a 13th column: it is
    # read by the blast-table loader of this script.
    the_parser.add_argument(
        '--blast', action="store", type=str,
        help="Path to the blast output (tabular format, 12 standard columns "
             "plus the subject length as 13th column)")
    the_parser.add_argument(
        '--sequences', action="store", type=str,
        help="Path to the fasta file with blasted sequences")
    the_parser.add_argument(
        '--fastaOutput', action="store", type=str,
        help="fasta output file of blast hits")
    the_parser.add_argument(
        '--tabularOutput', action="store", type=str,
        help="tabular output file of blast analysis")
    # default=0 replaces the former "if not args.flanking" fix-up.
    the_parser.add_argument(
        '--flanking', action="store", type=int, default=0,
        help="number of flanking nucleotides added to the hit sequences")
    the_parser.add_argument(
        '--mode', action="store", choices=["verbose", "short"], type=str,
        help="reporting (verbose) or not reporting (short) oases contigs")
    the_parser.add_argument(
        '--filter_relativeCov', action="store", type=float, default=0,
        help="filter out relative coverages below the specified ratio (float number)")
    the_parser.add_argument(
        '--filter_maxScore', action="store", type=float, default=0,
        help="filter out best BitScores below the specified float number")
    the_parser.add_argument(
        '--filter_meanScore', action="store", type=float, default=0,
        help="filter out mean BitScores below the specified float number")
    the_parser.add_argument(
        '--filter_term_in', action="store", type=str, default="",
        help="select the specified term in the subject list")
    the_parser.add_argument(
        '--filter_term_out', action="store", type=str, default="",
        help="exclude the specified term from the subject list")
    the_parser.add_argument(
        '--al_sequences', action="store", type=str,
        help="sequences that have been blast aligned")
    the_parser.add_argument(
        '--un_sequences', action="store", type=str,
        help="sequences that have not been blast aligned")
    the_parser.add_argument(
        '--dataset_name', action="store", type=str, default="",
        help="the name of the dataset that has been parsed, to be reported in the output")
    args = the_parser.parse_args()
    required = (args.sequences, args.blast, args.fastaOutput, args.tabularOutput)
    if not all(required):
        the_parser.error('argument(s) missing, call the -h option of the script')
    return args
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
31
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
32
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def median(lst):
    """Return the median of the values in ``lst``.

    Returns None for an empty input, the middle element for an odd number
    of values, and the mean of the two middle elements (as a float) for an
    even number of values. The input is not modified.
    """
    ordered = sorted(lst)
    count = len(ordered)
    if count < 1:
        return None
    # Floor division keeps the index an int on Python 3 as well as Python 2.
    middle = count // 2
    if count % 2 == 1:
        return ordered[middle]
    return float(sum(ordered[middle - 1:middle + 1])) / 2.0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
41
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
42
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def mean(lst):
    """Return the arithmetic mean of ``lst`` as a float.

    An empty input yields 0 rather than raising ZeroDivisionError.
    """
    count = len(lst)
    if count < 1:
        return 0
    return sum(lst) / float(count)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
47
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
48
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
def getfasta(fastafile):
    """Load a fasta file into a dictionary {header: sequence}.

    The header is the full text following ">" (end-of-line characters
    removed); the sequence is the concatenation of all the lines that follow
    it, without line breaks. When a header occurs several times, the last
    record wins.

    Raises:
        ValueError: if sequence data is found before the first header.
    """
    chunks = {}
    header = None
    # The context manager guarantees the file handle is released.
    with open(fastafile) as handle:
        for line in handle:
            # Strip only end-of-line characters, so that a last line without
            # a trailing newline does not lose its final character.
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                header = line[1:]
                chunks[header] = []
            elif line:
                if header is None:
                    raise ValueError(
                        "sequence data found before the first fasta header in %s"
                        % fastafile)
                chunks[header].append(line)
    # Join once per record instead of growing strings line after line.
    return dict((name, "".join(parts)) for name, parts in chunks.items())
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
60
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
61
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def insert_newlines(string, every=60):
    """Wrap ``string`` by inserting a newline every ``every`` characters.

    No trailing newline is added; an empty string is returned unchanged.
    """
    # range() is available on both Python 2 and Python 3, unlike xrange().
    return '\n'.join(string[i:i + every] for i in range(0, len(string), every))
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
67
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
68
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
def getblast(blastfile):
    '''Parse a 13-column tabular blast output.

    Returns a defaultdict {subject: {transcript: [blastinfo, ...]}} where
    each blastinfo is a list describing one hit:

    blastinfo [0]	Percentage of identical matches
    blastinfo [1]	Alignment length
    blastinfo [2]	Number of mismatches
    blastinfo [3]	Number of gap openings
    blastinfo [4]	Start of alignment in query
    blastinfo [5]	End of alignment in query
    blastinfo [6]	Start of alignment in subject (database hit)
    blastinfo [7]	End of alignment in subject (database hit)
    blastinfo [8]	Expectation value (E-value), kept as a string
    blastinfo [9]	Bit score
    blastinfo [10]	Subject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST)

    Blank lines and lines starting with "#" are skipped.

    Raises:
        IndexError: if a data line has fewer than 13 columns.
        ValueError: if a numeric column cannot be converted.
    '''
    blastdic = defaultdict(dict)
    with open(blastfile) as handle:
        for line_number, line in enumerate(handle, 1):
            # Strip only end-of-line characters: slicing with [:-1] would
            # truncate the last field of a file lacking a final newline.
            line = line.rstrip("\r\n")
            if not line or line.startswith("#"):
                continue
            fields = line.split("\t")
            if len(fields) < 13:
                raise IndexError(
                    "line %d of %s has %d column(s); 13 are expected "
                    "(12 standard columns + subject length)"
                    % (line_number, blastfile, len(fields)))
            transcript, subject = fields[0], fields[1]
            blastinfo = [float(fields[2])]  # blastinfo[0]
            blastinfo.extend(int(i) for i in fields[3:10])  # blastinfo[1:8]
            blastinfo.append(fields[10])  # blastinfo[8] E-value remains a string
            blastinfo.append(float(fields[11]))  # blastinfo[9] Bit score
            blastinfo.append(int(fields[12]))  # blastinfo[10] Subject length
            # Group the hits by subject, then by transcript.
            blastdic[subject].setdefault(transcript, []).append(blastinfo)
    return blastdic
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
96
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
97
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
def getseq(fastadict, transcript, up, down, orientation="direct"):
    """Extract the subsequence [up, down] (1-based, inclusive) of a record.

    Args:
        fastadict: dictionary {header: sequence}.
        transcript: header of the sequence to slice.
        up: 1-based coordinate of the first nucleotide; values below 1 are
            treated as 1.
        down: 1-based coordinate of the last nucleotide.
        orientation: "direct" returns the slice as is; any other value
            returns its reverse complement.

    Raises:
        KeyError: if ``transcript`` is not a key of ``fastadict``.
    """
    def reverse(seq):
        # Reverse complement. The table covers IUPAC ambiguity codes in both
        # cases; any other character is passed through unchanged instead of
        # raising KeyError.
        pairs = {"A": "T", "T": "A", "G": "C", "C": "G", "U": "A",
                 "R": "Y", "Y": "R", "K": "M", "M": "K",
                 "B": "V", "V": "B", "D": "H", "H": "D",
                 "S": "S", "W": "W", "N": "N"}
        revdict = dict(pairs)
        for base, partner in pairs.items():
            revdict[base.lower()] = partner.lower()
        return "".join(revdict.get(base, base) for base in reversed(seq))

    # Clamp the start so that up <= 0 cannot turn into a negative index,
    # which would slice from the end of the sequence.
    pickseq = fastadict[transcript][max(up - 1, 0):down]
    if orientation == "direct":
        return pickseq
    return reverse(pickseq)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
108
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
109
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
def subjectCoverage(fastadict, blastdict, subject, QueriesFlankingNucleotides=0):
    """Summarise all the hits recorded for one blast subject.

    Args:
        fastadict: dictionary {header: sequence} of the blasted sequences.
        blastdict: dictionary {subject: {transcript: [hit, ...]}} in which
            each hit is indexed as follows: [0] identity, [1] alignment
            length, [4:6] query coordinates, [6:8] subject coordinates,
            [8] E-value, [9] bit score, [10] subject length.
        subject: the subject to summarise.
        QueriesFlankingNucleotides: flanking value forwarded to
            GetHitSequence.

    Returns:
        A tuple (HitDic, subjectLength, TotalSubjectCoverage,
        RelativeSubjectCoverage, max bit score, mean bit score), where HitDic
        maps a descriptive hit name to the hit sequence taken from the query.

    Raises:
        ValueError: if no hit is recorded for ``subject``.
    """
    # A set is filled directly: overlapping hits then never duplicate
    # positions in memory before being counted.
    covered_positions = set()
    HitDic = {}
    bitScores = []
    for transcript, hits in blastdict[subject].items():
        prefix = "%s--%s_" % (subject, transcript)
        for hitNumber, hit in enumerate(hits, 1):
            suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (hitNumber, hit[0], hit[1], hit[8])
            # query coverage by a hit is in hit[4:6]
            HitDic[prefix + suffix] = GetHitSequence(
                fastadict, transcript, hit[4], hit[5], QueriesFlankingNucleotides)
            # subject coverage by a hit is in hit[6:8]; the coordinates are
            # in decreasing order for hits on the reverse strand
            start, end = sorted((hit[6], hit[7]))
            covered_positions.update(range(start, end + 1))
            bitScores.append(hit[9])
            subjectLength = hit[10]  # always the same value for a given subject
    if not bitScores:
        raise ValueError("no blast hit recorded for subject %r" % (subject,))
    TotalSubjectCoverage = len(covered_positions)
    RelativeSubjectCoverage = TotalSubjectCoverage / float(subjectLength)
    bitScoreMean = sum(bitScores) / float(len(bitScores))
    return HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage, max(bitScores), bitScoreMean
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
127
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
128
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
def GetHitSequence(fastadict, FastaHeader, leftCoordinate, rightCoordinate, FlankingValue):
    """Return the sequence of a hit from its coordinates in the fasta record.

    Coordinates are 1-based and inclusive. When ``rightCoordinate`` is not
    greater than ``leftCoordinate`` the hit is considered to be on the
    reverse strand: the coordinates are swapped and the "reverse"
    orientation is requested from getseq.

    Args:
        fastadict: dictionary {header: sequence}.
        FastaHeader: header of the record the hit belongs to.
        leftCoordinate: first hit coordinate as reported by blast.
        rightCoordinate: second hit coordinate as reported by blast.
        FlankingValue: number of nucleotides by which the hit is extended
            on its left side, never going below position 1.
    """
    if rightCoordinate > leftCoordinate:
        polarity = "direct"
    else:
        polarity = "reverse"
        leftCoordinate, rightCoordinate = rightCoordinate, leftCoordinate
    # NOTE(review): only the left boundary is extended by FlankingValue, the
    # right boundary is left untouched - confirm whether a symmetric flank
    # was intended before changing it, since it would alter the fasta output.
    leftCoordinate = max(leftCoordinate - FlankingValue, 1)
    return getseq(fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
140
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
141
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
142 def outputParsing(dataset_name, F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"):
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
143 def filter_results(results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""):
4
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
144 for subject in results.keys():
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
145 if results[subject]["RelativeSubjectCoverage"] < filter_relativeCov:
4
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
146 del results[subject]
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
147 continue
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
148 if results[subject]["maxBitScores"] < filter_maxScore:
4
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
149 del results[subject]
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
150 continue
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
151 if results[subject]["meanBitScores"] < filter_meanScore:
4
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
152 del results[subject]
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
153 continue
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
154 if filter_term_in in subject:
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
155 pass
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
156 else:
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
157 del results[subject]
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
158 continue
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
159 if filter_term_out and filter_term_out in subject:
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
160 del results[subject]
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
161 continue
60b6bd959929 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents: 3
diff changeset
162 return results
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
163
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
164 F = open(F, "w")
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
165 Fasta = open(Fasta, "w")
2
bb0d4cd765c5 planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents: 1
diff changeset
166 blasted_transcripts = []
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
167 filter_results(results, filter_relativeCov, filter_maxScore, filter_meanScore, filter_term_in, filter_term_out)
2
bb0d4cd765c5 planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents: 1
diff changeset
168 for subject in results:
bb0d4cd765c5 planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents: 1
diff changeset
169 for transcript in Xblastdict[subject]:
bb0d4cd765c5 planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents: 1
diff changeset
170 blasted_transcripts.append(transcript)
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
171 blasted_transcripts = list(set(blasted_transcripts))
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
172 if mode == "verbose":
6
78c34df2dd8d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
drosofff
parents: 5
diff changeset
173 print >>F, "--- %s ---" % (dataset_name)
78c34df2dd8d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
drosofff
parents: 5
diff changeset
174 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore"
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
175 for subject in sorted(results, key=lambda x: results[x]["meanBitScores"], reverse=True):
6
78c34df2dd8d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
drosofff
parents: 5
diff changeset
176 print >> F, " \n# %s" % subject
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
177 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
178 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
179 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"])
3
8f5d48294f70 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3d9ddd0f6f3c3b97a3bebf52646731ad6771e178
drosofff
parents: 2
diff changeset
180 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"])
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
181 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
182 for header in results[subject]["HitDic"]:
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
183 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]))
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
184 print >> Fasta, "" # final carriage return for the sequence
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
185 for transcript in Xblastdict[subject]:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
186 transcriptSize = float(len(fastadict[transcript]))
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
187 for hit in Xblastdict[subject][transcript]:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
188 percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov = hit[0], hit[1], hit[6], hit[7], "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
189 Eval, BitScore = hit[8], hit[9]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
190 info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
191 info = [str(i) for i in info]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
192 info = "\t".join(info)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
193 print >> F, info
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
194 else:
6
78c34df2dd8d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
drosofff
parents: 5
diff changeset
195 print >>F, "--- %s ---" % (dataset_name)
3
8f5d48294f70 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3d9ddd0f6f3c3b97a3bebf52646731ad6771e178
drosofff
parents: 2
diff changeset
196 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score"
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
197 for subject in sorted(results, key=lambda x: results[x]["meanBitScores"], reverse=True):
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
198 line = []
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
199 line.append(subject)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
200 line.append(results[subject]["subjectLength"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
201 line.append(results[subject]["TotalCoverage"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
202 line.append(results[subject]["RelativeSubjectCoverage"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
203 line.append(results[subject]["maxBitScores"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
204 line.append(results[subject]["meanBitScores"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
205 line = [str(i) for i in line]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
206 print >> F, "\t".join(line)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
207 for header in results[subject]["HitDic"]:
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
208 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]))
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
209 print >> Fasta, "" # final carriage return for the sequence
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
210 F.close()
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
211 Fasta.close()
2
bb0d4cd765c5 planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents: 1
diff changeset
212 return blasted_transcripts
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
213
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
214
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
def dispatch_sequences(fastadict, blasted_transcripts, matched_sequences, unmatched_sequences):
    '''Write the sequences that matched, and those that did not match, in the blast.

    fastadict: mapping keyed by transcript name; every entry is written as a
        ">name" header followed by insert_newlines(value) to exactly one of
        the two output files.
    blasted_transcripts: collection of transcript names (generated by the
        outputParsing function); names found in it are written to the
        matched file, all the others to the unmatched file.
    matched_sequences, unmatched_sequences: paths of the two output files.
        Both are opened in "w" mode, so they are created or truncated even
        when no record ends up in them.

    Returns None.
    '''
    # Build the lookup once: testing membership in a list is a linear scan
    # repeated for every transcript of fastadict.
    blasted = set(blasted_transcripts)
    # The with-blocks close both files even if a write or insert_newlines() raises.
    with open(matched_sequences, "w") as F_matched:
        with open(unmatched_sequences, "w") as F_unmatched:
            for transcript in fastadict:
                if transcript in blasted:
                    target = F_matched
                else:
                    target = F_unmatched
                # Same bytes as "print >> target, ...": the record plus one newline.
                target.write(">%s\n%s\n" % (transcript, insert_newlines(fastadict[transcript])))
    return
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
227
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
228
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
def __main__():
    '''Parse the command line, summarise every blast subject, write all outputs.'''
    args = Parser()
    fastadict = getfasta(args.sequences)
    Xblastdict = getblast(args.blast)
    # One summary dict per subject, filled from the six values returned by
    # subjectCoverage; outputParsing reads these keys back.
    results = defaultdict(dict)
    for subject in Xblastdict:
        (hit_dic, subject_length, total_coverage,
         relative_coverage, max_bit_score, mean_bit_score) = subjectCoverage(
            fastadict, Xblastdict, subject, args.flanking)
        summary = results[subject]
        summary["HitDic"] = hit_dic
        summary["subjectLength"] = subject_length
        summary["TotalCoverage"] = total_coverage
        summary["RelativeSubjectCoverage"] = relative_coverage
        summary["maxBitScores"] = max_bit_score
        summary["meanBitScores"] = mean_bit_score
    # outputParsing writes the tabular and fasta reports and returns the
    # transcript names that dispatch_sequences then uses to split the input.
    blasted_transcripts = outputParsing(
        args.dataset_name,
        args.tabularOutput,
        args.fastaOutput,
        results,
        Xblastdict,
        fastadict,
        filter_relativeCov=args.filter_relativeCov,
        filter_maxScore=args.filter_maxScore,
        filter_meanScore=args.filter_meanScore,
        filter_term_in=args.filter_term_in,
        filter_term_out=args.filter_term_out,
        mode=args.mode,
    )
    dispatch_sequences(fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)
2
bb0d4cd765c5 planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents: 1
diff changeset
245
15
1991c830504a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents: 6
diff changeset
# Run the tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    __main__()