Mercurial > repos > drosofff > msp_blastparser_and_hits
annotate BlastParser_and_hits.py @ 16:0e51eef139ab draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit c81a2a37d69ad570068741411bae12dd8289d60c-dirty
author | drosofff |
---|---|
date | Wed, 11 Oct 2017 12:43:04 -0400 |
parents | 1991c830504a |
children |
rev | line source |
---|---|
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/python |
4
60b6bd959929
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
drosofff
parents:
3
diff
changeset
|
2 # blastn tblastn blastx parser revised 14-1-2016. |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
3 # drosofff@gmail.com |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
4 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
5 import argparse |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
6 from collections import defaultdict |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
7 |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
8 |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def Parser():
    """Parse the command line and return the resulting argparse namespace.

    --sequences, --blast, --fastaOutput and --tabularOutput are mandatory:
    when one of them is missing or empty the parser exits through
    ``ArgumentParser.error``.  --flanking defaults to 0, the three numeric
    filters default to 0 and the two term filters default to "".
    """
    the_parser = argparse.ArgumentParser()
    # getblast() reads the subject length from a 13th column, so the help
    # text must not advertise a plain 12-column blast output.
    the_parser.add_argument('--blast', action="store", type=str, help="Path to the blast output (tabular format, 13 columns: the 12 standard columns plus the subject length)")
    the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences")
    the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits")
    the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis")
    the_parser.add_argument('--flanking', action="store", type=int, default=0, help="number of flanking nucleotides added to the hit sequences")
    the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs")
    the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)")
    the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out best BitScores below the specified float number")
    the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number")
    the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list")
    the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list")
    the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned")
    the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned")
    the_parser.add_argument('--dataset_name', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output")
    args = the_parser.parse_args()
    # Kept as an explicit check (rather than required=True) so that the
    # historical error message is preserved.
    if not all((args.sequences, args.blast, args.fastaOutput, args.tabularOutput)):
        the_parser.error('argument(s) missing, call the -h option of the script')
    return args
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
31 |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
32 |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def median(lst):
    """Return the median of the values in lst, or None when lst is empty.

    For an odd number of values the middle element is returned unchanged;
    for an even number the two middle elements are averaged and the result
    is a float.
    """
    ordered = sorted(lst)
    size = len(ordered)
    if size < 1:
        return None
    # Floor division keeps the index an int under both Python 2 and
    # Python 3 (true division would produce a float and a TypeError).
    middle = size // 2
    if size % 2 == 1:
        return ordered[middle]
    return float(sum(ordered[middle - 1:middle + 1])) / 2.0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
41 |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
42 |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def mean(lst):
    """Return the arithmetic mean of lst as a float, or 0 when lst is empty."""
    count = len(lst)
    if count >= 1:
        return sum(lst) / float(count)
    return 0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
47 |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
48 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
def getfasta(fastafile):
    """Load a fasta file into a dictionary {header: sequence}.

    The header is the full text following ">" on the header line; the
    sequence is the concatenation of all following lines up to the next
    header, with line endings removed.  When a header occurs twice, the
    last record wins.  Blank lines are ignored.

    Raises ValueError when sequence data is found before the first header.
    """
    chunks = {}
    header = None
    # The context manager guarantees the handle is closed, even on error.
    with open(fastafile) as handle:
        for line in handle:
            # Strip the line ending explicitly instead of chopping the last
            # character: this is safe for CRLF files and for a final line
            # without a trailing newline.
            line = line.rstrip("\r\n")
            if not line:
                continue
            if line[0] == ">":
                header = line[1:]
                chunks[header] = []
            elif header is None:
                raise ValueError("%s: sequence data found before the first fasta header" % fastafile)
            else:
                chunks[header].append(line)
    return dict((name, "".join(parts)) for name, parts in chunks.items())
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
60 |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
61 |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def insert_newlines(string, every=60):
    """Return string wrapped with a newline after each run of `every` characters.

    No newline is appended after the last chunk; an empty string is
    returned unchanged.
    """
    # range() instead of xrange() so the function also runs on Python 3.
    return '\n'.join(string[start:start + every] for start in range(0, len(string), every))
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
67 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
68 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
def getblast(blastfile):
    """Parse a 13-column tabular blast output into a nested dictionary.

    The returned defaultdict maps subject -> transcript (query) -> list of
    hits, each hit being a list "blastinfo" laid out as follows:
    blastinfo [0] Percentage of identical matches
    blastinfo [1] Alignment length
    blastinfo [2] Number of mismatches
    blastinfo [3] Number of gap openings
    blastinfo [4] Start of alignment in query
    blastinfo [5] End of alignment in query
    blastinfo [6] Start of alignment in subject (database hit)
    blastinfo [7] End of alignment in subject (database hit)
    blastinfo [8] Expectation value (E-value), kept as a string
    blastinfo [9] Bit score
    blastinfo [10] Subject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST)

    Blank lines are skipped.  Raises IndexError when a line has fewer than
    13 columns and ValueError when a numeric column cannot be converted.
    """
    blastdic = defaultdict(dict)
    with open(blastfile) as handle:
        for line_number, line in enumerate(handle, 1):
            # rstrip rather than line[:-1]: the latter truncates the subject
            # length when the last line has no trailing newline.
            line = line.rstrip("\r\n")
            if not line:
                continue
            fields = line.split("\t")
            if len(fields) < 13:
                raise IndexError("%s, line %d: %d columns found, 13 expected (subject length must be the 13th column)" % (blastfile, line_number, len(fields)))
            transcript, subject = fields[0], fields[1]
            blastinfo = [float(fields[2])]  # blastinfo[0]
            blastinfo.extend(int(value) for value in fields[3:10])  # blastinfo[1:8]
            blastinfo.append(fields[10])  # blastinfo[8] E-value remains as a string type
            blastinfo.append(float(fields[11]))  # blastinfo[9] Bit score
            blastinfo.append(int(fields[12]))  # blastinfo[10] Subject length
            # Append-or-create without a catch-all exception handler.
            blastdic[subject].setdefault(transcript, []).append(blastinfo)
    return blastdic
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
96 |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
97 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
def getseq(fastadict, transcript, up, down, orientation="direct"):
    """Return the sub-sequence [up, down] (1-based, inclusive) of a transcript.

    fastadict maps headers to sequences.  With orientation "direct" the
    slice is returned as is; with any other value its reverse complement is
    returned.  Upper- and lower-case bases as well as IUPAC ambiguity codes
    are complemented, preserving case.

    Raises KeyError when the transcript is unknown or when the slice holds
    a character that has no complement.
    """
    # Clamp the start so that up <= 0 cannot turn into a negative slice
    # index counting from the end of the sequence.
    pickseq = fastadict[transcript][max(up - 1, 0):down]
    if orientation == "direct":
        return pickseq
    plain = "ACGTNRYSWKMBDHV"
    paired = "TGCANYRSWMKVHDB"
    complement = dict(zip(plain + plain.lower(), paired + paired.lower()))
    return "".join(complement[base] for base in reversed(pickseq))
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
108 |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
109 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
def subjectCoverage(fastadict, blastdict, subject, QueriesFlankingNucleotides=0):
    """Summarise all the blast hits recorded for one subject.

    blastdict is the subject -> transcript -> hits structure built by
    getblast(); fastadict maps transcript headers to their sequences.

    Returns a 6-tuple:
    (HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage,
    best bit score, mean bit score), where HitDic maps a descriptive hit
    name to the hit sequence extracted from the transcript, and
    TotalSubjectCoverage is the number of distinct subject positions
    covered by at least one hit.

    Raises ValueError when no hit is recorded for the subject.
    """
    # A set is filled directly so that overlapping hits do not pile up
    # duplicated positions in an intermediate list.
    covered_positions = set()
    HitDic = {}
    bitScores = []
    subjectLength = None
    for transcript, hits in blastdict[subject].items():
        prefix = "%s--%s_" % (subject, transcript)
        for hitNumber, hit in enumerate(hits, 1):
            suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (hitNumber, hit[0], hit[1], hit[8])
            # query coverage by a hit is in hit[4:6]
            HitDic[prefix + suffix] = GetHitSequence(fastadict, transcript, hit[4], hit[5], QueriesFlankingNucleotides)
            # subject coverage by a hit is in hit[6:8]; the coordinates may
            # come in either order depending on the strand
            covered_positions.update(range(min(hit[6], hit[7]), max(hit[6], hit[7]) + 1))
            bitScores.append(hit[9])
            subjectLength = hit[10]  # always the same value for a given subject
    if subjectLength is None:
        raise ValueError("no blast hit recorded for subject %r" % (subject,))
    TotalSubjectCoverage = len(covered_positions)
    RelativeSubjectCoverage = TotalSubjectCoverage / float(subjectLength)
    return HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage, max(bitScores), mean(bitScores)
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
127 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
128 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
def GetHitSequence(fastadict, FastaHeader, leftCoordinate, rightCoordinate, FlankingValue):
    """Extract the sequence of a hit from the transcript named FastaHeader.

    The hit is "direct" when rightCoordinate is strictly greater than
    leftCoordinate; otherwise it is "reverse" and the two coordinates are
    swapped before extraction.  FlankingValue is subtracted from the lower
    coordinate only, which is clamped to 1; the upper coordinate is passed
    to getseq() unchanged.
    """
    is_direct = rightCoordinate > leftCoordinate
    if is_direct:
        start, end = leftCoordinate, rightCoordinate
    else:
        start, end = rightCoordinate, leftCoordinate
    # Never let the flank push the start before the first position.
    start = start - FlankingValue if start - FlankingValue > 0 else 1
    strand = "direct" if is_direct else "reverse"
    return getseq(fastadict, FastaHeader, start, end, strand)
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
140 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
141 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
142 def outputParsing(dataset_name, F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"): |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
def filter_results(results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""):
    """Remove, in place, the subjects of results that fail any filter.

    A subject is dropped when its "RelativeSubjectCoverage", "maxBitScores"
    or "meanBitScores" value is below the matching threshold, when
    filter_term_in is not a substring of the subject name, or when a
    non-empty filter_term_out is a substring of it.  The same (mutated)
    dictionary is returned.
    """
    # Iterate over a snapshot of the keys: deleting entries while looping
    # over the live key view raises RuntimeError on Python 3.
    for subject in list(results):
        stats = results[subject]
        rejected = (
            stats["RelativeSubjectCoverage"] < filter_relativeCov
            or stats["maxBitScores"] < filter_maxScore
            or stats["meanBitScores"] < filter_meanScore
            or filter_term_in not in subject
            or (filter_term_out and filter_term_out in subject)
        )
        if rejected:
            del results[subject]
    return results
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
163 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
164 F = open(F, "w") |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
165 Fasta = open(Fasta, "w") |
2
bb0d4cd765c5
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents:
1
diff
changeset
|
166 blasted_transcripts = [] |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
167 filter_results(results, filter_relativeCov, filter_maxScore, filter_meanScore, filter_term_in, filter_term_out) |
2
bb0d4cd765c5
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents:
1
diff
changeset
|
168 for subject in results: |
bb0d4cd765c5
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents:
1
diff
changeset
|
169 for transcript in Xblastdict[subject]: |
bb0d4cd765c5
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents:
1
diff
changeset
|
170 blasted_transcripts.append(transcript) |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
171 blasted_transcripts = list(set(blasted_transcripts)) |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
172 if mode == "verbose": |
6
78c34df2dd8d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
drosofff
parents:
5
diff
changeset
|
173 print >>F, "--- %s ---" % (dataset_name) |
78c34df2dd8d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
drosofff
parents:
5
diff
changeset
|
174 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore" |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
175 for subject in sorted(results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
6
78c34df2dd8d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
drosofff
parents:
5
diff
changeset
|
176 print >> F, " \n# %s" % subject |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
177 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
178 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
179 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) |
3
8f5d48294f70
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3d9ddd0f6f3c3b97a3bebf52646731ad6771e178
drosofff
parents:
2
diff
changeset
|
180 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"]) |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
181 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
182 for header in results[subject]["HitDic"]: |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
183 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header])) |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
184 print >> Fasta, "" # final carriage return for the sequence |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
185 for transcript in Xblastdict[subject]: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
186 transcriptSize = float(len(fastadict[transcript])) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
187 for hit in Xblastdict[subject][transcript]: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
188 percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov = hit[0], hit[1], hit[6], hit[7], "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
189 Eval, BitScore = hit[8], hit[9] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
190 info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
191 info = [str(i) for i in info] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
192 info = "\t".join(info) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
193 print >> F, info |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
194 else: |
6
78c34df2dd8d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
drosofff
parents:
5
diff
changeset
|
195 print >>F, "--- %s ---" % (dataset_name) |
3
8f5d48294f70
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3d9ddd0f6f3c3b97a3bebf52646731ad6771e178
drosofff
parents:
2
diff
changeset
|
196 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
197 for subject in sorted(results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
198 line = [] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
199 line.append(subject) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
200 line.append(results[subject]["subjectLength"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
201 line.append(results[subject]["TotalCoverage"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
202 line.append(results[subject]["RelativeSubjectCoverage"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
203 line.append(results[subject]["maxBitScores"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
204 line.append(results[subject]["meanBitScores"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
205 line = [str(i) for i in line] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
206 print >> F, "\t".join(line) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
207 for header in results[subject]["HitDic"]: |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
208 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header])) |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
209 print >> Fasta, "" # final carriage return for the sequence |
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
210 F.close() |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
211 Fasta.close() |
2
bb0d4cd765c5
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents:
1
diff
changeset
|
212 return blasted_transcripts |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
213 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
214 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
def dispatch_sequences(fastadict, blasted_transcripts, matched_sequences, unmatched_sequences):
    '''Split the input sequences into a "matched" and an "unmatched" FASTA file.

    Every key of ``fastadict`` is written exactly once, as a ``>header``
    line followed by its sequence passed through ``insert_newlines``
    (defined elsewhere in this file).

    fastadict           -- mapping of sequence header -> sequence
    blasted_transcripts -- iterable of headers that had a blast hit (the
                           list returned by the outputParsing function)
    matched_sequences   -- path of the output file for headers found in
                           ``blasted_transcripts``; overwritten
    unmatched_sequences -- path of the output file for all other headers;
                           overwritten

    Returns None.
    '''
    # Build the lookup set once: testing membership against the original
    # list costs O(len(list)) for every sequence in fastadict.
    blasted = set(blasted_transcripts)
    # "with" guarantees both handles are closed even if formatting or
    # writing a record raises part-way through.
    with open(matched_sequences, "w") as F_matched, \
            open(unmatched_sequences, "w") as F_unmatched:
        for transcript in fastadict:
            # Trailing "\n" reproduces the newline the print statement added.
            record = ">%s\n%s\n" % (transcript, insert_newlines(fastadict[transcript]))
            if transcript in blasted:
                F_matched.write(record)
            else:
                F_unmatched.write(record)
    return
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
227 |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
228 |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
def __main__():
    '''Run the whole pipeline from the command-line arguments.

    Loads the sequences and the blast table, computes the per-subject
    statistics with subjectCoverage, writes the tabular and fasta reports
    through outputParsing, then dispatches the input sequences into the
    aligned / unaligned output files.
    '''
    args = Parser()
    fastadict = getfasta(args.sequences)
    Xblastdict = getblast(args.blast)

    # One statistics record per blast subject.
    results = defaultdict(dict)
    for subject in Xblastdict:
        (hit_dic, subject_length, total_coverage, relative_coverage,
         max_bit_score, mean_bit_score) = subjectCoverage(
            fastadict, Xblastdict, subject, args.flanking)
        record = results[subject]
        record["HitDic"] = hit_dic
        record["subjectLength"] = subject_length
        record["TotalCoverage"] = total_coverage
        record["RelativeSubjectCoverage"] = relative_coverage
        record["maxBitScores"] = max_bit_score
        record["meanBitScores"] = mean_bit_score

    # Filtering and report options forwarded unchanged to outputParsing.
    report_options = dict(
        filter_relativeCov=args.filter_relativeCov,
        filter_maxScore=args.filter_maxScore,
        filter_meanScore=args.filter_meanScore,
        filter_term_in=args.filter_term_in,
        filter_term_out=args.filter_term_out,
        mode=args.mode,
    )
    blasted_transcripts = outputParsing(
        args.dataset_name, args.tabularOutput, args.fastaOutput,
        results, Xblastdict, fastadict, **report_options)

    dispatch_sequences(fastadict, blasted_transcripts,
                       args.al_sequences, args.un_sequences)
2
bb0d4cd765c5
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
drosofff
parents:
1
diff
changeset
|
245 |
15
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
246 if __name__ == "__main__": |
1991c830504a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
drosofff
parents:
6
diff
changeset
|
247 __main__() |