annotate BlastParser_and_hits.py @ 0:69ea2a13947f draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author drosofff
date Sun, 21 Jun 2015 14:31:29 -0400
parents
children 1964514aabde
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
1 #!/usr/bin/python
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
2 # blastn blastx parser revised debugged: 3-4-2015. Commit issue.
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
3 # drosofff@gmail.com
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
4
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
5 import sys
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
6 import argparse
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
7 from collections import defaultdict
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
8
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
9 def Parser():
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
10 the_parser = argparse.ArgumentParser()
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
11 the_parser.add_argument('--blast', action="store", type=str, help="Path to the blast output (tabular format, 12 column)")
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences")
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits")
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis")
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences")
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs")
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
17 args = the_parser.parse_args()
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
18 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
19 the_parser.error('argument(s) missing, call the -h option of the script')
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
20 if not args.flanking:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
21 args.flanking = 0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
22 return args
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
23
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
24 def median(lst):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
25 lst = sorted(lst)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
26 if len(lst) < 1:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
27 return None
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
28 if len(lst) %2 == 1:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
29 return lst[((len(lst)+1)/2)-1]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
30 if len(lst) %2 == 0:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
31 return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
32
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
33 def mean(lst):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
34 if len(lst) < 1:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
35 return 0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
36 return sum(lst) / float(len(lst))
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
37
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
38 def getfasta (fastafile):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
39 fastadic = {}
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
40 for line in open (fastafile):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
41 if line[0] == ">":
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
42 header = line[1:-1]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
43 fastadic[header] = ""
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
44 else:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
45 fastadic[header] += line
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
46 for header in fastadic:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
47 fastadic[header] = "".join(fastadic[header].split("\n"))
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
48 return fastadic
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
49
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
50 def insert_newlines(string, every=60):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
51 lines = []
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
52 for i in xrange(0, len(string), every):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
53 lines.append(string[i:i+every])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
54 return '\n'.join(lines)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
55
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
56 def getblast (blastfile):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
57 '''blastinfo [0] Percentage of identical matches
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
58 blastinfo [1] Alignment length
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
59 blastinfo [2] Number of mismatches
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
60 blastinfo [3] Number of gap openings
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
61 blastinfo [4] Start of alignment in query
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
62 blastinfo [5] End of alignment in query
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
63 blastinfo [6] Start of alignment in subject (database hit)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
64 blastinfo [7] End of alignment in subject (database hit)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
65 blastinfo [8] Expectation value (E-value)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
66 blastinfo [9] Bit score
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
67 blastinfo [10] Subject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST) '''
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
68 blastdic = defaultdict (dict)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
69 for line in open (blastfile):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
70 fields = line[:-1].split("\t")
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
71 transcript = fields[0]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
72 subject = fields[1]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
73 blastinfo = [float(fields[2]) ] # blastinfo[0]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
74 blastinfo = blastinfo + [int(i) for i in fields[3:10] ] # blastinfo[1:8] insets 1 to 7
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
75 blastinfo.append(fields[10]) # blastinfo[8] E-value remains as a string type
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
76 blastinfo.append(float(fields[11])) # blastinfo[9] Bit score
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
77 blastinfo.append(int(fields[12])) # blastinfo[10] Subject length MUST BE RETRIEVED THROUGH A 13 COLUMN BLAST OUTPUT
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
78 try:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
79 blastdic[subject][transcript].append(blastinfo)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
80 except:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
81 blastdic[subject][transcript] = [ blastinfo ]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
82 return blastdic
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
83
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
84 def getseq (fastadict, transcript, up, down, orientation="direct"):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
85 def reverse (seq):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
86 revdict = {"A":"T","T":"A","G":"C","C":"G","N":"N"}
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
87 revseq = [revdict[i] for i in seq[::-1]]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
88 return "".join(revseq)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
89 pickseq = fastadict[transcript][up-1:down]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
90 if orientation == "direct":
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
91 return pickseq
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
92 else:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
93 return reverse(pickseq)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
94
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
95 def subjectCoverage (fastadict, blastdict, subject, QueriesFlankingNucleotides=0):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
96 SubjectCoverageList = []
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
97 HitDic = {}
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
98 bitScores = []
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
99 for transcript in blastdict[subject]:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
100 prefix = "%s--%s_" % (subject, transcript)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
101 hitNumber = 0
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
102 for hit in blastdict[subject][transcript]:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
103 hitNumber += 1
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
104 suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (hitNumber, hit[0], hit[1], hit[8])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
105 HitDic[prefix+suffix] = GetHitSequence (fastadict, transcript, hit[4], hit[5], QueriesFlankingNucleotides) #query coverage by a hit is in hit[4:6]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
106 SubjectCoverageList += range (min([hit[6], hit[7]]), max([hit[6], hit[7]]) + 1) # subject coverage by a hit is in hit[6:8]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
107 bitScores.append(hit[9])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
108 subjectLength = hit [10] # always the same value for a given subject. Stupid but simple
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
109 TotalSubjectCoverage = len ( set (SubjectCoverageList) )
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
110 RelativeSubjectCoverage = TotalSubjectCoverage/float(subjectLength)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
111 return HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage, max(bitScores), mean(bitScores)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
112
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
113 def GetHitSequence (fastadict, FastaHeader, leftCoordinate, rightCoordinate, FlankingValue):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
114 if rightCoordinate > leftCoordinate:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
115 polarity = "direct"
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
116 else:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
117 polarity = "reverse"
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
118 leftCoordinate, rightCoordinate = rightCoordinate, leftCoordinate
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
119 if leftCoordinate - FlankingValue > 0:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
120 leftCoordinate -= FlankingValue
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
121 else:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
122 leftCoordinate = 1
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
123 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
124
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
125 def outputParsing (F, Fasta, results, Xblastdict, fastadict, mode="verbose"):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
126 F= open(F, "w")
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
127 Fasta=open(Fasta, "w")
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
128 if mode == "verbose":
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
129 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n"
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
130 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
131 print >> F, "#\n# %s" % subject
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
132 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
133 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
134 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
135 print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
136 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
137 for header in results[subject]["HitDic"]:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
138 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) )
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
139 print >> Fasta, "" # final carriage return for the sequence
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
140 for transcript in Xblastdict[subject]:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
141 transcriptSize = float(len(fastadict[transcript]))
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
142 for hit in Xblastdict[subject][transcript]:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
143 percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov = hit[0], hit[1], hit[6], hit[7], "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
144 Eval, BitScore = hit[8], hit[9]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
145 info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
146 info = [str(i) for i in info]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
147 info = "\t".join(info)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
148 print >> F, info
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
149 else:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
150 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score"
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
151 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
152 line = []
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
153 line.append(subject)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
154 line.append(results[subject]["subjectLength"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
155 line.append(results[subject]["TotalCoverage"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
156 line.append(results[subject]["RelativeSubjectCoverage"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
157 line.append(results[subject]["maxBitScores"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
158 line.append(results[subject]["meanBitScores"])
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
159 line = [str(i) for i in line]
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
160 print >> F, "\t".join(line)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
161 for header in results[subject]["HitDic"]:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
162 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) )
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
163 print >> Fasta, "" # final carriage return for the sequence
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
164 F.close()
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
165 Fasta.close()
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
166
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
167
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
168
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
169 def __main__ ():
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
170 args = Parser()
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
171 fastadict = getfasta (args.sequences)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
172 Xblastdict = getblast (args.blast)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
173 results = defaultdict(dict)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
174 for subject in Xblastdict:
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
175 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
176 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, args.mode)
69ea2a13947f planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
177 if __name__=="__main__": __main__()