comparison overlapping_reads.py @ 6:4da23f009c9e draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
author artbio
date Sun, 10 Sep 2017 10:27:19 -0400
parents a7fd04208764
children 07771982ef9b
comparison
equal deleted inserted replaced
5:a7fd04208764 6:4da23f009c9e
83 for chrom in all_query_positions: 83 for chrom in all_query_positions:
84 all_query_positions[chrom] = sorted( 84 all_query_positions[chrom] = sorted(
85 list(set(all_query_positions[chrom]))) 85 list(set(all_query_positions[chrom])))
86 return all_query_positions 86 return all_query_positions
87 87
def countpairs(self, uppers, lowers):
    """Count the pairs formed greedily between two collections of reads.

    Reads whose length is in neither ``self.query_range`` nor
    ``self.target_range`` are discarded. Every remaining read is repeated
    ``self.readdic[read]`` times. Each expanded upper read is then matched
    with the first remaining lower read such that one length is in the
    query range and the other in the target range; a matched lower read is
    consumed and cannot be reused.

    The matching is first-fit in input order, so the result depends on the
    order of ``uppers`` and ``lowers``. Neither argument is modified.

    Args:
        uppers: iterable of reads used as the first member of a pair.
        lowers: iterable of reads used as the second member of a pair.

    Returns:
        The number of pairs formed.
    """
    query_sizes = self.query_range
    target_sizes = self.target_range

    def expand(reads):
        # Filter on size first, then repeat each read by its count.
        kept = [read for read in reads
                if len(read) in query_sizes or len(read) in target_sizes]
        pool = []
        for read in kept:
            pool += [read] * self.readdic[read]
        return pool

    up_pool = expand(uppers)
    down_pool = expand(lowers)

    n_pairs = 0
    for up in up_pool:
        up_is_query = len(up) in query_sizes
        up_is_target = len(up) in target_sizes
        for idx, down in enumerate(down_pool):
            compatible = (
                (up_is_query and len(down) in target_sizes)
                or (up_is_target and len(down) in query_sizes)
            )
            if compatible:
                # Consume the first compatible lower read and move on to
                # the next upper read.
                del down_pool[idx]
                n_pairs += 1
                break
    return n_pairs
113
88 def pairing(self): 114 def pairing(self):
89 F = open(self.output, 'w') 115 F = open(self.output, 'w')
90 query_range = self.query_range 116 query_range = self.query_range
91 target_range = self.target_range 117 target_range = self.target_range
92 overlap = self.overlap 118 overlap = self.overlap
93 stringresult = [] 119 stringresult = []
94 header_template = '>%s|coord=%s|strand %s|size=%s|nreads=%s\n%s\n' 120 header_template = '>%s|coord=%s|strand %s|size=%s|nreads=%s\n%s\n'
121 total_pairs = 0
122 print ('Chromosome\tNbre of pairs')
95 for chrom in sorted(self.chromosomes): 123 for chrom in sorted(self.chromosomes):
124 number_pairs = 0
96 for pos in self.all_query_positions[chrom]: 125 for pos in self.all_query_positions[chrom]:
97 stringbuffer = [] 126 stringbuffer = []
98 uppers = self.alignement_dic[chrom, pos, 'F'] 127 uppers = self.alignement_dic[chrom, pos, 'F']
99 lowers = self.alignement_dic[chrom, pos+overlap-1, 'R'] 128 lowers = self.alignement_dic[chrom, pos+overlap-1, 'R']
129 number_pairs += self.countpairs(uppers, lowers)
130 total_pairs += number_pairs
100 if uppers and lowers: 131 if uppers and lowers:
101 for upread in uppers: 132 for upread in uppers:
102 for downread in lowers: 133 for downread in lowers:
103 if (len(upread) in query_range and len(downread) in 134 if (len(upread) in query_range and len(downread) in
104 target_range) or (len(upread) in target_range 135 target_range) or (len(upread) in target_range
112 header_template % 143 header_template %
113 (chrom, pos+overlap-len(downread)+1, '-', 144 (chrom, pos+overlap-len(downread)+1, '-',
114 len(downread), self.readdic[downread], 145 len(downread), self.readdic[downread],
115 self.revcomp(downread))) 146 self.revcomp(downread)))
116 stringresult.extend(sorted(set(stringbuffer))) 147 stringresult.extend(sorted(set(stringbuffer)))
148 print('%s\t%s' % (chrom, number_pairs))
149 print('Total nbre of pairs that can be simultaneously formed\t%s' % total_pairs)
117 F.write(''.join(stringresult)) 150 F.write(''.join(stringresult))
118 151
119 def revcomp(self, sequence): 152 def revcomp(self, sequence):
120 antidict = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"} 153 antidict = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"}
121 revseq = sequence[::-1] 154 revseq = sequence[::-1]