Mercurial > repos > artbio > small_rna_signatures
comparison overlapping_reads.py @ 6:4da23f009c9e draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4
author | artbio |
---|---|
date | Sun, 10 Sep 2017 10:27:19 -0400 |
parents | a7fd04208764 |
children | 07771982ef9b |
comparison
equal
deleted
inserted
replaced
5:a7fd04208764 | 6:4da23f009c9e |
---|---|
83 for chrom in all_query_positions: | 83 for chrom in all_query_positions: |
84 all_query_positions[chrom] = sorted( | 84 all_query_positions[chrom] = sorted( |
85 list(set(all_query_positions[chrom]))) | 85 list(set(all_query_positions[chrom]))) |
86 return all_query_positions | 86 return all_query_positions |
87 | 87 |
| | 88 def countpairs(self, uppers, lowers): |
| | 89 query_range = self.query_range |
| | 90 target_range = self.target_range |
| | 91 uppers = [seq for seq in uppers if (len(seq) in query_range or len(seq) in target_range)] |
| | 92 uppers_expanded = [] |
| | 93 for seq in uppers: |
| | 94 expand = [seq for i in range(self.readdic[seq])] |
| | 95 uppers_expanded.extend(expand) |
| | 96 uppers = uppers_expanded |
| | 97 lowers = [seq for seq in lowers if (len(seq) in query_range or len(seq) in target_range)] |
| | 98 lowers_expanded = [] |
| | 99 for seq in lowers: |
| | 100 expand = [seq for i in range(self.readdic[seq])] |
| | 101 lowers_expanded.extend(expand) |
| | 102 lowers = lowers_expanded |
| | 103 paired = [] |
| | 104 for upread in uppers: |
| | 105 for downread in lowers: |
| | 106 if (len(upread) in query_range and len(downread) in |
| | 107 target_range) or (len(upread) in target_range and |
| | 108 len(downread) in query_range): |
| | 109 paired.append(upread) |
| | 110 lowers.remove(downread) |
| | 111 break |
| | 112 return len(paired) |
| | 113 |
88 def pairing(self): | 114 def pairing(self): |
89 F = open(self.output, 'w') | 115 F = open(self.output, 'w') |
90 query_range = self.query_range | 116 query_range = self.query_range |
91 target_range = self.target_range | 117 target_range = self.target_range |
92 overlap = self.overlap | 118 overlap = self.overlap |
93 stringresult = [] | 119 stringresult = [] |
94 header_template = '>%s|coord=%s|strand %s|size=%s|nreads=%s\n%s\n' | 120 header_template = '>%s|coord=%s|strand %s|size=%s|nreads=%s\n%s\n' |
| | 121 total_pairs = 0 |
| | 122 print ('Chromosome\tNbre of pairs') |
95 for chrom in sorted(self.chromosomes): | 123 for chrom in sorted(self.chromosomes): |
| | 124 number_pairs = 0 |
96 for pos in self.all_query_positions[chrom]: | 125 for pos in self.all_query_positions[chrom]: |
97 stringbuffer = [] | 126 stringbuffer = [] |
98 uppers = self.alignement_dic[chrom, pos, 'F'] | 127 uppers = self.alignement_dic[chrom, pos, 'F'] |
99 lowers = self.alignement_dic[chrom, pos+overlap-1, 'R'] | 128 lowers = self.alignement_dic[chrom, pos+overlap-1, 'R'] |
| | 129 number_pairs += self.countpairs(uppers, lowers) |
| | 130 total_pairs += number_pairs |
100 if uppers and lowers: | 131 if uppers and lowers: |
101 for upread in uppers: | 132 for upread in uppers: |
102 for downread in lowers: | 133 for downread in lowers: |
103 if (len(upread) in query_range and len(downread) in | 134 if (len(upread) in query_range and len(downread) in |
104 target_range) or (len(upread) in target_range | 135 target_range) or (len(upread) in target_range |
112 header_template % | 143 header_template % |
113 (chrom, pos+overlap-len(downread)+1, '-', | 144 (chrom, pos+overlap-len(downread)+1, '-', |
114 len(downread), self.readdic[downread], | 145 len(downread), self.readdic[downread], |
115 self.revcomp(downread))) | 146 self.revcomp(downread))) |
116 stringresult.extend(sorted(set(stringbuffer))) | 147 stringresult.extend(sorted(set(stringbuffer))) |
| | 148 print('%s\t%s' % (chrom, number_pairs)) |
| | 149 print('Total nbre of pairs that can be simultaneously formed\t%s' % total_pairs) |
117 F.write(''.join(stringresult)) | 150 F.write(''.join(stringresult)) |
118 | 151 |
119 def revcomp(self, sequence): | 152 def revcomp(self, sequence): |
120 antidict = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"} | 153 antidict = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"} |
121 revseq = sequence[::-1] | 154 revseq = sequence[::-1] |