Mercurial > repos > artbio > small_rna_signatures
comparison overlapping_reads.py @ 4:20d28cfdeefe draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit cfdc08418887bfe4a35588cd78d0a2b6ffa6e19e
author | artbio |
---|---|
date | Fri, 08 Sep 2017 04:44:22 -0400 |
parents | 4d9682bd3a6b |
children | a7fd04208764 |
comparison
equal
deleted
inserted
replaced
3:4d9682bd3a6b | 4:20d28cfdeefe |
---|---|
49 all_query_positions = defaultdict(list) | 49 all_query_positions = defaultdict(list) |
50 for chrom in self.chromosomes: | 50 for chrom in self.chromosomes: |
51 for read in bam_object.fetch(chrom): | 51 for read in bam_object.fetch(chrom): |
52 if not read.is_reverse: | 52 if not read.is_reverse: |
53 all_query_positions[chrom].append( | 53 all_query_positions[chrom].append( |
54 read.get_reference_positions(full_length=True)[0]) | 54 read.reference_start) |
55 else: | 55 else: |
56 all_query_positions[chrom].append( | 56 all_query_positions[chrom].append( |
57 read.get_reference_positions(full_length=True)[-1]) | 57 read.reference_end) |
58 all_query_positions[chrom] = sorted( | 58 all_query_positions[chrom] = sorted( |
59 list(set(all_query_positions[chrom]))) | 59 list(set(all_query_positions[chrom]))) |
60 return all_query_positions | 60 return all_query_positions |
61 | 61 |
62 def direct_pairing(self, minquery, maxquery, mintarget, maxtarget, | 62 def direct_pairing(self, minquery, maxquery, mintarget, maxtarget, |
75 start=pos, end=pos+overlap-1) | 75 start=pos, end=pos+overlap-1) |
76 iterreads_4 = self.bam_object.fetch(chrom, | 76 iterreads_4 = self.bam_object.fetch(chrom, |
77 start=pos, end=pos+overlap-1) | 77 start=pos, end=pos+overlap-1) |
78 # 1 | 78 # 1 |
79 for queryread in iterreads_1: | 79 for queryread in iterreads_1: |
80 if queryread.get_reference_positions( | 80 if queryread.reference_start == pos and \ |
81 full_length=True)[0] == pos and \ | |
82 queryread.query_alignment_length in query_range \ | 81 queryread.query_alignment_length in query_range \ |
83 and not queryread.is_reverse: | 82 and not queryread.is_reverse: |
84 for targetread in iterreads_2: | 83 for targetread in iterreads_2: |
85 if (targetread. | 84 if (targetread. |
86 get_reference_positions(full_length=True)[-1] | 85 get_reference_positions()[-1] |
87 == queryread.get_reference_positions( | 86 == queryread.get_reference_positions( |
88 full_length=True)[overlap-1] and | 87 )[overlap-1] and |
89 targetread.query_alignment_length in | 88 targetread.query_alignment_length in |
90 target_range and targetread.is_reverse): | 89 target_range and targetread.is_reverse): |
91 targetreadseq = self.revcomp( | 90 targetreadseq = self.revcomp( |
92 targetread.query_sequence) | 91 targetread.query_sequence) |
93 stringresult.append( | 92 stringresult.append( |
94 '>%s|%s|%s|%s|n=%s\n%s\n' % | 93 '>%s|%s|%s|%s|n=%s\n%s\n' % |
95 (chrom, queryread.get_reference_positions( | 94 (chrom, queryread.reference_start+1, |
96 full_length=True)[0]+1, | |
97 'F', queryread.query_alignment_length, | 95 'F', queryread.query_alignment_length, |
98 self.readdic[queryread.query_sequence], | 96 self.readdic[queryread.query_sequence], |
99 queryread.query_sequence)) | 97 queryread.query_sequence)) |
100 stringresult.append( | 98 stringresult.append( |
101 '>%s|%s|%s|%s|n=%s\n%s\n' % | 99 '>%s|%s|%s|%s|n=%s\n%s\n' % |
102 (chrom, targetread.get_reference_positions( | 100 (chrom, targetread.reference_start+1, |
103 full_length=True)[0]+1, | |
104 'R', targetread.query_alignment_length, | 101 'R', targetread.query_alignment_length, |
105 self.readdic[targetread.query_sequence], | 102 self.readdic[targetread.query_sequence], |
106 targetreadseq)) | 103 targetreadseq)) |
107 # 2 | 104 # 2 |
108 for queryread in iterreads_3: | 105 for queryread in iterreads_3: |
109 if queryread.get_reference_positions( | 106 if queryread.reference_end-1 == pos+overlap-1 and \ |
110 full_length=True)[-1] == pos+overlap-1 and \ | |
111 queryread.query_alignment_length in query_range \ | 107 queryread.query_alignment_length in query_range \ |
112 and queryread.is_reverse: | 108 and queryread.is_reverse: |
113 for targetread in iterreads_4: | 109 for targetread in iterreads_4: |
114 if (targetread. | 110 if (targetread. |
115 get_reference_positions(full_length=True)[0] | 111 reference_start |
116 == pos and targetread.query_alignment_length | 112 == pos and targetread.query_alignment_length |
117 in target_range and not | 113 in target_range and not |
118 targetread.is_reverse): | 114 targetread.is_reverse): |
119 queryreadseq = self.revcomp( | 115 queryreadseq = self.revcomp( |
120 queryread.query_sequence) | 116 queryread.query_sequence) |
121 targetreadseq = targetread.query_sequence | 117 targetreadseq = targetread.query_sequence |
122 stringresult.append( | 118 stringresult.append( |
123 '>%s|%s|%s|%s|n=%s\n%s\n' % | 119 '>%s|%s|%s|%s|n=%s\n%s\n' % |
124 (chrom, queryread.get_reference_positions( | 120 (chrom, queryread.reference_start+1, 'R', |
125 full_length=True)[0]+1, 'R', | |
126 queryread.query_alignment_length, | 121 queryread.query_alignment_length, |
127 self.readdic[queryread.query_sequence], | 122 self.readdic[queryread.query_sequence], |
128 queryreadseq)) | 123 queryreadseq)) |
129 stringresult.append( | 124 stringresult.append( |
130 '>%s|%s|%s|%s|n=%s\n%s\n' % | 125 '>%s|%s|%s|%s|n=%s\n%s\n' % |
131 (chrom, targetread.get_reference_positions( | 126 (chrom, targetread.reference_start+1, |
132 full_length=True)[0]+1, | |
133 'F', targetread.query_alignment_length, | 127 'F', targetread.query_alignment_length, |
134 self.readdic[targetread.query_sequence], | 128 self.readdic[targetread.query_sequence], |
135 targetreadseq)) | 129 targetreadseq)) |
136 stringresult = sorted(set(stringresult), | 130 stringresult = sorted(set(stringresult), |
137 key=lambda x: stringresult.index(x)) | 131 key=lambda x: stringresult.index(x)) |