comparison mircounts.py @ 13:b045c30fb768 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af48e9f6df2717ffd3731a974be1ec36e4eff779"
author artbio
date Fri, 18 Oct 2019 19:18:50 -0400
parents 2a08a6eb471c
children
comparison
equal deleted inserted replaced
12:6d3e98cba73a 13:b045c30fb768
41 Takes an AlignmentFile object and returns a dictionary of lists 41 Takes an AlignmentFile object and returns a dictionary of lists
42 of coverage along the coordinates of pre_mirs (as keys) 42 of coverage along the coordinates of pre_mirs (as keys)
43 """ 43 """
44 coverage = dict() 44 coverage = dict()
45 for ref_name, ref_len in zip(bamfile.references, bamfile.lengths): 45 for ref_name, ref_len in zip(bamfile.references, bamfile.lengths):
46 coverage[ref_name] = bamfile.count_coverage(reference=ref_name, 46 coverage[ref_name] = bamfile.count_coverage(contig=ref_name,
47 start=0, end=ref_len, 47 start=0, stop=ref_len,
48 quality_threshold=quality) 48 quality_threshold=quality)
49 """ Add the 4 coverage values """ 49 """ Add the 4 coverage values """
50 coverage[ref_name] = [sum(x) for x in 50 coverage[ref_name] = [sum(x) for x in
51 zip(*coverage[ref_name])] 51 zip(*coverage[ref_name])]
52 return coverage 52 return coverage
66 mir_name = gff_fields[0] 66 mir_name = gff_fields[0]
67 premir_name = gff_fields[8].split('Parent_mir_Name=')[-1] 67 premir_name = gff_fields[8].split('Parent_mir_Name=')[-1]
68 mir_start = int(gff_fields[3]) 68 mir_start = int(gff_fields[3])
69 mir_end = int(gff_fields[4]) 69 mir_end = int(gff_fields[4])
70 # GFF is 1-based, pysam is 0-based. 70 # GFF is 1-based, pysam is 0-based.
71 counts[mir_name] = bamfile.count(reference=premir_name, 71 counts[mir_name] = bamfile.count(contig=premir_name,
72 start=mir_start-1, 72 start=mir_start-1,
73 end=mir_end-1) 73 stop=mir_end-1)
74 return counts 74 return counts
75 75
76 76
77 def write_dataframe_coverage(countdict, outfile): 77 def write_dataframe_coverage(countdict, outfile):
78 """ 78 """