annotate size_histogram.py @ 0:234b83159ea8 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
author artbio
date Tue, 11 Jul 2017 11:44:36 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
1 #!/usr/bin/python
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
2 # python parser module for size distributions, guided by GFF3
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
3
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
4 import argparse
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
5 import subprocess
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
6 from collections import OrderedDict
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
7 from smRtools import extractsubinstance
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
8 from smRtools import HandleSmRNAwindows
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
9
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
10
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def Parser(argv=None):
    """Define the command-line interface and return the parsed arguments.

    Args:
        argv: optional list of argument strings to parse. When None (the
            default) argparse reads the arguments from ``sys.argv[1:]``,
            which is what the script itself relies on.

    Returns:
        The ``argparse.Namespace`` holding one attribute per option below.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument('--output_size_distribution', action="store", type=str,
                            help="size distribution dataframe")
    # The help text used to read "output file", which described the wrong option.
    the_parser.add_argument('--reference_fasta', action="store", type=str,
                            help="path to the fasta reference")
    the_parser.add_argument('--reference_bowtie_index', action='store',
                            help="paths to indexed or fasta references")
    # The next four options are parallel lists: one value per input file.
    the_parser.add_argument('--input', nargs='+', help="paths to multiple input files")
    the_parser.add_argument('--ext', nargs='+', help="input file type")
    the_parser.add_argument('--label', nargs='+', help="labels of multiple input files")
    the_parser.add_argument('--normalization_factor', nargs='+', type=float,
                            help="Normalization factor for input file")
    the_parser.add_argument('--gff', type=str, help="GFF containing regions of interest")
    the_parser.add_argument('--minquery', type=int, help="Minimum readsize")
    the_parser.add_argument('--maxquery', type=int, help="Maximum readsize")
    the_parser.add_argument('--global_size', action="store_true",
                            help="if specified, size distribution is calculated for the sum of all items")
    the_parser.add_argument('--collapse', action="store_true",
                            help="if specified, forward and reverse reads are collapsed")
    return the_parser.parse_args(argv)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
27
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
28
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
# Parse the command line once; the functions below read these module-level names.
args = Parser()

# Pick the genome reference. A bowtie index takes precedence over a fasta file
# when both are supplied, as it did when the two options were tested in turn.
if args.reference_bowtie_index:
    genomeRefFormat = "bowtieIndex"
    genomeRefFile = args.reference_bowtie_index
elif args.reference_fasta:
    genomeRefFormat = "fastaSource"
    genomeRefFile = args.reference_fasta
else:
    # Without this guard both names stay unbound and the first sample load
    # fails later with an unhelpful NameError.
    raise SystemExit("size_histogram.py: one of --reference_fasta or "
                     "--reference_bowtie_index is required")

size_distribution_file = args.output_size_distribution
minquery = args.minquery
maxquery = args.maxquery
filePath = args.input
fileExt = args.ext
fileLabel = args.label
normalization_factor = args.normalization_factor
global_size = args.global_size
collapse = args.collapse

# Polarities written to the output table: a single collapsed category, or
# forward and reverse separately.
if collapse:
    pol = ["both"]
else:
    pol = ["F", "R"]

# Filled by process_samples(): sample label -> HandleSmRNAwindows instance.
MasterListOfGenomes = OrderedDict()
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
52
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def process_samples(filePath):
    """Load every alignment file into the module-level MasterListOfGenomes.

    The i-th path is paired with the i-th entry of the module-level lists
    ``fileLabel``, ``fileExt`` and ``normalization_factor``. Each label is
    printed to standard output before its file is loaded.

    Args:
        filePath: sequence of alignment file paths.

    Returns:
        The module-level ``MasterListOfGenomes`` mapping, with one
        ``HandleSmRNAwindows`` instance stored under each sample label.
    """
    # A distinct loop name keeps the sequence argument from being rebound
    # to one of its own elements while it is being iterated.
    for i, alignment_path in enumerate(filePath):
        label = fileLabel[i]
        # Single-argument print call: same output under Python 2 and Python 3.
        print(label)
        MasterListOfGenomes[label] = HandleSmRNAwindows(
            alignmentFile=alignment_path,
            alignmentFileFormat=fileExt[i],
            genomeRefFile=genomeRefFile,
            genomeRefFormat=genomeRefFormat,
            biosample=label,
            size_inf=minquery,
            size_sup=maxquery,
            norm=normalization_factor[i])
    return MasterListOfGenomes
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
60
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
61
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def write_size_distribution_dataframe(readDict, size_distribution_file, pol=("both",)):
    """Write one tab-separated row per (sample, gene, polarity, read size).

    Args:
        readDict: mapping of sample label to either a plain mapping of
            gene name -> item (what gff_item_subinstances() builds), or an
            object exposing such a mapping as ``.instanceDict``. Every item
            must provide ``size_histogram()`` returning a mapping of
            polarity -> {size: count}.
        size_distribution_file: path of the table to create or overwrite.
        pol: polarity keys to export from each histogram.

    The output starts with the header ``gene polarity size count sample``
    (tab separated) and is written even when ``readDict`` is empty.
    """
    with open(size_distribution_file, 'w') as size_distrib:
        size_distrib.write("gene\tpolarity\tsize\tcount\tsample\n")
        for sample in readDict:
            entry = readDict[sample]
            # Choose the per-gene mapping from the entry itself rather than
            # from the global --gff flag: GFF-derived entries are plain dicts,
            # the others carry the mapping in .instanceDict.
            genes = getattr(entry, "instanceDict", entry)
            for gene in genes:
                histogram = genes[gene].size_histogram()
                for polarity in pol:
                    for size, count in histogram[polarity].items():
                        size_distrib.write(
                            "%s\t%s\t%s\t%s\t%s\n" % (gene, polarity, size, count, sample))
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
76
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
77
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def write_size_distribution_dataframe_global(readDict, size_distribution_file, pol=("both",)):
    """Write one tab-separated row per (sample, polarity, read size).

    Unlike write_size_distribution_dataframe(), the histogram is requested
    from each sample object as a whole, and the ``gene`` column always holds
    the literal string ``sample``.

    Args:
        readDict: mapping of sample label to an object providing
            ``size_histogram()``, which returns a mapping of
            polarity -> {size: count}.
        size_distribution_file: path of the table to create or overwrite.
        pol: polarity keys to export from each histogram.
    """
    gene = "sample"  # constant placeholder for the gene column
    with open(size_distribution_file, 'w') as size_distrib:
        size_distrib.write("gene\tpolarity\tsize\tcount\tsample\n")
        for sample in readDict:
            histogram = readDict[sample].size_histogram()
            for polarity in pol:
                for size, count in histogram[polarity].items():
                    size_distrib.write(
                        "%s\t%s\t%s\t%s\t%s\n" % (gene, polarity, size, count, sample))
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
87
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
88
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def gff_item_subinstances(readDict, gff3):
    """Extract, for every sample, one sub-instance per feature of a GFF3 file.

    Args:
        readDict: mapping of sample label to an object whose ``.instanceDict``
            is indexed by the chromosome name found in GFF column 1.
        gff3: path to the GFF3 file listing the regions of interest.

    Returns:
        An OrderedDict of sample label -> {feature name: sub-instance}, where
        each sub-instance comes from ``extractsubinstance`` called with the
        feature's start and end coordinates (GFF columns 4 and 5).

    Raises:
        KeyError: if a feature names a chromosome absent from a sample's
            ``instanceDict``.
    """
    GFFinstanceDict = OrderedDict()
    with open(gff3) as gff:
        for line in gff:
            # Strip the line terminator explicitly: slicing off the last
            # character would eat real data when the file has no final
            # newline, and would leave a stray "\r" on CRLF files.
            line = line.rstrip("\r\n")
            # Blank lines carry no fields and would otherwise raise IndexError.
            if not line.strip() or line.startswith("#"):
                continue
            gff_fields = line.split("\t")
            chrom = gff_fields[0]
            # Isolate the value of the Name= attribute in the last column.
            gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0]
            item_upstream_coordinate = int(gff_fields[3])
            item_downstream_coordinate = int(gff_fields[4])
            item_polarity = gff_fields[6]
            for sample in readDict:
                if sample not in GFFinstanceDict:
                    GFFinstanceDict[sample] = {}
                subinstance = extractsubinstance(
                    item_upstream_coordinate,
                    item_downstream_coordinate,
                    readDict[sample].instanceDict[chrom])
                if item_polarity == '-':
                    # Negate every key of readDict for features on the minus strand.
                    subinstance.readDict = {
                        key * -1: value
                        for key, value in subinstance.readDict.items()}
                subinstance.gene = gff_name
                GFFinstanceDict[sample][gff_name] = subinstance
    return GFFinstanceDict
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
110
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
# Driver: load all samples, optionally restrict them to the GFF items, then
# write the size distribution table.
MasterListOfGenomes = process_samples(filePath)

if args.gff:
    MasterListOfGenomes = gff_item_subinstances(MasterListOfGenomes, args.gff)

# NOTE(review): when --gff and --global_size are combined, the values handed to
# the global writer are the plain dicts built by gff_item_subinstances -- confirm
# that this combination is prevented upstream.
(write_size_distribution_dataframe_global
 if global_size
 else write_size_distribution_dataframe)(MasterListOfGenomes, size_distribution_file, pol)