Mercurial > repos > artbio > small_read_size_histograms
annotate size_histogram.py @ 0:234b83159ea8 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
author | artbio |
---|---|
date | Tue, 11 Jul 2017 11:44:36 -0400 |
parents | |
children |
rev | line source |
---|---|
0
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
1 #!/usr/bin/python |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
2 # python parser module for size distributions, guided by GFF3 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
3 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
4 import argparse |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
5 import subprocess |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
6 from collections import OrderedDict |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
7 from smRtools import extractsubinstance |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
8 from smRtools import HandleSmRNAwindows |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
9 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
10 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def Parser():
    """Declare the command-line options and parse ``sys.argv``.

    The list-valued options (``--input``, ``--ext``, ``--label`` and
    ``--normalization_factor``) are positional companions of each other:
    the i-th value of each one describes the i-th input file.

    Returns:
        argparse.Namespace: the parsed options, one attribute per option.
        Options that were not supplied are ``None`` (``False`` for the two
        ``store_true`` flags).
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument('--output_size_distribution', action="store", type=str,
                            help="size distribution dataframe")
    # The original help text here read "output file", although the value is
    # used as the genome reference file.
    the_parser.add_argument('--reference_fasta', action="store", type=str,
                            help="path to the reference in fasta format")
    the_parser.add_argument('--reference_bowtie_index', action='store',
                            help="path to the bowtie index of the reference")
    the_parser.add_argument('--input', nargs='+', help="paths to multiple input files")
    the_parser.add_argument('--ext', nargs='+', help="input file type")
    the_parser.add_argument('--label', nargs='+', help="labels of multiple input files")
    the_parser.add_argument('--normalization_factor', nargs='+', type=float,
                            help="Normalization factor for input file")
    the_parser.add_argument('--gff', type=str, help="GFF containing regions of interest")
    the_parser.add_argument('--minquery', type=int, help="Minimum readsize")
    the_parser.add_argument('--maxquery', type=int, help="Maximum readsize")
    the_parser.add_argument('--global_size', action="store_true",
                            help="if specified, size distribution is calculated for the sum of all items")
    the_parser.add_argument('--collapse', action="store_true",
                            help="if specified, forward and reverse reads are collapsed")
    return the_parser.parse_args()
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
27 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
28 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
args = Parser()

# Work out which kind of genome reference was supplied. When both options are
# given the bowtie index takes precedence (it was assigned last originally).
if args.reference_bowtie_index:
    genomeRefFormat = "bowtieIndex"
    genomeRefFile = args.reference_bowtie_index
elif args.reference_fasta:
    genomeRefFormat = "fastaSource"
    genomeRefFile = args.reference_fasta
else:
    # Without this guard genomeRefFormat/genomeRefFile stay unbound and the
    # script fails later with a NameError inside process_samples().
    raise SystemExit("size_histogram.py: error: one of --reference_fasta or "
                     "--reference_bowtie_index is required")

# Module-level aliases of the parsed options; the functions below read
# several of them as globals.
size_distribution_file = args.output_size_distribution
minquery = args.minquery
maxquery = args.maxquery
filePath = args.input
fileExt = args.ext
fileLabel = args.label
normalization_factor = args.normalization_factor
global_size = args.global_size
collapse = args.collapse

# Histogram keys to report: a single merged one, or forward and reverse.
pol = ["both"] if collapse else ["F", "R"]

# Maps each sample label to its read container, in command-line order.
MasterListOfGenomes = OrderedDict()
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
52 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def process_samples(filePath):
    """Build one HandleSmRNAwindows object per alignment file.

    Relies on module-level state set from the command line: ``fileLabel``,
    ``fileExt`` and ``normalization_factor`` are indexed in step with
    ``filePath``; ``genomeRefFile``, ``genomeRefFormat``, ``minquery`` and
    ``maxquery`` are passed through unchanged.

    Args:
        filePath: sequence of alignment file paths.

    Returns:
        The module-level ``MasterListOfGenomes`` mapping, filled in place with
        one entry per file, keyed by that file's label.

    Raises:
        IndexError: if any of the companion lists is shorter than ``filePath``.
    """
    # A distinct loop name: the original rebound the `filePath` parameter
    # while iterating over it.
    for i, alignment_path in enumerate(filePath):
        norm = normalization_factor[i]
        label = fileLabel[i]
        # Single-argument call form prints identically on Python 2 and 3.
        print(label)
        MasterListOfGenomes[label] = HandleSmRNAwindows(
            alignmentFile=alignment_path,
            alignmentFileFormat=fileExt[i],
            genomeRefFile=genomeRefFile,
            genomeRefFormat=genomeRefFormat,
            biosample=label,
            size_inf=minquery,
            size_sup=maxquery,
            norm=norm)
    return MasterListOfGenomes
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
60 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
61 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def write_size_distribution_dataframe(readDict, size_distribution_file, pol=("both",)):
    """Write a tab-separated table of read size counts per gene and sample.

    The file starts with the header ``gene polarity size count sample`` and
    then holds one row per (sample, gene, polarity, size).

    Args:
        readDict: mapping of sample label to either
            * a plain mapping ``{gene name: instance}`` (what
              gff_item_subinstances() returns per sample), or
            * an object exposing that mapping as ``.instanceDict``.
            Every instance must provide ``size_histogram()``, returning a
            mapping ``{polarity: {size: count}}``.
        size_distribution_file: path of the file to create or overwrite.
        pol: polarity keys to read from each histogram, in output order.

    Raises:
        KeyError: if a histogram lacks one of the requested polarities.
    """
    with open(size_distribution_file, 'w') as size_distrib:
        size_distrib.write("gene\tpolarity\tsize\tcount\tsample\n")
        for sample, entry in readDict.items():
            # Detect the container shape from the entry itself rather than
            # from the module-level `args.gff`, so the function does not
            # depend on hidden global state.
            genes = getattr(entry, "instanceDict", entry)
            for gene, instance in genes.items():
                histogram = instance.size_histogram()
                for polarity in pol:
                    for size, count in histogram[polarity].items():
                        size_distrib.write(
                            "%s\t%s\t%s\t%s\t%s\n" % (gene, polarity, size, count, sample))
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
76 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
77 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def write_size_distribution_dataframe_global(readDict, size_distribution_file, pol=("both",)):
    """Write a tab-separated table of read size counts, one histogram per sample.

    Same layout as write_size_distribution_dataframe(), except that the
    ``gene`` column always holds the literal string ``sample``: the
    histogram is taken from the sample object as a whole.

    Args:
        readDict: mapping of sample label to an object providing
            ``size_histogram()``, which returns ``{polarity: {size: count}}``.
        size_distribution_file: path of the file to create or overwrite.
        pol: polarity keys to read from each histogram, in output order.

    Raises:
        KeyError: if a histogram lacks one of the requested polarities.
    """
    gene = "sample"  # fixed placeholder for the gene column
    with open(size_distribution_file, 'w') as size_distrib:
        size_distrib.write("gene\tpolarity\tsize\tcount\tsample\n")
        for sample, instance in readDict.items():
            histogram = instance.size_histogram()
            for polarity in pol:
                for size, count in histogram[polarity].items():
                    size_distrib.write(
                        "%s\t%s\t%s\t%s\t%s\n" % (gene, polarity, size, count, sample))
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
87 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
88 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def gff_item_subinstances(readDict, gff3):
    """Extract, for every sample, one read sub-instance per GFF item.

    Each non-comment line of the GFF file is split on tabs; column 1 is the
    chromosome, columns 4 and 5 the item coordinates, column 7 the strand,
    and the item name is taken from the ``Name=`` attribute of the last
    column. extractsubinstance() (from smRtools) is then called for every
    sample with the item coordinates and that sample's
    ``instanceDict[chrom]``.

    Args:
        readDict: mapping of sample label to an object exposing
            ``instanceDict``, itself keyed by chromosome name.
        gff3: path to the GFF file.

    Returns:
        OrderedDict mapping sample label to ``{item name: sub-instance}``.
        Items sharing a name overwrite each other (last one wins). A sample
        only appears once at least one GFF item has been processed.

    Raises:
        KeyError: if an item's chromosome is missing from a sample's
            ``instanceDict``.
        IndexError, ValueError: on a data line with too few columns or
            non-integer coordinates.
    """
    GFFinstanceDict = OrderedDict()
    with open(gff3) as gff:
        for line in gff:
            # Skip comments/directives, and also blank lines, which used to
            # crash on gff_fields[3].
            if line.startswith("#") or not line.strip():
                continue
            # Strip only the line terminator: line[:-1] removed the last
            # real character when the final line had no trailing newline.
            gff_fields = line.rstrip("\r\n").split("\t")
            chrom = gff_fields[0]
            gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0]  # to isolate the GFF Name
            item_upstream_coordinate = int(gff_fields[3])
            item_downstream_coordinate = int(gff_fields[4])
            item_polarity = gff_fields[6]
            for sample in readDict:
                sample_items = GFFinstanceDict.setdefault(sample, {})
                subinstance = extractsubinstance(
                    item_upstream_coordinate,
                    item_downstream_coordinate,
                    readDict[sample].instanceDict[chrom])
                if item_polarity == '-':
                    # Negate every key of the sub-instance's readDict for
                    # minus-strand items. NOTE(review): presumably the keys
                    # are signed read offsets, so this swaps forward and
                    # reverse relative to the item - confirm in smRtools.
                    subinstance.readDict = {
                        key * -1: value
                        for key, value in subinstance.readDict.items()}
                subinstance.gene = gff_name
                sample_items[gff_name] = subinstance
    return GFFinstanceDict
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
110 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
# Load every alignment file into its per-sample read container.
MasterListOfGenomes = process_samples(filePath)

# When a GFF is supplied, swap the whole-sample containers for the
# per-item sub-instances extracted from them.
if args.gff:
    MasterListOfGenomes = gff_item_subinstances(MasterListOfGenomes, args.gff)

# NOTE(review): with both --gff and --global_size the entries are plain
# dicts, on which the global writer's size_histogram() call looks like it
# would raise AttributeError - confirm whether that combination is supported.
_write_table = (write_size_distribution_dataframe_global if global_size
                else write_size_distribution_dataframe)
_write_table(MasterListOfGenomes, size_distribution_file, pol)