annotate readmap.py @ 4:4efe210c91f3 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 9237338d798251fb2667280d597746e852f3ffcc-dirty
author mvdbeek
date Wed, 03 Feb 2016 11:35:00 -0500
parents 9b62e6b0d219
children bcc0c7093e7a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
1 #!/usr/bin/python
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
2 # python parser module for readmaps and size distributions, guided by GFF3
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
3 # version 0.9.1 (1-6-2014)
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
4 # Usage readmap.py <1:index source> <2:extraction directive> <3:output pre-mir> <4: output mature miRs> <5:mirbase GFF3>
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
5 # <6:pathToLatticeDataframe or "dummy_dataframe_path"> <7:Rcode or "dummy_plotCode"> <8:latticePDF or "dummy_latticePDF">
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
6 # <9:10:11 filePath:FileExt:FileLabel> <.. ad lib>
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
7
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
8 import sys, subprocess, argparse
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
9 from smRtools import *
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
10 from collections import OrderedDict, defaultdict
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
11 import os
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
12
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
def Parser(argv=None):
    """Parse the command-line options of the readmap tool.

    Args:
        argv: optional list of argument strings. When None (the default),
            argparse falls back to ``sys.argv[1:]``, which is what the
            argument-less call ``Parser()`` has always done.

    Returns:
        The ``argparse.Namespace`` holding the parsed options. Options that
        are not supplied are set to None.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument('--output_readmap', action="store", type=str, help="readmap dataframe")
    the_parser.add_argument('--output_size_distribution', action="store", type=str, help="size distribution dataframe")
    # The value is used as the fasta reference, not as an output file.
    the_parser.add_argument('--reference_fasta', action="store", type=str, help="path to the fasta reference")
    the_parser.add_argument('--reference_bowtie_index', action='store', help="paths to indexed or fasta references")
    # The next four options each take one value per input file.
    the_parser.add_argument('--input', nargs='+', help="paths to multiple input files")
    the_parser.add_argument('--ext', nargs='+', help="input file type")
    the_parser.add_argument('--label', nargs='+', help="labels of multiple input files")
    the_parser.add_argument('--normalization_factor', nargs='+', type=float, help="Normalization factor for input file")
    the_parser.add_argument('--gff', type=str, help="GFF containing regions of interest")
    the_parser.add_argument('--minquery', type=int, help="Minimum readsize")
    the_parser.add_argument('--maxquery', type=int, help="Maximum readsize")
    the_parser.add_argument('--rcode', type=str, help="R script")
    return the_parser.parse_args(argv)
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
29
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
# Parse the command line once; the names bound below are module-level
# globals that the functions of this script read directly.
args = Parser()

# Reference selection: a bowtie index takes precedence over a fasta source
# when both options are given. When neither is given, genomeRefFormat and
# genomeRefFile are left undefined.
if args.reference_bowtie_index:
    genomeRefFormat, genomeRefFile = "bowtieIndex", args.reference_bowtie_index
elif args.reference_fasta:
    genomeRefFormat, genomeRefFile = "fastaSource", args.reference_fasta

# Output targets and the R script.
readmap_file = args.output_readmap
size_distribution_file = args.output_size_distribution
Rcode = args.rcode

# Read-size window.
minquery = args.minquery
maxquery = args.maxquery

# Per-input lists, indexed in parallel by process_samples().
filePath = args.input
fileExt = args.ext
fileLabel = args.label
normalization_factor = args.normalization_factor

# Sample label -> object built by process_samples(), in insertion order.
MasterListOfGenomes = OrderedDict()
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
48
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
def process_samples(filePath):
    """Build one HandleSmRNAwindows object per input alignment file.

    For the i-th path in ``filePath`` the matching entries of the
    module-level lists ``fileExt``, ``fileLabel`` and
    ``normalization_factor`` are used, together with the module-level
    ``genomeRefFile``, ``genomeRefFormat``, ``minquery`` and ``maxquery``.
    Each sample label is printed to stdout as it is processed.

    Args:
        filePath: iterable of alignment file paths.

    Returns:
        The module-level ``MasterListOfGenomes`` mapping, filled with one
        entry per sample label (a later duplicate label overwrites an
        earlier one).
    """
    # A distinct loop name avoids shadowing the ``filePath`` parameter.
    for i, alignment_path in enumerate(filePath):
        norm = normalization_factor[i]
        label = fileLabel[i]
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print(label)
        # HandleSmRNAwindows is not defined in this file; presumably it comes
        # from the ``from smRtools import *`` star import.
        MasterListOfGenomes[label] = HandleSmRNAwindows(
            alignmentFile=alignment_path,
            alignmentFileFormat=fileExt[i],
            genomeRefFile=genomeRefFile,
            genomeRefFormat=genomeRefFormat,
            biosample=label,
            size_inf=minquery,
            size_sup=maxquery,
            norm=norm)
    return MasterListOfGenomes
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
56
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
def dataframe_sanityzer (listofdatalines):
    """Drop every data line that belongs to a gene without any signal.

    Each line is a tab-separated record whose first field is the gene name
    and whose third field is a count. A gene is kept when at least one of
    its lines has a non-zero count.

    Counts can be signed (e.g. ``'FBti0020401\\t78\\t-1.0\\tR'``), so the
    per-gene total is accumulated over absolute values: a plain signed sum
    would let forward and reverse counts cancel out and wrongly discard a
    gene that does have reads.

    Args:
        listofdatalines: list of tab-separated data lines. It is iterated
            twice, so it must be a re-iterable sequence.

    Returns:
        A new list with the retained lines, in their original order.
    """
    totals = defaultdict(float)
    for line in listofdatalines:
        fields = line.split("\t")
        totals[fields[0]] += abs(float(fields[2]))
    return [line for line in listofdatalines
            if totals[line.split("\t")[0]] != 0]
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
68
2
ebfc73c72652 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents: 0
diff changeset
69
ebfc73c72652 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents: 0
diff changeset
def listify_plottable_item(item):
    """
    Split one plottable entry into its four tab-separated fields.

    A plottable is a list of strings such as:
    'FBti0020401\t78\t-1.0\tR'
    The entry is split on tabs and returned as the tuple
    (gene, coordinate, count, orientation); every field stays a string.
    Unpacking raises ValueError when the entry does not hold exactly
    four fields.
    """
    fields = item.split("\t")
    gene, coordinate, count, orientation = fields
    return (gene, coordinate, count, orientation)
ebfc73c72652 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents: 0
diff changeset
78
ebfc73c72652 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents: 0
diff changeset
def lookup_gene_length(gene, readDict):
    """Return the ``size`` of the first item of the first sample in readDict.

    Args:
        gene: gene name; accepted for interface compatibility but not used.
        readDict: mapping of sample -> object exposing an ``instanceDict``
            mapping whose values carry a ``size`` attribute.

    Returns:
        The ``size`` attribute of the first value of the first sample's
        ``instanceDict``.

    Raises:
        IndexError: if ``readDict`` or the first ``instanceDict`` is empty.
    """
    # NOTE(review): the length returned is that of the first item, whatever
    # ``gene`` is - confirm this is intended when several genes are plotted.
    # list(...) keeps the indexing valid on Python 3, where keys()/values()
    # return views; on Python 2 the result is unchanged.
    first_sample = list(readDict.keys())[0]
    first_item = list(readDict[first_sample].instanceDict.values())[0]
    return first_item.size
2
ebfc73c72652 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents: 0
diff changeset
81
ebfc73c72652 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents: 0
diff changeset
def handle_start_stop_coordinates(plottable, readDict):
    """
    To ensure that the plot area always includes the correct start and end
    coordinates, add an entry of count 0 at the start [coordinate 0] and at
    the end [last coordinate], if these do not exist.

    Args:
        plottable: list of 'gene\\tcoordinate\\tcount\\torientation' strings.
        readDict: mapping handed to lookup_gene_length() to obtain the
            end coordinate.

    Returns:
        A list with the boundary entries added where needed. An empty
        ``plottable`` is returned unchanged, because there is no data line
        to take the gene name from.
    """
    if not plottable:
        return plottable
    first_line = plottable[0]
    last_line = plottable[-1]
    gene, first_coordinate, _, _ = listify_plottable_item(first_line)
    if first_coordinate != "0":
        plottable = ["\t".join([gene, "0", "0", "F"])] + plottable
    # Coordinates are compared as strings, hence the str() conversion.
    gene_length = str(lookup_gene_length(gene, readDict))
    gene, last_coordinate, _, _ = listify_plottable_item(last_line)
    if last_coordinate != gene_length:
        plottable = plottable + ["\t".join([gene, gene_length, "0", "F"])]
    return plottable
2
ebfc73c72652 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents: 0
diff changeset
99
0
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
def write_readplot_dataframe(readDict, readmap_file):
    """Write the readmap dataframe as a tab-separated file.

    The file starts with the header 'gene, coord, count, polarity, sample'
    (tab-separated). For every sample and every gene, the lines returned by
    the item's ``readplot()`` are padded with start/stop entries by
    handle_start_stop_coordinates(), suffixed with the sample name, filtered
    by dataframe_sanityzer() and then written.

    Args:
        readDict: mapping of sample -> gene container. When the
            module-level ``args.gff`` is set, ``readDict[sample]`` is used
            directly as the gene mapping; otherwise its ``instanceDict``
            attribute is used.
        readmap_file: path of the output file; it is overwritten.
    """
    listoflines = []
    with open(readmap_file, 'w') as readmap:
        readmap.write("gene\tcoord\tcount\tpolarity\tsample\n")
        for sample in readDict.keys():
            # Local name chosen so the builtin ``dict`` is not shadowed.
            if args.gff:
                gene_items = readDict[sample]
            else:
                gene_items = readDict[sample].instanceDict
            for gene in gene_items.keys():
                plottable = gene_items[gene].readplot()
                plottable = handle_start_stop_coordinates(plottable, readDict)
                listoflines.extend("%s\t%s" % (line, sample) for line in plottable)
        # Filter only once all samples are collected, then write the result.
        listoflines = dataframe_sanityzer(listoflines)
        readmap.writelines(line + "\n" for line in listoflines)
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
118
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
def write_size_distribution_dataframe(readDict, size_distribution_file):
  """Write the read-size distribution table as a tab-separated text file.

  The output starts with the header "gene, size, count, polarity, sample"
  (tab-separated) followed by one row per (sample, gene, polarity, size).

  readDict: mapping of sample name to either a mapping gene -> instance
    (when the module-level ``args.gff`` is set) or an object exposing such a
    mapping as ``.instanceDict`` (otherwise). Every instance must provide
    ``size_histogram(minquery=..., maxquery=...)`` returning a mapping
    polarity -> {size: count}.
  size_distribution_file: path of the file to create (overwritten).

  Rows are passed through ``dataframe_sanityzer`` (defined elsewhere in this
  file) before being written. Reads ``args.gff``, ``args.minquery`` and
  ``args.maxquery`` from module scope.
  """
  rows = []
  with open(size_distribution_file, 'w') as size_distrib:
    # Column order is gene, size, count, polarity, sample; rows below follow it.
    print >>size_distrib, "gene\tsize\tcount\tpolarity\tsample"
    for sample in readDict.keys():
      # With a GFF the sample entry already is the gene -> instance mapping.
      genes = readDict[sample] if args.gff else readDict[sample].instanceDict
      for gene in genes.keys():
        histogram = genes[gene].size_histogram(minquery=args.minquery, maxquery=args.maxquery)
        for polarity in histogram.keys():
          # The 'both' entry is skipped; only the other polarities are reported.
          if polarity != 'both':
            rows.extend("%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample)
                        for size, count in histogram[polarity].iteritems())
    rows = dataframe_sanityzer(rows)
    for row in rows:
      print >>size_distrib, row
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
142
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
def gff_item_subinstances(readDict, gff3):
  """Slice every sample into one sub-instance per feature of a GFF3 file.

  readDict: mapping of sample name to an object whose ``.instanceDict`` maps
    a chromosome/reference name (GFF column 1) to an instance accepted by
    ``extractsubinstance`` (defined elsewhere in this file).
  gff3: path to a tab-separated GFF3 file.

  Returns an OrderedDict sample -> {gff_name: subinstance}, where gff_name is
  the text following "Name=" (up to the next ";") in the last GFF column.
  For features on the '-' strand the keys of ``subinstance.readDict`` are
  negated. Features sharing the same Name overwrite each other.

  Raises IndexError/ValueError on data lines with missing or non-integer
  coordinate columns, and KeyError-like errors if the chromosome lookup in
  ``instanceDict`` fails (depends on the mapping type used there).
  """
  GFFinstanceDict=OrderedDict()
  for sample in readDict.keys():
    # OrderedDict has no default factory, so the second level is created by hand.
    GFFinstanceDict[sample]={}
  with open(gff3) as gff:
    for line in gff:
      # Strip only the line terminator: slicing off the last character would
      # truncate the final field when the file lacks a trailing newline, and
      # would leave a stray "\r" on CRLF files.
      line = line.rstrip("\r\n")
      # Blank lines carry no feature (the GFF3 spec tells parsers to ignore
      # them); "#" lines are comments or directives.
      if not line.strip() or line.startswith("#"):
        continue
      gff_fields = line.split("\t")
      chrom = gff_fields[0]
      gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0] # to isolate the GFF Name
      item_upstream_coordinate = int(gff_fields[3])
      item_downstream_coordinate = int(gff_fields[4])
      item_polarity = gff_fields[6]
      for sample in readDict.keys():
        subinstance=extractsubinstance(item_upstream_coordinate, item_downstream_coordinate, readDict[sample].instanceDict[chrom])
        if item_polarity == '-':
          # Flip the sign of every key so reads are expressed relative to the
          # feature's own strand.
          subinstance.readDict={-key:value for key, value in subinstance.readDict.iteritems()}
        subinstance.gene=gff_name
        GFFinstanceDict[sample][gff_name]=subinstance
  return GFFinstanceDict
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
167
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
# Main script flow: build the per-sample data, optionally re-slice it by GFF3
# feature, write both dataframes, then run the R plotting script.
MasterListOfGenomes=process_samples(filePath)

if args.gff:
  # Replace whole-reference instances by one sub-instance per GFF3 item.
  MasterListOfGenomes=gff_item_subinstances(MasterListOfGenomes, args.gff)

write_readplot_dataframe(MasterListOfGenomes, readmap_file)
write_size_distribution_dataframe(MasterListOfGenomes, size_distribution_file)

# Pass the command as an argument list rather than splitting a concatenated
# string, so a script path containing whitespace stays a single argument.
R_command=["Rscript", Rcode]
process = subprocess.Popen(R_command)
# Block until R has finished; its exit status is not inspected here.
process.wait()
ac7d8e55bb67 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff changeset
179