Mercurial > repos > drosofff > msp_sr_readmap_and_size_histograms
annotate readmap.py @ 4:4efe210c91f3 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 9237338d798251fb2667280d597746e852f3ffcc-dirty
author | mvdbeek |
---|---|
date | Wed, 03 Feb 2016 11:35:00 -0500 |
parents | 9b62e6b0d219 |
children | bcc0c7093e7a |
rev | line source |
---|---|
0
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/python |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
2 # python parser module for readmaps and size distributions, guided by GFF3 |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
3 # version 0.9.1 (1-6-2014) |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
4 # Usage readmap.py <1:index source> <2:extraction directive> <3:output pre-mir> <4: output mature miRs> <5:mirbase GFF3> |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
5 # <6:pathToLatticeDataframe or "dummy_dataframe_path"> <7:Rcode or "dummy_plotCode"> <8:latticePDF or "dummy_latticePDF"> |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
6 # <9:10:11 filePath:FileExt:FileLabel> <.. ad lib> |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
7 |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
8 import sys, subprocess, argparse |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
9 from smRtools import * |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
10 from collections import OrderedDict, defaultdict |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
11 import os |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
12 |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
def Parser(argv=None):
    """Build the command-line parser and return the parsed arguments.

    argv -- optional list of argument strings to parse. When None (the
            default) argparse reads sys.argv[1:], so callers that pass
            nothing get the usual command-line behaviour.

    Returns the argparse.Namespace holding every option declared below;
    options that are not supplied are left at None.
    """
    the_parser = argparse.ArgumentParser()

    # Output dataframes.
    the_parser.add_argument('--output_readmap', action="store", type=str, help="readmap dataframe")
    the_parser.add_argument('--output_size_distribution', action="store", type=str, help="size distribution dataframe")

    # Genome reference, given either as a fasta file or as a bowtie index.
    the_parser.add_argument('--reference_fasta', action="store", type=str, help="path to the fasta reference")
    the_parser.add_argument('--reference_bowtie_index', action='store', help="paths to indexed or fasta references")

    # Parallel per-sample lists: the i-th entry of each describes the i-th input.
    the_parser.add_argument('--input', nargs='+', help="paths to multiple input files")
    the_parser.add_argument('--ext', nargs='+', help="input file type")
    the_parser.add_argument('--label', nargs='+', help="labels of multiple input files")
    the_parser.add_argument('--normalization_factor', nargs='+', type=float, help="Normalization factor for input file")

    the_parser.add_argument('--gff', type=str, help="GFF containing regions of interest")
    the_parser.add_argument('--minquery', type=int, help="Minimum readsize")
    the_parser.add_argument('--maxquery', type=int, help="Maximum readsize")
    the_parser.add_argument('--rcode', type=str, help="R script")

    return the_parser.parse_args(argv)
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
29 |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
# Parse the command line once; the names bound here are module-level
# globals read by the functions defined further down.
args = Parser()

# Select the genome reference. The bowtie index is tested last, so it takes
# precedence when both reference options are supplied. When neither is
# supplied, genomeRefFormat and genomeRefFile are left undefined.
if args.reference_fasta:
    genomeRefFormat, genomeRefFile = "fastaSource", args.reference_fasta
if args.reference_bowtie_index:
    genomeRefFormat, genomeRefFile = "bowtieIndex", args.reference_bowtie_index

# Output dataframe paths.
readmap_file = args.output_readmap
size_distribution_file = args.output_size_distribution

# Read-size bounds and the R script option.
minquery, maxquery = args.minquery, args.maxquery
Rcode = args.rcode

# Parallel per-sample lists (path, file type, label, normalization factor).
filePath = args.input
fileExt = args.ext
fileLabel = args.label
normalization_factor = args.normalization_factor

# Sample label -> per-sample object, kept in insertion order.
MasterListOfGenomes = OrderedDict()
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
48 |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
def process_samples(filePath):
    """Build one HandleSmRNAwindows object per input alignment file.

    filePath -- list of alignment file paths. The i-th path is combined
                with the i-th entry of the module-level fileExt, fileLabel
                and normalization_factor lists.

    Each object is stored in the module-level MasterListOfGenomes under its
    sample label, and that same mapping is returned.
    """
    # A distinct loop name is used so the filePath argument is not rebound
    # while it is being iterated.
    for i, alignment_path in enumerate(filePath):
        norm = normalization_factor[i]
        label = fileLabel[i]
        # Trace the sample being processed on stdout. The single-argument
        # call form prints the same text under Python 2 and Python 3.
        print(label)
        MasterListOfGenomes[label] = HandleSmRNAwindows(
            alignmentFile=alignment_path,
            alignmentFileFormat=fileExt[i],
            genomeRefFile=genomeRefFile,
            genomeRefFormat=genomeRefFormat,
            biosample=label,
            size_inf=minquery,
            size_sup=maxquery,
            norm=norm)
    return MasterListOfGenomes
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
56 |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
def dataframe_sanityzer(listofdatalines):
    """Drop the dataframe lines of genes that carry no reads at all.

    listofdatalines -- list of tab-separated strings whose first field is
                       the gene name and whose third field is a count.

    Returns a new list, in the original order, containing only the lines of
    genes that have at least one non-zero count.

    Counts are accumulated as absolute values. A count can be negative (the
    reverse-strand example in this file is -1.0), so a plain signed sum
    would let equal forward and reverse counts cancel to zero and wrongly
    discard a gene that does have reads.
    """
    # Split every line once and reuse the fields for both passes.
    split_lines = [(line, line.split("\t")) for line in listofdatalines]

    totals = defaultdict(float)
    for _, fields in split_lines:
        totals[fields[0]] += abs(float(fields[2]))

    return [line for line, fields in split_lines if totals[fields[0]] != 0]
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
68 |
2
ebfc73c72652
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents:
0
diff
changeset
|
69 |
ebfc73c72652
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents:
0
diff
changeset
|
def listify_plottable_item(item):
    """Split one plottable line into its four fields.

    item -- a string of exactly four tab-separated fields: gene name,
            coordinate, count and orientation (for instance gene
            FBti0020401, coordinate 78, count -1.0, orientation R).

    Returns the tuple (gene, coordinate, count, orientation); every element
    is still a string. A line without exactly four fields raises ValueError
    from the unpacking.
    """
    fields = item.split("\t")
    gene, coordinate, count, orientation = fields
    return (gene, coordinate, count, orientation)
ebfc73c72652
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents:
0
diff
changeset
|
78 |
ebfc73c72652
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents:
0
diff
changeset
|
def lookup_gene_length(gene, readDict):
    """Return the size of the first entry of the first sample in readDict.

    gene     -- accepted but not used by the lookup.
    readDict -- mapping of sample label to an object exposing an
                instanceDict mapping whose values have a size attribute.

    NOTE(review): because gene is ignored, every gene gets the size of the
    first instanceDict entry -- confirm this is intended.

    Raises IndexError when readDict or the first instanceDict is empty.
    """
    # list(...)[0] selects the first value on both Python 2 and Python 3;
    # indexing keys()/values() directly only works on Python 2 lists.
    first_sample = list(readDict.values())[0]
    first_entry = list(first_sample.instanceDict.values())[0]
    return first_entry.size
2
ebfc73c72652
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents:
0
diff
changeset
|
81 |
ebfc73c72652
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents:
0
diff
changeset
|
def handle_start_stop_coordinates(plottable, readDict):
    """Pad a gene's plottable lines so they span the whole gene.

    plottable -- non-empty list of tab-separated lines
                 (gene, coordinate, count, orientation).
    readDict  -- passed to lookup_gene_length to obtain the end coordinate.

    When the first line is not at coordinate "0", a zero-count "F" line at
    coordinate 0 is put in front. When the last line is not at the gene
    length, a zero-count "F" line at that coordinate is added at the end.

    Returns the padded list. The input list is never modified in place; it
    is returned unchanged when no padding is needed.
    """
    head_gene, head_coord, _, _ = listify_plottable_item(plottable[0])
    # Remember the original last line before any padding is added.
    tail_item = plottable[-1]

    if head_coord != "0":
        plottable = ["\t".join([head_gene, "0", "0", "F"])] + plottable

    gene_length = str(lookup_gene_length(head_gene, readDict))
    tail_gene, tail_coord, _, _ = listify_plottable_item(tail_item)

    # Coordinates are compared as strings, hence the str() above.
    if tail_coord != gene_length:
        plottable = plottable + ["\t".join([tail_gene, gene_length, "0", "F"])]

    return plottable
2
ebfc73c72652
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
mvdbeek
parents:
0
diff
changeset
|
99 |
0
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
def write_readplot_dataframe(readDict, readmap_file):
    """Write the readmap dataframe to a tab-separated text file.

    readDict     -- mapping of sample label to that sample's data. When the
                    module-level args.gff is set, readDict[sample] is used
                    directly as the gene mapping; otherwise its
                    instanceDict attribute is used. Each gene entry must
                    provide a readplot() method returning plottable lines.
    readmap_file -- path of the output file; it is overwritten.

    The file starts with a gene/coord/count/polarity/sample header line.
    Every plottable line is suffixed with its sample label, and the
    collected lines are passed through dataframe_sanityzer before writing.
    """
    listoflines = []
    with open(readmap_file, 'w') as readmap:
        readmap.write("gene\tcoord\tcount\tpolarity\tsample\n")
        for sample in readDict.keys():
            # Named "genes" so the dict builtin is not shadowed.
            if args.gff:
                genes = readDict[sample]
            else:
                genes = readDict[sample].instanceDict
            for gene in genes.keys():
                plottable = genes[gene].readplot()
                plottable = handle_start_stop_coordinates(plottable, readDict)
                for line in plottable:
                    listoflines.append("%s\t%s" % (line, sample))
        # Filter once over all samples, then write one line per record.
        for line in dataframe_sanityzer(listoflines):
            readmap.write(line + "\n")
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
118 |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
def write_size_distribution_dataframe(readDict, size_distribution_file):
    """Write the read size distribution as a tab-separated dataframe.

    The file starts with the header ``gene, size, count, polarity, sample``
    (tab-separated), followed by one row per (sample, gene, polarity, size)
    entry. Rows are collected first, passed through ``dataframe_sanityzer``
    and only then written, in the order that helper returns them.

    readDict -- mapping of sample name to either a mapping of gene name to
        item (when ``args.gff`` is set) or an object exposing such a mapping
        as its ``instanceDict`` attribute. Every item must provide
        ``size_histogram(minquery=..., maxquery=...)`` returning a mapping
        of polarity to a ``{size: count}`` mapping.
    size_distribution_file -- path of the file to create or overwrite.

    Reads the module-level ``args`` (``gff``, ``minquery``, ``maxquery``).
    """
    rows = []
    with open(size_distribution_file, 'w') as size_distrib:
        # Column order must match the row format string used below.
        size_distrib.write("gene\tsize\tcount\tpolarity\tsample\n")
        for sample in readDict.keys():
            if args.gff:
                # GFF mode: the sample already maps feature name -> item.
                items_by_gene = readDict[sample]
            else:
                items_by_gene = readDict[sample].instanceDict
            for gene in items_by_gene.keys():
                histogram = items_by_gene[gene].size_histogram(
                    minquery=args.minquery, maxquery=args.maxquery)
                for polarity in histogram.keys():
                    # The 'both' entry is deliberately left out of the output.
                    if polarity == 'both':
                        continue
                    # .items() (not .iteritems()) runs on Python 2 and 3.
                    for size, count in histogram[polarity].items():
                        rows.append("%s\t%s\t%s\t%s\t%s" % (
                            gene, size, count, polarity, sample))
        rows = dataframe_sanityzer(rows)
        for row in rows:
            # Same bytes as the former ``print >>size_distrib, row``.
            size_distrib.write("%s\n" % row)
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
142 |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
def gff_item_subinstances(readDict, gff3):
    """Build, for every sample, one sub-instance per feature of a GFF3 file.

    readDict -- mapping of sample name to an object whose ``instanceDict``
        maps a chromosome/reference name (GFF3 column 1) to an item that
        ``extractsubinstance`` accepts.
    gff3 -- path of a tab-separated GFF3 file.

    Returns an ``OrderedDict`` mapping sample name to a plain dict that maps
    feature name to the object returned by ``extractsubinstance`` for the
    feature's start/end coordinates (columns 4 and 5). The feature name is
    the text following the last ``Name=`` in the final column, up to the
    next ``;``. Features sharing a name overwrite each other (last wins).
    For features on the ``-`` strand the keys of the sub-instance's
    ``readDict`` are negated.

    Raises KeyError if a feature's reference name is missing from a sample's
    ``instanceDict``, and IndexError/ValueError on malformed feature lines.
    """
    GFFinstanceDict = OrderedDict()
    for sample in readDict.keys():
        # Second level is created up front because OrderedDict has no
        # default factory (unlike defaultdict).
        GFFinstanceDict[sample] = {}
    with open(gff3) as gff:
        for raw_line in gff:
            # Strip only the line terminator. The previous ``line[:-1]``
            # removed a real character from a final line lacking a newline
            # and left a stray "\r" on CRLF files.
            line = raw_line.rstrip("\r\n")
            # Skip blank lines as well as comments/directives; a blank line
            # used to crash on the column lookups below.
            if not line.strip() or line[0] == "#":
                continue
            gff_fields = line.split("\t")
            chrom = gff_fields[0]
            gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0]
            item_upstream_coordinate = int(gff_fields[3])
            item_downstream_coordinate = int(gff_fields[4])
            item_polarity = gff_fields[6]
            for sample in readDict.keys():
                subinstance = extractsubinstance(
                    item_upstream_coordinate,
                    item_downstream_coordinate,
                    readDict[sample].instanceDict[chrom])
                if item_polarity == '-':
                    # Negate the keys for minus-strand features.
                    # NOTE(review): presumably keys are signed read offsets,
                    # so this re-orients reads to the feature strand - confirm.
                    subinstance.readDict = {
                        key * -1: value
                        for key, value in subinstance.readDict.items()}
                subinstance.gene = gff_name
                GFFinstanceDict[sample][gff_name] = subinstance
    return GFFinstanceDict
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
167 |
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
# Driver: build the per-sample read data, optionally regroup it by GFF3
# feature, dump the two dataframes, then run the R plotting script.
MasterListOfGenomes = process_samples(filePath)

if args.gff:
    MasterListOfGenomes = gff_item_subinstances(MasterListOfGenomes, args.gff)

write_readplot_dataframe(MasterListOfGenomes, readmap_file)
write_size_distribution_dataframe(MasterListOfGenomes, size_distribution_file)

# Pass the script path as a single argv element. Building the command as a
# string and calling .split() broke any path containing whitespace.
R_command = ["Rscript", Rcode]
process = subprocess.Popen(R_command)
# Block until R has finished; its exit status is not inspected here.
process.wait()
ac7d8e55bb67
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
drosofff
parents:
diff
changeset
|
179 |