Mercurial > repos > yating-l > jbrowsearchivecreator
annotate utils.py @ 5:e762f4b9e4bd draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit a3a8cdf5c9b100db56b2534e8718b5fc976435ff
author | yating-l |
---|---|
date | Fri, 07 Jul 2017 17:21:23 -0400 |
parents | 7e471cdd9e71 |
children |
rev | line source |
---|---|
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
1 #!/usr/bin/env python |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
2 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
3 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
4 This file include common used functions for converting file format to gff3 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
5 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
6 from collections import OrderedDict |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
7 import json |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
8 import subprocess |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
9 import os |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
10 import tempfile |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
11 import string |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
12 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
13 def write_features(field, attribute, gff3): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
14 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
15 The function write the features to gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
16 field, attribute are ordered dictionary |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
17 gff3 is the file handler |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
18 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
19 attr = [] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
20 for v in field.values(): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
21 gff3.write(str(v) + '\t') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
22 for k, v in attribute.items(): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
23 s = str(k) + '=' + str(v) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
24 attr.append(s) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
25 gff3.write(';'.join(attr)) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
26 gff3.write('\n') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
27 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
28 def getChromSizes(reference, tool_dir): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
29 #TODO: find a better way instead of shipping the two exec files with the tool |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
30 faToTwoBit = os.path.join(tool_dir, 'faToTwoBit') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
31 twoBitInfo = os.path.join(tool_dir, 'twoBitInfo') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
32 try: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
33 twoBitFile = tempfile.NamedTemporaryFile(bufsize=0) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
34 chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix='.chrom.sizes', delete=False) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
35 except IOError as err: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
36 print "Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
37 try: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
38 subprocess.call(['faToTwoBit', reference, twoBitFile.name]) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
39 except OSError as err: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
40 print "Cannot generate twoBitFile from faToTwoBit err({0}): {1}".format(err.errno, err.strerror) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
41 try: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
42 subprocess.call(['twoBitInfo', twoBitFile.name, chrom_sizes.name]) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
43 except OSError as err: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
44 print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
45 return chrom_sizes |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
46 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
47 def sequence_region(chrom_sizes): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
48 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
49 This function read from a chromatin size file generated by twoBitInfo and write the information to dict |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
50 return a dict |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
51 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
52 f = open(chrom_sizes, 'r') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
53 sizes = f.readlines() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
54 sizes_dict = {} |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
55 for line in sizes: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
56 chrom_info = line.rstrip().split('\t') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
57 sizes_dict[chrom_info[0]] = chrom_info[1] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
58 return sizes_dict |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
59 |
4
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
60 def child_blocks(parent_field, parent_attr, gff3, child_type): |
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
61 num = 0 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
62 blockcount = int(parent_attr['blockcount']) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
63 chromstart = parent_attr['chromstarts'].split(',') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
64 blocksize = parent_attr['blocksizes'].split(',') |
4
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
65 parent_start = parent_field['start'] |
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
66 while num < blockcount: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
67 child_attr = OrderedDict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
68 child_field = parent_field |
4
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
69 child_field['type'] = child_type |
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
70 child_field['start'] = int(chromstart[num]) + int(parent_start) |
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
71 child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1 |
4
7e471cdd9e71
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 8d93b27353190eb23490c9480e560d84cb60c973
yating-l
parents:
0
diff
changeset
|
72 child_attr['ID'] = parent_attr['ID'] + '_part_' + str(num+1) |
0
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
73 child_attr['Parent'] = parent_attr['ID'] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
74 write_features(child_field, child_attr, gff3) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
75 num = num + 1 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
76 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
77 def add_tracks_to_json(trackList_json, new_tracks, modify_type): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
78 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
79 Add to track configuration (trackList.json) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
80 # modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
81 # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict()) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
82 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
83 with open(trackList_json, 'r+') as f: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
84 data = json.load(f) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
85 if modify_type == 'add_tracks': |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
86 data['tracks'].append(new_tracks) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
87 elif modify_type == 'add_attr': |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
88 for k in new_tracks: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
89 for track in data['tracks']: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
90 if k.lower() in track['urlTemplate'].lower(): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
91 attr = new_tracks[k] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
92 for k, v in attr.items(): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
93 track[k] = v |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
94 f.seek(0, 0) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
95 f.write(json.dumps(data, separators=(',' , ':'), indent=4)) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
96 f.truncate() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
97 f.close() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
98 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
99 def gtfToGff3(gtf_file, gff3_file, chrom_sizes): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
100 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
101 Covert gtf file output from StringTie to gff3 format |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
102 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
103 gff3 = open(gff3_file, 'w') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
104 gff3.write("##gff-version 3\n") |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
105 sizes_dict = sequence_region(chrom_sizes) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
106 seq_regions = dict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
107 parents = dict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
108 with open(gtf_file, 'r') as gtf: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
109 for line in gtf: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
110 if line.startswith('#'): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
111 continue |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
112 field = OrderedDict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
113 attribute = OrderedDict() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
114 li = line.rstrip().split("\t") |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
115 #print li |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
116 field['seqid'] = li[0] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
117 #print field['seqid'] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
118 if field['seqid'] not in seq_regions: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
119 end_region = sizes_dict[field['seqid']] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
120 gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
121 seq_regions[field['seqid']] = end_region |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
122 field['source'] = li[1] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
123 field['type'] = li[2] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
124 # The first base in a chromosome is numbered 0 in BED format |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
125 field['start'] = li[3] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
126 field['end'] = li[4] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
127 field['score'] = li[5] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
128 field['strand'] = li[6] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
129 field['phase'] = li[7] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
130 attr_li = li[8].split(';') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
131 gene_id = attr_li[0].split()[1].strip('"') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
132 attribute['ID'] = gene_id + '_' + field['type'] + '_' + str(field['start']) + '_' + str(field['end']) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
133 if field['type'] == 'transcript': |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
134 parents[gene_id] = attribute['ID'] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
135 attribute['transcript_id'] = attr_li[1].split()[1].strip('"') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
136 attribute['coverage'] = attr_li[2].split()[1].strip('"') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
137 attribute['fpkm'] = attr_li[3].split()[1].strip('"') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
138 attribute['tpm'] = attr_li[4].split()[1].strip('"') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
139 elif field['type'] == 'exon': |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
140 attribute['Parent'] = parents[gene_id] |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
141 attribute['transcript_id'] = attr_li[1].split()[1].strip('"') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
142 attribute['coverage'] = attr_li[3].split()[1].strip('"') |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
143 write_features(field, attribute, gff3) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
144 gff3.close() |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
145 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
146 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
147 def sanitize_name(input_name): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
148 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
149 Galaxy will name all the files and dirs as *.dat, |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
150 the function can replace '.' to '_' for the dirs |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
151 """ |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
152 validChars = "_-%s%s" % (string.ascii_letters, string.digits) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
153 sanitized_name = ''.join([c if c in validChars else '_' for c in input_name]) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
154 return "gonramp_" + sanitized_name |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
155 |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
156 def createBamIndex(bamfile): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
157 subprocess.call(['samtools', 'index', bamfile]) |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
158 filename = bamfile + '.bai' |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
159 if os.path.exists(filename): |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
160 return filename |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
161 else: |
804a93e87cc8
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f22711ea7a464bdaf4d5aaea07f2eacf967aa66e-dirty
yating-l
parents:
diff
changeset
|
162 raise ValueError('Did not find bai file') |