Mercurial > repos > xuebing > sharplabtool
comparison tools/data_source/microbial_import_code.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 | |
def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
    """Parse ``microbial_data.loc`` and return organism records grouped by kingdom.

    The file ``<GALAXY_DATA_INDEX_DIR>/microbial_data.loc`` is read line by
    line.  Blank lines and lines starting with ``#`` are ignored.  Every other
    line is split on ``sep``; its first field (case-insensitive) selects the
    record type:

    * ``ORG``  -- org_num, name, kingdom, group, chromosomes, info_url, link_site
    * ``CHR``  -- org_num, chr_acc, name, length, gi, gb, info_url
    * ``DATA`` -- uid, org_num, chr_acc, feature, filetype, path

    Lines with an unknown record type or with too few fields are skipped, as
    are ``DATA`` lines that refer to a chromosome not yet declared by a
    ``CHR`` line.  Extra trailing fields are ignored.

    Returns a dict shaped as
    ``{kingdom: {org_num: {'name', 'kingdom', 'group', 'chromosomes',
    'info_url', 'link_site', 'chrs': {chr_acc: {'name', 'length', 'gi', 'gb',
    'info_url', 'data': {uid: {'filetype', 'path', 'feature'}}}}}}}``.
    The ``'data'`` key exists only on chromosomes that received at least one
    ``DATA`` record.  Organisms that never received an ``ORG`` record have no
    kingdom and are left out of the result.

    Raises whatever ``open`` raises when the .loc file cannot be read.
    """
    # FIXME: this function is duplicated in the DynamicOptions class. It is used here only to
    # set data.name in exec_after_process().
    orgs = {}

    filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR
    # The context manager guarantees the handle is closed even if parsing fails.
    with open( filename ) as handle:
        for line in handle:
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                continue
            fields = line.split( sep )
            info_type = fields[0].upper()
            values = fields[1:]
            # Explicit length checks replace the old bare ``except: continue``,
            # which hid every kind of error, not only short lines.
            if info_type == "ORG":
                if len( values ) < 7:
                    continue
                org_num, name, kingdom, group, chromosomes, info_url, link_site = values[:7]
                # An earlier CHR/DATA line may already have created this entry;
                # keep its 'chrs' and only fill in the organism attributes.
                org = orgs.setdefault( org_num, { 'chrs': {} } )
                org[ 'name' ] = name
                org[ 'kingdom' ] = kingdom
                org[ 'group' ] = group
                org[ 'chromosomes' ] = chromosomes
                org[ 'info_url' ] = info_url
                org[ 'link_site' ] = link_site
            elif info_type == "CHR":
                if len( values ) < 7:
                    continue
                org_num, chr_acc, name, length, gi, gb, info_url = values[:7]
                org = orgs.setdefault( org_num, { 'chrs': {} } )
                org[ 'chrs' ][ chr_acc ] = {
                    'name': name,
                    'length': length,
                    'gi': gi,
                    'gb': gb,
                    'info_url': info_url,
                }
            elif info_type == "DATA":
                if len( values ) < 6:
                    continue
                uid, org_num, chr_acc, feature, filetype, path = values[:6]
                chrs = orgs.setdefault( org_num, { 'chrs': {} } )[ 'chrs' ]
                if chr_acc not in chrs:
                    # No CHR record seen yet for this accession; nothing to attach to.
                    continue
                chrs[ chr_acc ].setdefault( 'data', {} )[ uid ] = {
                    'filetype': filetype,
                    'path': path,
                    'feature': feature,
                }

    microbe_info = {}
    for org_num, org in orgs.items():
        kingdom = org.get( 'kingdom' )
        if kingdom is None:
            # Entry was created by CHR/DATA lines only; without an ORG record
            # it cannot be grouped (this used to raise KeyError).
            continue
        microbe_info.setdefault( kingdom, {} )[ org_num ] = org
    return microbe_info
81 | |
82 #post processing, set build for data and add additional data to history | |
83 from galaxy import datatypes, config, jobs, tools | |
84 from shutil import copyfile | |
85 | |
def _microbial_name_suffix( microbe_info, kingdom, org, chr_acc, description ):
    """Return the " (<feature> for <organism name>:<chr_acc>)" suffix for a dataset name.

    ``microbe_info`` is the mapping returned by load_microbial_data();
    ``description`` is the uid of the DATA record.  Raises KeyError when any
    of the lookups is missing.
    """
    org_info = microbe_info[ kingdom ][ org ]
    feature = org_info[ 'chrs' ][ chr_acc ][ 'data' ][ description ][ 'feature' ]
    return " (" + feature + " for " + org_info[ 'name' ] + ":" + chr_acc + ")"


def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
    """Post-process the tool run: name the output dataset and add extra datasets to the history.

    The tool's ``stdout`` is scanned line by line for tab-separated directives:

    * ``#File1  <description> <chr> <dbkey> <file_type>`` -- updates the first
      entry of ``out_data``: sets its size, name, dbkey, info and datatype,
      then re-initialises metadata and the peek.
    * ``#NewFile <description> <chr> <dbkey> <filepath> <file_type>`` --
      creates a new HistoryDatasetAssociation, copies the permissions of the
      first output dataset onto it, adds it to that dataset's history and
      copies ``filepath`` into it.  If the copy fails the new dataset is
      marked ``jobs.JOB_ERROR`` instead of ``jobs.JOB_OK``.

    Dataset names are built from the ``kingdom`` and ``org`` entries of
    ``param_dict`` and the data loaded by load_microbial_data().

    ``inp_data``, ``tool`` and ``stderr`` are not used.  Returns None.
    """
    base_dataset = list( out_data.items() )[0][1]
    history = base_dataset.history
    if history is None:
        print( "unknown history!" )
        return
    kingdom = param_dict.get( 'kingdom', None )
    #group = param_dict.get( 'group', None )
    org = param_dict.get( 'org', None )

    #if not (kingdom or group or org):
    if not (kingdom or org):
        # NOTE(review): only a message is printed; processing continues as before.
        print( "Parameters are not available." )
    #workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values
    if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ):
        kingdom = kingdom.value
    if isinstance( org, tools.parameters.basic.UnvalidatedValue ):
        org = org.value

    GALAXY_DATA_INDEX_DIR = app.config.tool_data_path
    microbe_info = load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' )
    # Name of the first output before it is decorated; reused as the prefix
    # for every dataset created by a later "#NewFile" line.
    basic_name = ""
    for line in stdout.split( "\n" ):
        fields = line.split( "\t" )
        if fields[0] == "#File1":
            description = fields[1]
            chr_acc = fields[2]
            dbkey = fields[3]
            file_type = fields[4]
            data = list( out_data.items() )[0][1]
            data.set_size()
            basic_name = data.name
            data.name = data.name + _microbial_name_suffix( microbe_info, kingdom, org, chr_acc, description )
            data.dbkey = dbkey
            data.info = data.name
            data = app.datatypes_registry.change_datatype( data, file_type )
            data.init_meta()
            data.set_peek()
            app.model.context.add( data )
            app.model.context.flush()
        elif fields[0] == "#NewFile":
            description = fields[1]
            chr_acc = fields[2]
            dbkey = fields[3]
            filepath = fields[4]
            file_type = fields[5]
            newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library
            newdata.set_size()
            newdata.extension = file_type
            newdata.name = basic_name + _microbial_name_suffix( microbe_info, kingdom, org, chr_acc, description )
            app.model.context.add( newdata )
            app.model.context.flush()
            app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset )
            history.add_dataset( newdata )
            app.model.context.add( history )
            app.model.context.flush()
            try:
                copyfile( filepath, newdata.file_name )
                newdata.info = newdata.name
                newdata.state = jobs.JOB_OK
            except Exception:
                # Best effort: any failure to copy is reported on the dataset
                # itself rather than aborting the hook.  SystemExit and
                # KeyboardInterrupt are no longer swallowed.
                newdata.info = "The requested file is missing from the system."
                newdata.state = jobs.JOB_ERROR
            newdata.dbkey = dbkey
            newdata.init_meta()
            newdata.set_peek()
            app.model.context.flush()