0
|
1
|
|
def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
    """Parse ``microbial_data.loc`` into a dictionary grouped by kingdom.

    The file ``<GALAXY_DATA_INDEX_DIR>/microbial_data.loc`` is read line by
    line. Blank lines and lines starting with ``#`` are ignored. Every other
    line is split on ``sep`` and its first field (compared case-insensitively)
    selects the record type; the remaining fields are, in order:

    * ``ORG``:  org_num, name, kingdom, group, chromosomes, info_url, link_site
    * ``CHR``:  org_num, chr_acc, name, length, gi, gb, info_url
    * ``DATA``: uid, org_num, chr_acc, feature, filetype, path

    Extra trailing fields are ignored. Lines with an unknown record type or
    with too few fields are skipped, and so are ``DATA`` lines that name a
    chromosome not declared by an earlier ``CHR`` line of the same organism.
    Organisms that never get an ``ORG`` line have no kingdom to be filed
    under and are left out of the result.

    Parameters:
        GALAXY_DATA_INDEX_DIR: directory containing ``microbial_data.loc``.
        sep: field separator used inside the file (tab by default).

    Returns:
        ``microbe_info[kingdom][org_num]`` -> organism dict with the keys
        'chrs', 'name', 'kingdom', 'group', 'chromosomes', 'info_url' and
        'link_site'.  ``org['chrs'][chr_acc]`` is a dict with the keys 'name',
        'length', 'gi', 'gb', 'info_url' and, once a DATA line was seen for
        it, 'data' mapping uid -> {'filetype', 'path', 'feature'}.
        All values are kept as the strings read from the file.

    Raises:
        Whatever ``open()`` raises when the file cannot be opened.
    """
    # FIXME: this function is duplicated in the DynamicOptions class. It is used here only to
    # set data.name in exec_after_process().
    orgs = {}

    filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR
    handle = open( filename )
    # try/finally (rather than "with") keeps this loadable on every Python 2
    # release while still guaranteeing that the file is closed.
    try:
        for line in handle:
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                continue
            fields = line.split( sep )
            info_type = fields[0].upper()
            if info_type == "ORG":
                #ORG 12521 Clostridium perfringens SM101 bacteria Firmicutes CP000312,CP000313,CP000314,CP000315 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=12521
                if len( fields ) < 8:
                    continue  # not enough fields, go to next line
                org_num, name, kingdom, group, chromosomes, info_url, link_site = fields[1:8]
                # An earlier CHR line may already have created the organism;
                # keep its chromosomes and only fill in the descriptive keys.
                org = orgs.setdefault( org_num, { 'chrs': {} } )
                org[ 'name' ] = name
                org[ 'kingdom' ] = kingdom
                org[ 'group' ] = group
                org[ 'chromosomes' ] = chromosomes
                org[ 'info_url' ] = info_url
                org[ 'link_site' ] = link_site
            elif info_type == "CHR":
                #CHR 12521 CP000315 Clostridium perfringens phage phiSM101, complete genome 38092 110684521 CP000315.1
                if len( fields ) < 8:
                    continue  # not enough fields, go to next line
                org_num, chr_acc, name, length, gi, gb, info_url = fields[1:8]
                org = orgs.setdefault( org_num, { 'chrs': {} } )
                # A repeated CHR line replaces the previous entry wholesale.
                org[ 'chrs' ][ chr_acc ] = {
                    'name': name,
                    'length': length,
                    'gi': gi,
                    'gb': gb,
                    'info_url': info_url,
                }
            elif info_type == "DATA":
                #DATA 12521_12521_CDS 12521 CP000315 CDS bed /home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed
                if len( fields ) < 7:
                    continue  # not enough fields, go to next line
                uid, org_num, chr_acc, feature, filetype, path = fields[1:7]
                chromosome = orgs.get( org_num, {} ).get( 'chrs', {} ).get( chr_acc )
                if chromosome is None:
                    # DATA for a chromosome that was never declared: skip it.
                    continue
                chromosome.setdefault( 'data', {} )[ uid ] = {
                    'filetype': filetype,
                    'path': path,
                    'feature': feature,
                }
    finally:
        handle.close()

    microbe_info = {}
    for org_num, org in orgs.items():
        if 'kingdom' not in org:
            # Only CHR lines were seen for this organism; without an ORG line
            # there is no kingdom to group it under.
            continue
        microbe_info.setdefault( org[ 'kingdom' ], {} )[ org_num ] = org
    return microbe_info
|
|
81
|
|
82 #post processing, set build for data and add additional data to history
|
|
83 from galaxy import datatypes, config, jobs, tools
|
|
84 from shutil import copyfile
|
|
85
|
|
def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
    """Post-processing hook: label the job output and add extra datasets to the history.

    The tool's ``stdout`` is scanned line by line; each line is split on tabs
    and two kinds of marker lines are acted upon:

    * ``#File1  description  chr  dbkey  file_type`` -- the first entry of
      ``out_data`` gets its size set, its name extended with a feature label,
      its dbkey and info set, its datatype changed to ``file_type`` and its
      metadata and peek refreshed.
    * ``#NewFile  description  chr  dbkey  filepath  file_type`` -- a new
      HistoryDatasetAssociation is created, named after the original name of
      the first output plus a feature label, given the permissions of the
      first output's dataset, added to the history, and filled by copying
      ``filepath``.  If the copy fails the dataset is put into the error
      state instead.

    The feature label is looked up in the table returned by
    ``load_microbial_data()`` using the 'kingdom' and 'org' entries of
    ``param_dict``.  A missing table entry raises ``KeyError`` and a marker
    line with too few fields raises ``IndexError``; neither is caught here.

    Nothing is returned.  If the first output has no history the function
    prints a message and returns without doing anything.
    ``inp_data``, ``tool`` and ``stderr`` are accepted but not used.
    """
    # print( "..." ) with a single string prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    base_dataset = list( out_data.items() )[0][1]
    history = base_dataset.history
    if history is None:
        print( "unknown history!" )
        return
    kingdom = param_dict.get( 'kingdom', None )
    org = param_dict.get( 'org', None )

    if not (kingdom or org):
        # Only reported; processing continues as before.
        print( "Parameters are not available." )
    #workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values
    if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ):
        kingdom = kingdom.value
    if isinstance( org, tools.parameters.basic.UnvalidatedValue ):
        org = org.value

    GALAXY_DATA_INDEX_DIR = app.config.tool_data_path
    microbe_info = load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' )

    def feature_label( description, chr_acc ):
        # Suffix appended to dataset names: " (<feature> for <organism>:<chr>)".
        org_info = microbe_info[ kingdom ][ org ]
        feature = org_info[ 'chrs' ][ chr_acc ][ 'data' ][ description ][ 'feature' ]
        return " (" + feature + " for " + org_info[ 'name' ] + ":" + chr_acc + ")"

    # Name of the primary output before it was relabelled; stays empty until
    # a #File1 line has been processed.
    basic_name = ""
    for line in stdout.split( "\n" ):
        fields = line.split( "\t" )
        if fields[0] == "#File1":
            description = fields[1]
            chr_acc = fields[2]
            dbkey = fields[3]
            file_type = fields[4]
            data = list( out_data.items() )[0][1]
            data.set_size()
            basic_name = data.name
            data.name = data.name + feature_label( description, chr_acc )
            data.dbkey = dbkey
            data.info = data.name
            data = app.datatypes_registry.change_datatype( data, file_type )
            data.init_meta()
            data.set_peek()
            app.model.context.add( data )
            app.model.context.flush()
        elif fields[0] == "#NewFile":
            description = fields[1]
            chr_acc = fields[2]
            dbkey = fields[3]
            filepath = fields[4]
            file_type = fields[5]
            newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library
            newdata.set_size()
            newdata.extension = file_type
            newdata.name = basic_name + feature_label( description, chr_acc )
            app.model.context.add( newdata )
            app.model.context.flush()
            app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset )
            history.add_dataset( newdata )
            app.model.context.add( history )
            app.model.context.flush()
            try:
                copyfile( filepath, newdata.file_name )
            except Exception:
                # Best effort: any failure to copy marks the dataset as
                # errored, but interpreter-exit signals are no longer
                # swallowed as they were by a bare "except:".
                newdata.info = "The requested file is missing from the system."
                newdata.state = jobs.JOB_ERROR
            else:
                newdata.info = newdata.name
                newdata.state = jobs.JOB_OK
            newdata.dbkey = dbkey
            newdata.init_meta()
            newdata.set_peek()
            app.model.context.flush()
|