Mercurial > repos > fgiacomoni > hmdb_ms_search
comparison wsdl_hmdb.pl @ 1:6d0a0f8f672a draft
planemo upload commit f67323ae4fa7fdbd9f4518ede105a7d7cd44b471
author | fgiacomoni |
---|---|
date | Wed, 23 Nov 2016 09:31:50 -0500 |
parents | |
children | 6091a80df951 |
comparison
equal
deleted
inserted
replaced
0:9583f9772198 | 1:6d0a0f8f672a |
---|---|
1 #!perl | |
2 | |
3 ## script : wsdl_hmdb.pl | |
4 #============================================================================= | |
5 # Included modules and versions | |
6 #============================================================================= | |
7 ## Perl modules | |
8 use strict ; | |
9 use warnings ; | |
10 use Carp qw (cluck croak carp) ; | |
11 | |
12 use Data::Dumper ; | |
13 use Getopt::Long ; | |
14 use POSIX ; | |
15 use FindBin ; ## Permet de localisez le repertoire du script perl d'origine | |
16 | |
17 ## Specific Modules (Home made...) | |
18 use lib $FindBin::Bin ; | |
19 my $binPath = $FindBin::Bin ; | |
20 use lib::hmdb qw( :ALL ) ; | |
21 ## PFEM Perl Modules | |
22 use lib::conf qw( :ALL ) ; | |
23 use lib::csv qw( :ALL ) ; | |
24 | |
## Command-line option holders : all start undefined and are filled by GetOptions below
my $help ;
my $mass ;
my ( $masses_file, $col_id, $col_mass, $header_choice, $nbline_header ) ;
my $max_query ;
my ( $delta, $molecular_species, $out_tab, $out_html, $out_xls ) ;


#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================

## Map each command-line switch onto its holder variable.
GetOptions(
	'h'                => \$help,              ## print the help and exit
	'mass:s'           => \$mass,              ## one mass, or several masses in a single space-separated string
	'masses:s'         => \$masses_file,       ## path to the tabular input file of masses
	'header_choice:s'  => \$header_choice,     ## presence or not of a header in the tabular file
	'nblineheader:i'   => \$nbline_header,     ## number of header lines present in the file
	'colfactor:i'      => \$col_mass,          ## column id used to retrieve the masses in the tabular file
	'delta:f'          => \$delta,             ## mass delta, reported as "delta (mass-to-charge ratio)" in the search summary
	'mode:s'           => \$molecular_species, ## molecular species (positive/negative/neutral)
	'maxquery:i'       => \$max_query,         ## maximum entries returned by query (default is 20 // min 1 & max 50)
	'output_tabular:s' => \$out_tab,           ## path to the tabular output (input + results)
	'output_html|v:s'  => \$out_html,          ## path to the html results view
	'output_xlsx:s'    => \$out_xls,           ## path to the xls-like format output
) ;

#=============================================================================
#                                EXCEPTIONS
#=============================================================================
## -h was requested : print the usage and leave (help() never returns).
help() if $help ;
55 | |
56 #============================================================================= | |
57 # MAIN SCRIPT | |
58 #============================================================================= | |
59 | |
60 | |
## -------------- Conf file ------------------------ :
## Every *.cfg file located next to the script is parsed; when several files
## match, the configuration returned for the last one is the one kept.
my ( $CONF ) = ( undef ) ;
foreach my $conf ( <$binPath/*.cfg> ) {
	my $oConf = lib::conf::new() ;
	$CONF = $oConf->as_conf($conf) ;
}

## Fail early with an explicit message : without a configuration the hash would be
## silently autovivified below and the run would abort much later on an unrelated
## error (e.g. the division by HTML_ENTRIES_PER_PAGE when counting the html pages).
if ( !defined $CONF ) {
	croak "Can't load any configuration file (*.cfg) from $binPath\n" ;
}

## -------------- HTML template file ------------------------ :
## Same rule as for the conf files : the last *.tmpl found is the template used.
foreach my $html_template ( <$binPath/*.tmpl> ) {
	$CONF->{'HTML_TEMPLATE'} = $html_template ;
}

## The -maxquery option takes precedence over the value of the conf file.
if ( !defined $max_query ) {
	$max_query = $CONF->{'HMDB_MAX_QUERY'} ;
}
74 | |
75 | |
## --------------- Global parameters ---------------- :
my ( $ids, $masses, $results ) = ( undef, undef, undef ) ;
my ( $complete_rows, $nb_pages_for_html_out ) = ( undef, 1 ) ;
my $metabocard_features = undef ;
my $search_condition = "Search params : Molecular specie = $molecular_species / delta (mass-to-charge ratio) = $delta" ;

## --------------- retrieve input data -------------- :
## Fills $masses (array ref of the queried masses) and $ids (array ref holding exactly
## one generated identifier per mass : mz_00001, mz_00002, ...).
## The -mass option takes precedence over the -masses file when both are given.

## manage masses given directly on the command line (space separated)
if ( ( defined $mass ) and ( $mass ne '' ) ) {
	my @masses = split(" ", $mass);
	$masses = \@masses ;
	$ids = [] ; ## stays an empty (but defined) list when no mass can be split out
	foreach my $rank ( 1 .. scalar(@masses) ) {
		push ( @{$ids}, "mz_0".sprintf("%04s", $rank ) ) ;
	}
} ## END IF
## manage csv file containing list of masses
elsif ( ( defined $masses_file ) and ( $masses_file ne "" ) and ( -e $masses_file ) ) {
	## parse all csv for later : output csv build
	my $ocsv_input = lib::csv->new() ;
	my $complete_csv = $ocsv_input->get_csv_object( "\t" ) ;
	$complete_rows = $ocsv_input->parse_csv_object($complete_csv, \$masses_file) ;

	## parse masses and set ids
	my $ocsv = lib::csv->new() ;
	my $csv = $ocsv->get_csv_object( "\t" ) ;
	## a missing or negative number of header lines is read as "no header line"
	if ( ( !defined $nbline_header ) or ( $nbline_header < 0 ) ) { $nbline_header = 0 ; }
	$masses = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_mass, $header_choice, $nbline_header ) ; ## retrieve mz values on csv

	## One id per mass : the loop previously ran up to (number of masses + 1) and so
	## produced one id more than the number of masses, unlike the -mass branch above.
	my $nbmz = @$masses ;
	$ids = [] ;
	foreach my $rank ( 1 .. $nbmz ) {
		push ( @{$ids}, "mz_0".sprintf("%04s", $rank ) ) ;
	}
}
## no usable input : warn, print the usage and leave (help() exits)
else {
	warn "[warning] Input data are missing : none mass or file of masses\n" ;
	help() ;
}
#print Dumper $masses ;
109 #print Dumper $masses ; | |
## ---------------- launch queries -------------------- :
## The search needs a strictly positive delta and a non-empty molecular species;
## anything else aborts the script.

if ( ( !defined $delta ) or ( !( $delta > 0 ) ) or ( !defined $molecular_species ) or ( $molecular_species eq '' ) ) {
	croak "Can't work with HMDB : missing paramaters (list of masses, delta or molecular species)\n" ;
}
else {
	my $oHmdb = lib::hmdb::new() ;

	## the full mass list is cut in sub-lists, using the HMDB_LIMITS value of the conf file
	my $submasses = $oHmdb->extract_sub_mz_lists($masses, $CONF->{HMDB_LIMITS} ) ;
	$results = [] ; # every batch appends its own results to this list

	## get the hmdb server status by a test query - continuous queries or kill script.
	my $status = $oHmdb->test_matches_from_hmdb_ua() ;
	$oHmdb->check_state_from_hmdb_ua($status) ; ## can kill the script execution

	## one query per sub-list of masses
	foreach my $mz_batch ( @{$submasses} ) {
		my ( $hmdb_masses, $nb_masses_to_submit ) = $oHmdb->prepare_multi_masses_query($mz_batch) ;
		my $hmdb_pages = $oHmdb->get_matches_from_hmdb_ua($hmdb_masses, $delta, $molecular_species) ;
		## $max_query is handed to the parser : the number of entries returned is cut off at this step !
		my ( $batch_results ) = $oHmdb->parse_hmdb_csv_results($hmdb_pages, $mz_batch, $max_query) ;
		push ( @{$results}, @{$batch_results} ) ;
	}

	## foreach metabolite get its own metabocard
	my $hmdb_ids = $oHmdb->get_unik_ids_from_results($results) ;
	$metabocard_features = $oHmdb->get_hmdb_metabocard_from_id($hmdb_ids, $CONF->{'HMDB_METABOCARD_URL'}) ; ## Try to multithread the querying

	## Map metabocards with results (add supplementary data)
	if ( ( defined $results ) and ( defined $metabocard_features ) ) {
		$results = $oHmdb->map_suppl_data_on_hmdb_results($results, $metabocard_features) ;
	}

	## Uses N mz and theirs entries per page (see config file) :
	## number of html pages needed = number of input masses / entries per page, rounded up.
	my $nb_masses = scalar( @{$masses} ) ;
	$nb_pages_for_html_out = ceil( $nb_masses / $CONF->{HTML_ENTRIES_PER_PAGE} ) ;
} ## end ELSE
159 | |
## -------------- Produce HTML/CSV output ------------------ :

## HTML view : masses, ids and results are loaded into a "tbody" object
## which is then written through the html template.
if ( ( defined $out_html ) and ( defined $results ) ) {
	my $oHtml = lib::hmdb::new() ;
	my ($tbody_object) = $oHtml->set_html_tbody_object( $nb_pages_for_html_out, $CONF->{HTML_ENTRIES_PER_PAGE} ) ;
	($tbody_object) = $oHtml->add_mz_to_tbody_object($tbody_object, $CONF->{HTML_ENTRIES_PER_PAGE}, $masses, $ids) ;
	($tbody_object) = $oHtml->add_entries_to_tbody_object($tbody_object, $CONF->{HTML_ENTRIES_PER_PAGE}, $masses, $results) ;
	$oHtml->write_html_skel(\$out_html, $tbody_object, $nb_pages_for_html_out, $search_condition, $CONF->{'HTML_TEMPLATE'}, $CONF->{'JS_GALAXY_PATH'}, $CONF->{'CSS_GALAXY_PATH'}) ;
} ## END IF
else {
	warn "Can't create a HTML output for HMDB : no result found or your output file is not defined\n" ;
}

## Tabular output
if ( ( defined $out_tab ) and ( defined $results ) ) {
	# produce a csv based on METLIN format
	my $ocsv = lib::hmdb::new() ;

	## Same precedence as the input stage : a non-empty -mass wins over -masses.
	## Testing the file first would run the "file" branch on data read from -mass,
	## i.e. with an undefined $complete_rows, when both options are given.
	if ( ( defined $mass ) and ( $mass ne '' ) ) {
		$ocsv->write_csv_one_mass($masses, $ids, $results, $out_tab) ;
	}
	elsif ( defined $masses_file ) {
		my $lm_matrix = undef ;
		## an absent -header_choice matches neither 'yes' nor 'no' (and raises no "uninitialized" warning)
		my $header = ( defined $header_choice ) ? $header_choice : '' ;

		if ( ( defined $nbline_header ) and ( $header eq 'yes' ) ) {
			## results matrix carries a 'hmdb' label; merged into the input rows with the argument ($nbline_header - 1)
			$lm_matrix = $ocsv->set_hmdb_matrix_object_with_ids('hmdb', $masses, $results ) ;
			$lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($complete_rows, $lm_matrix, $nbline_header-1) ;
		}
		elsif ( $header eq 'no' ) {
			## no label is given to the results matrix; merged into the input rows with the argument 0
			$lm_matrix = $ocsv->set_hmdb_matrix_object_with_ids(undef, $masses, $results ) ;
			$lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($complete_rows, $lm_matrix, 0) ;
		}
		## NOTE(review): $lm_matrix is still undef here when header_choice is neither 'yes' nor 'no' - confirm how write_csv_skel copes with it
		$ocsv->write_csv_skel(\$out_tab, $lm_matrix) ;
	}
} ## END IF
else {
	warn "Can't create a tabular output for HMDB : no result found or your output file is not defined\n" ;
}

## Write XLS like format (same writer as the single-mass tabular output)
if ( ( defined $out_xls ) and ( defined $results ) ) {
	my $ocsv = lib::hmdb::new() ;
	$ocsv->write_csv_one_mass($masses, $ids, $results, $out_xls) ;
}
204 | |
205 | |
#====================================================================================
# Help subroutine called with -h option (and when no usable input is given)
# number of arguments : 0
# Argument(s)        :
# Return           : never returns - prints the usage on STDERR, then exits with status 1
#
# The usage text lists the switches exactly as they are declared in the GetOptions call
# (-output_tabular, -output_html, -output_xlsx, -maxquery) : the former "-output" and
# "-view" spellings are not options of this script.
#====================================================================================
sub help {
	print STDERR "
help of wsdl_hmdb

# wsdl_hmdb is a script to query HMDB website using mz and return a list of candidates sent by HMDB based on the ms search tool.
# Input : one mass, a string list of masses or a tabular file of masses
# Author : Franck Giacomoni and Marion Landi
# Email : fgiacomoni\@clermont.inra.fr
# Version : 1.4
# Created : 08/07/2012
# Updated : 21/01/2016
USAGE :
		wsdl_hmdb.pl -mass [one mass or a string list of exact masses] -delta [mz delta] -mode [molecular species: positive|negative|neutral] -maxquery [max number of entries returned by query] -output_tabular [output tabular file] -output_html [output html file] -output_xlsx [output xls-like file]

		or
		wsdl_hmdb.pl -masses [an input file of mzs] -colfactor [col of mz] -header_choice [yes|no] -nblineheader [nb of lines containing file header : 0-n]
		-delta [mz delta] -mode [molecular species: positive|negative|neutral] -maxquery [max number of entries returned by query] -output_tabular [output tabular file] -output_html [output html file] -output_xlsx [output xls-like file]

		or
		wsdl_hmdb.pl -h for help

";
	exit(1);
}
236 | |
237 ## END of script - F Giacomoni | |
238 | |
239 __END__ | |
240 | |
241 =head1 NAME | |
242 | |
243 wsdl_hmdb.pl -- script to query HMDB website using mz and return a list of candidates sent by HMDB based on the ms search tool. | |
244 | |
245 =head1 USAGE | |
246 | |
247 wsdl_hmdb.pl -mass [one mass or a string list of exact masses] -delta [mz delta] -mode [molecular species: positive|negative|neutral] -output_tabular [output tabular file] -output_html [output html file] | |
248 | |
249 or | |
250 wsdl_hmdb.pl -masses [an input file of mzs] -colfactor [col of mz] -header_choice [yes|no] -nblineheader [nb of lines containing file header : 0-n] | |
251 -delta [mz delta] -mode [molecular species: positive|negative|neutral] -output_tabular [output tabular file] -output_html [output html file] | |
252 | |
253 =head1 SYNOPSIS | |
254 | |
255 This script manages batch queries on HMDB server. | |
256 | |
257 =head1 DESCRIPTION | |
258 | |
259 This main program is a script to query HMDB website using mz and return a list of candidates sent by HMDB based on the ms search tool. | |
260 | |
261 =over 4 | |
262 | |
263 =item B<function01> | |
264 | |
265 =item B<function02> | |
266 | |
267 =back | |
268 | |
269 =head1 AUTHOR | |
270 | |
271 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt> | |
272 | |
273 =head1 LICENSE | |
274 | |
275 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
276 | |
277 =head1 VERSION | |
278 | |
279 version 1.0 : 06 / 06 / 2013 | |
280 | |
281 version 1.2 : 27 / 01 / 2014 | |
282 | |
283 version 1.3 : 19 / 11 / 2014 | |
284 | |
285 version 1.4 : 21 / 01 / 2016 - a clean version for community | |
286 | |
287 =cut |