0
|
1 #!c:\Perl\bin\perl.exe -w
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 bih4maconda.pl - Utility to detect potential contaminants in your peak list, based on the MaConDa database
|
|
6
|
|
7 =head1 USAGE
|
|
8
|
|
9
|
|
10 =head1 SYNOPSIS
|
|
11
|
|
12 =head1 DESCRIPTION
|
|
13
|
|
14 =over 4
|
|
15
|
|
16 =item B<function01>
|
|
17
|
|
18 =item B<function02>
|
|
19
|
|
20 =back
|
|
21
|
|
22 =head1 AUTHOR
|
|
23
|
|
24 Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt>
|
|
25
|
|
26 =head1 SUPPORT
|
|
27
|
|
28 You can find documentation for this module with the perldoc command.
|
|
29
|
|
30 perldoc bih4maconda.pl
|
|
31
|
|
32 =head1 LICENSE
|
|
33
|
|
34 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
|
35
|
|
36 =head1 VERSION
|
|
37
|
|
38 version 1 : 2019/07/24
|
|
39
|
|
40 version 2 : ??
|
|
41
|
|
42 =cut
|
|
43 #=============================================================================
|
|
44 # Included modules and versions
|
|
45 #=============================================================================
|
|
46 ## Perl modules
|
|
47 use strict ;
|
|
48 use warnings ;
|
|
49 use diagnostics ;
|
|
50 use Carp qw (cluck croak carp) ;
|
|
51
|
|
52 use Data::Dumper ;
|
|
53 use Getopt::Long ;
|
|
54 use File::Basename ;
|
|
55 use FindBin ; ## Allows you to locate the directory of original perl script
|
|
56
|
|
57 ## Specific Perl Modules (PFEM)
|
|
58 use lib $FindBin::Bin ;
|
|
59 my $binPath = $FindBin::Bin ;
|
|
60
|
|
61 ## Dedicate Perl Modules (Home made...)
|
|
62 use Metabolomics::Fragment::Annotation qw( :all ) ;
|
|
63 use Metabolomics::Banks qw( :all ) ;
|
|
64 use Metabolomics::Banks::MaConDa qw( :all ) ;
|
|
65 use Metabolomics::Utils qw( :all ) ;
|
|
66
|
|
## Initialized values
# Name of this script as invoked; shown in the help banner and the launch trace.
my $ProgramName = basename($0) ;

# Generic switches; they stay undef until the command line sets them.
my ( $OptionHelp, $VERBOSE ) ;

# Input peak list description and contaminant filters (all undef by default).
my ( $inputFile, $mzCol, $asHeader, $ppmError ) ;
my ( $mode, $instruments, $instrumentTypes ) ;

# Output destinations (all undef by default).
my ( $outputTabular, $outputXls, $outputHTML, $outputFull ) ;

# Depending on the input data the query mode can be ION|NEUTRAL.
my $QueryMode ;
|
|
76
|
|
#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
# Parse the command line into the option holders declared above.
# GetOptions returns false when it meets an unknown option or a value of the
# wrong type (it has already warned on STDERR by then): in that case print the
# usage and stop rather than run the analysis on a half-parsed configuration.
GetOptions (
    "h|help"                 => \$OptionHelp,       # HELP
    "v|verbose=i"            => \$VERBOSE,          # Level of verbose (the launch trace is printed at level 3)
    "i|input=s"              => \$inputFile,        # Input file containing a peak list (mz)
    "mzCol=i"                => \$mzCol,            # Column in CSV file for MZ
    "header=i"               => \$asHeader,         # CSV file has a header (1=true, 0=false)
    "ppmError=f"             => \$ppmError,         # ppm error
    "m|mode=s"               => \$mode,             # indicate the ionisation mode (POSITIVE|NEGATIVE|BOTH|NEUTRAL)
    "filterInstrument=s"     => \$instruments,      # comma-separated instruments listing to apply filter
    "filterInstrumentType=s" => \$instrumentTypes,  # comma-separated instrumentTypes listing to apply filter
    "outputTab=s"            => \$outputTabular,    # output file in tabular format
    "outputXls=s"            => \$outputXls,        # output file in Xls format
    "outputHtml=s"           => \$outputHTML,       # output file in html format
    "outputFull=s"           => \$outputFull,       # output file in full format
) or help() ;

## if you put the option -help or -h function help is started
if ( defined($OptionHelp) ) { help() ; }
|
|
98
|
|
#=============================================================================
#                                MAIN SCRIPT
#=============================================================================

# At verbosity level 3, echo the command line as it was understood.
# $VERBOSE is still undef when --verbose was not given, so definedness is
# tested first: comparing undef with == raises an "uninitialized value"
# warning on every run without the option.
if ( ( defined $VERBOSE ) and ( $VERBOSE == 3 ) ) {
    print "The $ProgramName program is launched as:\n";
    print "./$ProgramName " ;
    print "--h " if (defined $OptionHelp) ;
    print "--input $inputFile " if (defined $inputFile) ;
    print "--mzCol $mzCol " if (defined $mzCol) ;
    print "--header $asHeader " if (defined $asHeader) ;
    print "--ppmError $ppmError " if (defined $ppmError) ;
    print "--mode $mode " if (defined $mode) ;
    print "--filterInstrument $instruments " if (defined $instruments) ;
    print "--filterInstrumentType $instrumentTypes " if (defined $instrumentTypes) ;
#    print "--outputXls $outputXls " if (defined $outputXls) ;
    print "--outputTab $outputTabular " if (defined $outputTabular) ;
#    print "--outputHtml $outputHTML " if (defined $outputHTML) ;
    print "--outputFull $outputFull " if (defined $outputFull) ;
    print "with verbose $VERBOSE" ;
    print "\n" ;
}
|
|
121
|
|
## Get conf
my ( $oCONF, $oTEMPLATE ) ;

# Hand every *.cfg file found in the script directory to utilsAsConf; when
# several match, the result of the last one returned by glob is kept.
for my $cfgPath ( glob("$binPath/*.cfg") ) {
    $oCONF = Metabolomics::Utils->utilsAsConf($cfgPath) ;
}

# Resolve the path of the tabular template expected in the script directory.
for my $templatePath ( glob("$binPath/_template.tabular") ) {
    $oTEMPLATE = $templatePath ;
}

#print Dumper $oCONF ;
|
|
129
|
|
130
|
|
# The whole analysis needs an existing input peak list: refuse to go further
# when the option is missing, empty, or points to nothing on disk.
if ( ( !defined $inputFile ) or ( $inputFile eq "" ) or ( !( -e $inputFile ) ) ) {
    croak "Input file is not defined or is not exist.\n" ;
}
else {

    # create an empty bank object
    my $oBank = Metabolomics::Banks::MaConDa->new() ;

    # filtered bank, plus the entry counters reported by the filters
    my ( $oFilteredContaminants, $totalEntryNum, $fiteredEntryNum ) = ( undef, 0, 0 ) ;

    # get contaminants bank
    $oBank->getContaminantsExtensiveFromSource() ;

    # Applying filters: a filter works on the result of the previous one when
    # there is one, and on the full bank otherwise.

    ## Instrument #Filter01 (ALL|...)
    if ( defined $instruments ) {
        my @instruments = split( /,/, $instruments ) ;
        ( $oFilteredContaminants, $totalEntryNum, $fiteredEntryNum ) = $oBank->filterContaminantInstruments( \@instruments ) ;
        print "Only few instruments were selected: $fiteredEntryNum/$totalEntryNum entries were filtered.\n" ;
    }

    ## Instrument type #Filter02 (ALL|...)
    if ( defined $instrumentTypes ) {
        my @instrumentTypes = split( /,/, $instrumentTypes ) ;
        my $oSource = ( defined $oFilteredContaminants ) ? $oFilteredContaminants : $oBank ;

        ( $oFilteredContaminants, $totalEntryNum, $fiteredEntryNum ) = $oSource->filterContaminantInstrumentTypes( \@instrumentTypes ) ;

        print "Only few instrument types were selected: $fiteredEntryNum/$totalEntryNum entries were filtered.\n" ;
    }

    ## Ion mode #Filter03 (POS|NEG|BOTH)
    if ( defined $mode ) {

        print "Mode selected is $mode..." ;

        my $oSource = ( defined $oFilteredContaminants ) ? $oFilteredContaminants : $oBank ;
        $oFilteredContaminants = $oSource->filterContaminantIonMode($mode) ;

        # The ionisation mode decides how the bank is queried afterwards.
        my %queryModeOf = (
            'POSITIVE' => 'ION',
            'NEGATIVE' => 'ION',
            'BOTH'     => 'ION',
            'NEUTRAL'  => 'NEUTRAL',
        ) ;

        if ( exists $queryModeOf{$mode} ) {
            $QueryMode = $queryModeOf{$mode} ;
        }
        else {
            croak "[ERROR] mode $mode is not recognize...Please defined a acceptable one\n" ;
        }
        print "and detected query mode is $QueryMode...\n" ;
    }

    # in case no filter was defined before, work on the complete bank
    if ( !( ( defined $mode ) or ( defined $instruments ) or ( defined $instrumentTypes ) ) ) {
        $oFilteredContaminants = $oBank ;
    }

    # build the query object
    $oFilteredContaminants->buildTheoPeakBankFromContaminants($QueryMode) ;

    # get experimental masses (only when both the mz column and the header flag were given)
    if ( ( defined $mzCol ) and ( defined $asHeader ) ) {
        $oFilteredContaminants->parsingMsFragments( $inputFile, $asHeader, $mzCol ) ;
    }

    ## Analysis :
    my $oAnalysis = Metabolomics::Fragment::Annotation->new($oFilteredContaminants) ;

    # Compare peaklists:
    $oAnalysis->compareExpMzToTheoMzList( 'PPM', $ppmError ) ;

#    print Dumper $oBank ;

    # complete initial input tabular file
    if ( ( defined $outputFull ) and ( defined $inputFile ) ) {
        my $tabularfile = $oAnalysis->writeFullTabularWithPeakBankObject( $inputFile, $oTEMPLATE, $outputFull ) ;
    }

    # write a simple tabular output
    if ( ( defined $outputTabular ) and ( defined $inputFile ) ) {
        my $tabularfile = $oAnalysis->writeTabularWithPeakBankObject( $oTEMPLATE, $outputTabular ) ;
    }
}
|
|
213
|
|
214
|
|
215
|
|
216
|
|
217
|
|
#====================================================================================
# Help subroutine called with -h option
# number of arguments : 0
# Argument(s)        :
# Return           : none - prints the banner on STDERR, then exits with status 1
#====================================================================================
sub help {

    # The usage line lists every mode accepted by the main script and the two
    # optional instrument filters, which take comma-separated values.
    print STDERR <<EOF ;
### $ProgramName ###
#
# AUTHOR:     Franck Giacomoni
# VERSION:    1.0
# CREATED:    2019/08/21
# LAST MODIF:
# PURPOSE:    This program annotates any known contaminants from ms analysis depending some filters as instruments, ionisation mode from MaConDa database
# USAGE:      $ProgramName --help or $ProgramName --input *.tabular --mzCol INT --header INT --ppmError 5 --mode POSITIVE|NEGATIVE|BOTH|NEUTRAL [--filterInstrument NAME1,NAME2] [--filterInstrumentType TYPE1,TYPE2] --outputTab outTab.tabular --outputFull outFull.tabular --verbose 3
EOF
    exit(1) ;
}
|
|
239
|
|
240 ## END of script - F Giacomoni
|
|
241
|
|
242 __END__ |