0
|
1 #!c:\Perl\bin\perl.exe -w
|
|
2
|
|
=head1 NAME

bih4maconda.pl - Utility to detect potential contaminants in your peak list based on the MaConDa database

=head1 USAGE

Run the script with C<-h> or C<--help> to print the usage message.

=head1 SYNOPSIS

    bih4maconda.pl --input peaklist.tabular --mzCol INT --header INT --ppmError 5 --mode POSITIVE|NEGATIVE|BOTH|NEUTRAL --outputTab outTab.tabular --outputFull outFull.tabular --verbose 3

=head1 DESCRIPTION

This program annotates known contaminants in a peak list coming from an MS analysis, using the MaConDa database. The contaminant bank can be filtered by instrument, instrument type and ionisation mode before the comparison.

=over 4

=item B<function01>

=item B<function02>

=back

=head1 AUTHOR

Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt>

=head1 SUPPORT

You can find documentation for this script with the perldoc command.

    perldoc bih4maconda.pl

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 VERSION

version 1 : 2019/07/24

version 2 : ??

=cut
|
|
43 #=============================================================================
|
|
44 # Included modules and versions
|
|
45 #=============================================================================
|
|
46 ## Perl modules
|
|
47 use strict ;
|
|
48 use warnings ;
|
|
49 use diagnostics ;
|
|
50 use Carp qw (cluck croak carp) ;
|
|
51
|
|
52 use Data::Dumper ;
|
|
53 use Getopt::Long ;
|
|
54 use File::Basename ;
|
|
55 use FindBin ; ## Allows you to locate the directory of original perl script
|
|
56
|
|
57 ## Specific Perl Modules (PFEM)
|
|
58 use lib $FindBin::Bin ;
|
|
59 my $binPath = $FindBin::Bin ;
|
|
60
|
|
61 ## Dedicate Perl Modules (Home made...)
|
|
62 use Metabolomics::Fragment::Annotation qw( :all ) ;
|
|
63 use Metabolomics::Banks qw( :all ) ;
|
|
64 use Metabolomics::Banks::BloodExposome qw( :all ) ;
|
|
65 use Metabolomics::Banks::MaConDa qw( :all ) ;
|
|
66 use Metabolomics::Banks::AbInitioFragments qw( :all ) ;
|
|
67 use Metabolomics::Utils qw( :all ) ;
|
|
68
|
|
69 ## Initialized values
|
|
## Initialized values
my $ProgramName = basename($0) ;   # script name, used in the help and trace messages
my $OptionHelp ;                   # set when -h|--help is given
my $VERBOSE ;                      # stays undef unless -v|--verbose is given

# Command-line parameters: all of them stay undef until GetOptions fills them in.
my ($inputFile, $mzCol, $asHeader, $ppmError, $mode, $instruments, $instrumentTypes) ;
# NOTE(review): $outputXls and $outputHTML are parsed but not read anywhere in the
# visible part of this script.
my ($outputTabular, $outputXls, $outputHTML, $outputFull) ;

my $QueryMode ; # derived from --mode further down: ION or NEUTRAL

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
# GetOptions returns false when the command line cannot be parsed (unknown option,
# wrong value type, ...). It has already reported the problem on STDERR at that
# point, so show the usage and stop instead of running with undefined parameters.
GetOptions (
    "h|help"                 => \$OptionHelp,       # HELP
    "v|verbose=i"            => \$VERBOSE,          # Level of verbose (the launch trace is printed at level 3)
    "i|input=s"              => \$inputFile,        # Input file containing a peak list (mz)
    "mzCol=i"                => \$mzCol,            # Column in CSV file for MZ
    "header=i"               => \$asHeader,         # CSV file as header (1=true, 0=false)
    "ppmError=f"             => \$ppmError,         # ppm error
    "m|mode=s"               => \$mode,             # ionisation mode (POSITIVE|NEGATIVE|BOTH|NEUTRAL)
    "filterInstrument=s"     => \$instruments,      # comma-separated instruments listing to apply filter
    "filterInstrumentType=s" => \$instrumentTypes,  # comma-separated instrumentTypes listing to apply filter
    "outputTab=s"            => \$outputTabular,    # output file in tabular format
    "outputXls=s"            => \$outputXls,        # output file in Xls format
    "outputHtml=s"           => \$outputHTML,       # output file in html format
    "outputFull=s"           => \$outputFull,       # output file in full format
) or help() ;

## if you put the option -help or -h function help is started
if ( defined($OptionHelp) ) { help() ; }
|
|
100
|
|
101 #=============================================================================
|
|
102 # MAIN SCRIPT
|
|
103 #=============================================================================
|
|
104
|
|
## Echo the command line back to the user at verbosity level 3.
## $VERBOSE stays undef when --verbose is not given, so definedness is tested first:
## comparing undef with == raises an "uninitialized value" warning.
if ( ( defined $VERBOSE ) and ( $VERBOSE == 3 ) ) {
    print "The $ProgramName program is launched as:\n";
    print "./$ProgramName " ;
    print "--h " if (defined $OptionHelp) ;
    print "--input $inputFile " if (defined $inputFile) ;
    print "--mzCol $mzCol " if (defined $mzCol) ;
    print "--header $asHeader " if (defined $asHeader) ;
    print "--ppmError $ppmError " if (defined $ppmError) ;
    print "--mode $mode " if (defined $mode) ;
    print "--filterInstrument $instruments " if (defined $instruments) ;
    print "--filterInstrumentType $instrumentTypes " if (defined $instrumentTypes) ;
#    print "--outputXls $outputXls " if (defined $outputXls) ;
    print "--outputTab $outputTabular " if (defined $outputTabular) ;
#    print "--outputHtml $outputHTML " if (defined $outputHTML) ;
    print "--outputFull $outputFull " if (defined $outputFull) ;
    print "with verbose $VERBOSE" ;
    print "\n" ;
}
|
|
123
|
|
## Get conf
# Every *.cfg file found next to the script is loaded in turn; the last one wins.
# NOTE(review): $oCONF is not read anywhere in the visible part of this script.
my ( $oCONF, $oTEMPLATE ) = ( undef, undef ) ;
foreach my $conf ( <$binPath/*.cfg> ) { $oCONF = Metabolomics::Utils->utilsAsConf($conf) ; }

# Tabular template looked up next to the script; handed to the output writers below.
foreach my $template ( <$binPath/_template.tabular> ) { $oTEMPLATE = $template ; }

#print Dumper $oCONF ;

## Main workflow:
##   1. load the MaConDa contaminants bank,
##   2. apply the optional filters (instrument, instrument type, ion mode),
##   3. build the theoretical peak bank and parse the experimental peak list,
##   4. compare both lists with a ppm tolerance,
##   5. write the requested outputs.
## Croaks when the input file is missing or when --mode holds an unknown value.
if ( ( defined $inputFile ) and ( $inputFile ne "" ) and ( -e $inputFile ) ) {

    # create a empty bank object
    my $oBank = Metabolomics::Banks::MaConDa->new() ;

    # get contaminants bank
    my ($oFilteredContaminants, $totalEntryNum, $filteredEntryNum) = (undef, 0, 0) ;

    $oBank->getContaminantsExtensiveFromSource() ;

    # Applying filters: each filter works on the result of the previous one,
    # or on the full bank when it is the first filter requested.

    ## Instrument #Filter01 (ALL|...)
    if ( defined $instruments ) {
        my @instruments = split(/,/, $instruments) ;
        ($oFilteredContaminants, $totalEntryNum, $filteredEntryNum) = $oBank->filterContaminantInstruments(\@instruments) ;
        print "Only few instruments were selected: $filteredEntryNum/$totalEntryNum entries were filtered.\n" ;
    }

    ## Instrument type #Filter02 (ALL|...)
    if ( defined $instrumentTypes ) {
        my @instrumentTypes = split(/,/, $instrumentTypes) ;
        my $oSource = ( defined $oFilteredContaminants ) ? $oFilteredContaminants : $oBank ;
        ($oFilteredContaminants, $totalEntryNum, $filteredEntryNum) = $oSource->filterContaminantInstrumentTypes(\@instrumentTypes) ;
        print "Only few instrument types were selected: $filteredEntryNum/$totalEntryNum entries were filtered.\n" ;
    }

    ## Ion mode #Filter03 (POSITIVE|NEGATIVE|BOTH|NEUTRAL)
    if ( defined $mode ) {

        print "Mode selected is $mode..." ;

        # Validate the mode BEFORE handing it to the bank filter, so that an
        # unknown value is rejected here and never reaches the library call.
        if ( ($mode eq 'POSITIVE') or ($mode eq 'NEGATIVE') or ($mode eq 'BOTH') ) {
            $QueryMode = 'ION' ;
        }
        elsif ( $mode eq 'NEUTRAL' ) {
            $QueryMode = 'NEUTRAL' ;
        }
        else {
            croak "[ERROR] mode $mode is not recognize...Please defined a acceptable one\n" ;
        }

        my $oSource = ( defined $oFilteredContaminants ) ? $oFilteredContaminants : $oBank ;
        $oFilteredContaminants = $oSource->filterContaminantIonMode($mode) ;

        print "and detected query mode is $QueryMode...\n" ;
    }

    # in case no filter is defined before: work on the whole bank
    if ( ( !defined $mode ) and ( !defined $instruments ) and ( !defined $instrumentTypes ) ) {
        $oFilteredContaminants = $oBank ;
    }

    # build the query object
    # NOTE(review): $QueryMode is still undef here when --mode was not given;
    # confirm how buildTheoPeakBankFromContaminants handles that case.
    $oFilteredContaminants->buildTheoPeakBankFromContaminants($QueryMode) ;

    # get experimental masses
    if ( (defined $mzCol) and (defined $asHeader) ) {
        $oFilteredContaminants->parsingMsFragments($inputFile, $asHeader, $mzCol) ;
    }
    else {
        # Without both parameters the peak list is never read: tell the user
        # rather than skipping the step silently.
        carp "[WARN] --mzCol and/or --header is not defined: the peak list of $inputFile was not parsed\n" ;
    }

    ## Analysis :
    my $oAnalysis = Metabolomics::Fragment::Annotation->new($oFilteredContaminants) ;

    # Compare peaklists:
    $oAnalysis->compareExpMzToTheoMzList('PPM', $ppmError) ;

#    print Dumper $oBank ;

    # $inputFile is known to be defined inside this branch, so only the output
    # targets need to be tested.

    # complete initial input tabular file
    if ( defined $outputFull ) {
        $oAnalysis->writeFullTabularWithPeakBankObject($inputFile, $oTEMPLATE, $outputFull) ;
    }
    # write a simple tabular output
    if ( defined $outputTabular ) {
        $oAnalysis->writeTabularWithPeakBankObject($oTEMPLATE, $outputTabular) ;
    }
}
else {
    croak "Input file is not defined or is not exist.\n" ;
}
|
|
215
|
|
216
|
|
217
|
|
218
|
|
219
|
|
#====================================================================================
# Help subroutine called with -h option
# number of arguments : 0
# Argument(s)        :
# Return             : never returns - prints the usage on STDERR and exits with status 1
#====================================================================================
sub help {

    # The usage line lists every mode value accepted by the main script
    # (POSITIVE, NEGATIVE, BOTH, NEUTRAL) and the two optional bank filters,
    # which take comma-separated lists.
    print STDERR <<EOF ;
### $ProgramName ###
#
# AUTHOR: Franck Giacomoni
# VERSION: 1.0
# CREATED: 2019/08/21
# LAST MODIF:
# PURPOSE: This program annotates any known contaminants from ms analysis depending some filters as instruments, ionisation mode from MaConDa database
# USAGE: $ProgramName or $ProgramName --input *.tabular --mzCol INT --header INT --ppmError 5 --mode POSITIVE|NEGATIVE|BOTH|NEUTRAL [--filterInstrument A,B,...] [--filterInstrumentType A,B,...] --outputTab outTab.tabular --outputFull outFull.tabular --verbose 3
EOF
    exit(1) ;
}
|
|
241
|
|
242 ## END of script - F Giacomoni
|
|
243
|
|
244 __END__ |