Mercurial > repos > pieterlukasse > prims_metabolomics2

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GCMS/__init__.py	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,6 @@
+'''
+Module containing Galaxy tools for the GC/MS pipeline
+Created on Mar 6, 2012
+
+@author: marcelk
+'''
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GCMS/combine_output.py	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,253 @@
+#!/usr/bin/env python
+# encoding: utf-8
+'''
+Module to combine output from two GCMS Galaxy tools (RankFilter and CasLookup)
+'''
+
+import csv
+import re
+import sys
+import math
+import pprint
+
+__author__ = "Marcel Kempenaar"
+__contact__ = "brs@nbic.nl"
+__copyright__ = "Copyright, 2012, Netherlands Bioinformatics Centre"
+__license__ = "MIT"
+
+def _process_data(in_csv):
+    '''
+    Generic method to parse a tab-separated file returning a dictionary with named columns
+    @param in_csv: input filename to be parsed
+    '''
+    data = list(csv.reader(open(in_csv, 'rU'), delimiter='\t'))
+    header = data.pop(0)
+    # Create dictionary with column name as key
+    output = {}
+    for index in xrange(len(header)):
+        output[header[index]] = [row[index] for row in data]
+    return output
+
+
+def _merge_data(rankfilter, caslookup):
+    '''
+    Merges data from both input dictionaries based on the Centrotype field. This method will
+    build up a new list containing the merged hits as the items.
+    @param rankfilter: dictionary holding RankFilter output in the form of N lists (one list per attribute name)
+    @param caslookup: dictionary holding CasLookup output in the form of N lists (one list per attribute name)
+    '''
+    # TODO: test for correct input files -> rankfilter and caslookup internal lists should have the same lenghts:
+    if (len(rankfilter['ID']) != len(caslookup['Centrotype'])):
+        raise Exception('rankfilter and caslookup files should have the same nr of rows/records ')
+
+    merged = []
+    processed = {}
+    for compound_id_idx in xrange(len(rankfilter['ID'])):
+        compound_id = rankfilter['ID'][compound_id_idx]
+        if not compound_id in processed :
+            # keep track of processed items to not repeat them
+            processed[compound_id] = compound_id
+            # get centrotype nr
+            centrotype = compound_id.split('-')[0]
+            # Get the indices for current compound ID in both data-structures for proper matching
+            rindex = [index for index, value in enumerate(rankfilter['ID']) if value == compound_id]
+            cindex = [index for index, value in enumerate(caslookup['Centrotype']) if value == centrotype]
+
+            merged_hits = []
+            # Combine hits
+            for hit in xrange(len(rindex)):
+                # Create records of hits to be merged ("keys" are the attribute names, so what the lines below do
+                # is create a new "dict" item with same "keys"/attributes, with each attribute filled with its
+                # corresponding value in the rankfilter or caslookup tables; i.e.
+                # rankfilter[key] => returns the list/array with size = nrrows, with the values for the attribute
+                #                    represented by "key". rindex[hit] => points to the row nr=hit (hit is a rownr/index)
+                rf_record = dict(zip(rankfilter.keys(), [rankfilter[key][rindex[hit]] for key in rankfilter.keys()]))
+                cl_record = dict(zip(caslookup.keys(), [caslookup[key][cindex[hit]] for key in caslookup.keys()]))
+
+                merged_hit = _add_hit(rf_record, cl_record)
+                merged_hits.append(merged_hit)
+
+            merged.append(merged_hits)
+
+    return merged, len(rindex)
+
+
+def _add_hit(rankfilter, caslookup):
+    '''
+    Combines single records from both the RankFilter- and CasLookup-tools
+    @param rankfilter: record (dictionary) of one compound in the RankFilter output
+    @param caslookup: matching record (dictionary) of one compound in the CasLookup output
+    '''
+    # The ID in the RankFilter output contains the following 5 fields:
+    rf_id = rankfilter['ID'].split('-')
+    try:
+        name, formula = _remove_formula(rankfilter['Name'])
+        hit = [rf_id[0], # Centrotype
+               rf_id[1], # cent.Factor
+               rf_id[2], # scan nr
+               rf_id[3], # R.T. (umin)
+               rf_id[4], # nr. Peaks
+               # Appending other fields
+               rankfilter['R.T.'],
+               name,
+               caslookup['FORMULA'] if not formula else formula,
+               rankfilter['Library'].strip(),
+               rankfilter['CAS'].strip(),
+               rankfilter['Forward'],
+               rankfilter['Reverse'],
+               ((float(rankfilter['Forward']) + float(rankfilter['Reverse'])) / 2),
+               rankfilter['RIexp'],
+               caslookup['RI'],
+               rankfilter['RIsvr'],
+               # Calculate absolute differences
+               math.fabs(float(rankfilter['RIexp']) - float(rankfilter['RIsvr'])),
+               math.fabs(float(caslookup['RI']) - float(rankfilter['RIexp'])),
+               caslookup['Regression.Column.Name'],
+               caslookup['min'],
+               caslookup['max'],
+               caslookup['nr.duplicates'],
+               caslookup['Column.phase.type'],
+               caslookup['Column.name'],
+               rankfilter['Rank'],
+               rankfilter['%rel.err'],
+               rankfilter['Synonyms']]
+    except KeyError as error:
+        print "Problem reading in data from input file(s):\n",
+        print "Respective CasLookup entry: \n", pprint.pprint(caslookup), "\n"
+        print "Respective RankFilter entry: \n", pprint.pprint(rankfilter), "\n"
+        raise error
+
+    return hit
+
+
+def _remove_formula(name):
+    '''
+    The RankFilter Name field often contains the Formula as well, this function removes it from the Name
+    @param name: complete name of the compound from the RankFilter output
+    '''
+    name = name.split()
+    poss_formula = name[-1]
+    match = re.match("^(([A-Z][a-z]{0,2})(\d*))+$", poss_formula)
+    if match:
+        return ' '.join(name[:-1]), poss_formula
+    else:
+        return ' '.join(name), False
+
+
+def _get_default_caslookup():
+    '''
+    The Cas Lookup tool might not have found all compounds in the library searched,
+    this default dict will be used to combine with the Rank Filter output
+    '''
+    return {'FORMULA': 'N/A',
+            'RI': '0.0',
+            'Regression.Column.Name': 'None',
+            'min': '0.0',
+            'max': '0.0',
+            'nr.duplicates': '0',
+            'Column.phase.type': 'N/A',
+            'Column.name': 'N/A'}
+
+
+def _save_data(data, nhits, out_csv_single, out_csv_multi):
+    '''
+    Writes tab-separated data to file
+    @param data: dictionary containing merged dataset
+    @param out_csv: output csv file
+    '''
+    # Columns we don't repeat:
+    header_part1 = ['Centrotype',
+              'cent.Factor',
+              'scan nr.',
+              'R.T. (umin)',
+              'nr. Peaks',
+              'R.T.']
+    # These are the headers/columns we repeat in case of
+    # combining hits in one line (see alternative_headers method below):
+    header_part2 = [
+              'Name',
+              'FORMULA',
+              'Library',
+              'CAS',
+              'Forward',
+              'Reverse',
+              'Avg. (Forward, Reverse)',
+              'RIexp',
+              'RI',
+              'RIsvr',
+              'RIexp - RIsvr',
+              'RI - RIexp',
+              'Regression.Column.Name',
+              'min',
+              'max',
+              'nr.duplicates',
+              'Column.phase.type',
+              'Column.name',
+              'Rank',
+              '%rel.err',
+              'Synonyms']
+
+    # Open output file for writing
+    outfile_single_handle = open(out_csv_single, 'wb')
+    outfile_multi_handle = open(out_csv_multi, 'wb')
+    output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
+    output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t")
+
+    # Write headers
+    output_single_handle.writerow(header_part1 + header_part2)
+    output_multi_handle.writerow(header_part1 + header_part2 + alternative_headers(header_part2, nhits-1))
+    # Combine all hits for each centrotype into one line
+    line = []
+    for centrotype_idx in xrange(len(data)):
+        i = 0
+        for hit in data[centrotype_idx]:
+            if i==0:
+                line.extend(hit)
+            else:
+                line.extend(hit[6:])
+            i = i+1
+        # small validation (if error, it is a programming error):
+        if i > nhits:
+            raise Exception('Error: more hits that expected for  centrotype_idx ' + centrotype_idx)
+        output_multi_handle.writerow(line)
+        line = []
+
+    # Write one line for each centrotype
+    for centrotype_idx in xrange(len(data)):
+        for hit in data[centrotype_idx]:
+            output_single_handle.writerow(hit)
+
+def alternative_headers(header_part2, nr_alternative_hits):
+    '''
+    This method will iterate over the header names and add the string 'ALT#_' before each,
+    where # is the number of the alternative, according to number of alternative hits we want to add
+    to final csv/tsv
+    '''
+    result = []
+    for i in xrange(nr_alternative_hits):
+        for header_name in header_part2:
+            result.append("ALT" + str(i+1) + "_" + header_name)
+    return result
+
+def main():
+    '''
+    Combine Output main function
+    It will merge the result files from "RankFilter"  and "Lookup RI for CAS numbers"
+    NB: the caslookup_result_file will typically have fewer lines than
+    rankfilter_result_file, so the merge has to consider this as well. The final file
+    should have the same nr of lines as rankfilter_result_file.
+    '''
+    rankfilter_result_file = sys.argv[1]
+    caslookup_result_file = sys.argv[2]
+    output_single_csv = sys.argv[3]
+    output_multi_csv = sys.argv[4]
+
+    # Read RankFilter and CasLookup output files
+    rankfilter = _process_data(rankfilter_result_file)
+    caslookup = _process_data(caslookup_result_file)
+    merged, nhits = _merge_data(rankfilter, caslookup)
+    _save_data(merged, nhits, output_single_csv, output_multi_csv)
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GCMS/combine_output.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,36 @@
+<tool id="combine_output" name="RIQC-Combine RankFilter and CasLookup output" version="1.0.2">
+  <description>Perform a combination of output data from the RankFilter and CasLookup tools</description>
+  <command interpreter="python">
+    combine_output.py $rankfilter_in $caslookup_in $out_single $out_multi
+  </command>
+  <inputs>
+    <param format="tabular" name="rankfilter_in" type="data" label="RIQC-RankFilter output (Estimated RI)"
+    	help="Select the output file from the RankFilter tool"/>
+    <param format="tabular" name="caslookup_in" type="data" label="RIQC-Lookup RI for CAS output ('Known' RI)"
+    	help="Select the output file from the CasLookup tool"/>
+    <!--   <param TODO : could add "tolerance for ERI-KRI"(Estimated RI-Known RI)-->
+  </inputs>
+  <outputs>
+    <data format="tabular" label="${tool.name} (Single) on ${on_string}" name="out_single" />
+    <data format="tabular" label="${tool.name} (Multi) on ${on_string}" name="out_multi" />
+  </outputs>
+  <help>
+Performs a combination of output files from the 'RankFilter' and 'Lookup RI for CAS' tools into two tab-separated files.
+
+The files produced are contain either all hits for a compound on a single line (Single) or on separate lines
+(Multi).
+
+.. class:: infomark
+
+**Notes**
+
+The input data should be produced by the RankFilter and 'Lookup RI for CAS' tools provided on this Galaxy server with the
+original headers kept intact. Processing steps include:
+
+   - Added columns showing the average Forward/Reverse values, RIexp - RIsvr and RI - RIexp values
+   - The ID column of the RankFilter tool output is split into 'Centrotype', 'cent.Factor', 'scan nr.', 'R.T. (umin)'
+     and 'nr. Peaks' fields.
+   - The formula is split off the 'Name' field in the RankFilter output
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GCMS/create_model.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,78 @@
+<tool id="create_poly_model" name="RIQC-Create Regression Model" version="1.0.2">
+  <description>Generate coefficients to enable the regression from one GC-column
+		  		         to another GC-column</description>
+  <command interpreter="Rscript">Rscripts/ridb-regression.R
+               $ridb
+               $out_model
+               $out_log
+               $min_residuals
+               $range_mod
+               $pvalue
+               $rsquared
+               $method
+               $plot
+               #if $plot
+                   $model_graphics
+               #end if
+  </command>
+  <inputs>
+    <param format="tabular" name="ridb" type="select" label="Retention Index (RI) and GC columns Library file"
+           help="Select the RI library file of which all GC columns and their RI values
+                 will be used to create a model"
+      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RI_DB_libraries")'/>
+
+    <param name="method" type="select" label="Select regression method"
+           help="Method to use for calculating the model" >
+           <option value="poly" selected="True">Polynomial (3rd degree)</option>
+           <option value="linear">Linear</option>
+    </param>
+    <param name="min_residuals" type="integer" value="10" optional="False"
+           label="Minimum number of residuals" help="The minimum number of residuals
+                 (datapoints) that both columns should have in common when calculating
+                 the model" />
+    <param name="range_mod" type="integer" value="0" optional="False"
+           label="Range modifier" help="Moves the range of the usable RI space by the
+                  given percentage. Set to 0 to use the full range of available data." />
+    <param name="pvalue" type="float" value="0.05" optional="False" min="0" max="1"
+           label="Pvalue to filter on" help="Set the upper limit for the pvalue (calculated)
+                  by performing an ANOVA analysis on the created model). All models with higher
+                  pvalues are discarded." />
+    <param name="rsquared" type="float" value="0.95" optional="False" min="0" max="1"
+           label="R-squared to filter on" help="Set the lower limit for the R-squared,
+                  all models with lower values are discarded." />
+    <param name="plot" type="boolean" label="Create a separate plot for each model"
+           help="This will create a ZIP file in the history containing PDF plots" />
+  </inputs>
+  <code file="match_library.py" />
+  <outputs>
+  	<data format="zip" label="Model Graphics of ${on_string}" name="model_graphics" >
+  	    <filter>(plot)</filter>
+  	</data>
+    <data format="tabular" label="Regression logfile of ${on_string}"  name="out_log" />
+    <data format="tabular" label="Regression model of ${on_string}"  name="out_model" />
+  </outputs>
+  <help>
+Calculates regression models for a permutation of all GC columns contained in the selected
+RI database file. The method used for creating the model is either based on a 3rd degree
+polynomial or a standard linear model.
+
+The *Minimum number of residuals* option will only allow regression if the columns it is based
+on has at least that number of datapoints on the same compound.
+
+Filtering is possible by setting an upper limit for the *p-value* and / or a lower limit for
+the *R squared* value. The produced logfile will state how many models have been discarded due
+to this filtering. The output model file also includes the p-value and R squared value for
+each created model.
+
+Graphical output of the models is available by selecting the plot option which shows the
+data points used for the model as well as the fit itself and the range of data that will
+be usable.
+
+.. class:: infomark
+
+**Notes**
+
+The output file produced by this tool is required as input for the CasLookup tool when
+selecting to apply regression when finding hits in the RIDB.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GCMS/library_lookup.py	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,327 @@
+'''
+Logic for searching a Retention Index database file given output from NIST
+'''
+import match_library
+import re
+import sys
+import csv
+
+__author__ = "Marcel Kempenaar"
+__contact__ = "brs@nbic.nl"
+__copyright__ = "Copyright, 2012, Netherlands Bioinformatics Centre"
+__license__ = "MIT"
+
+def create_lookup_table(library_file, column_type_name, statphase):
+    '''
+    Creates a dictionary holding the contents of the library to be searched
+    @param library_file: library to read
+    @param column_type_name: the columns type name
+    @param statphase: the columns stationary phase
+    '''
+    (data, header) = match_library.read_library(library_file)
+    # Test for presence of required columns
+    if ('columntype' not in header or
+        'columnphasetype' not in header or
+        'cas' not in header):
+        raise IOError('Missing columns in ', library_file)
+
+    column_type_column = header.index("columntype")
+    statphase_column = header.index("columnphasetype")
+    cas_column = header.index("cas")
+
+    filtered_library = [line for line in data if line[column_type_column] == column_type_name
+                        and line[statphase_column] == statphase]
+    lookup_dict = {}
+    for element in filtered_library:
+        # Here the cas_number is set to the numeric part of the cas_column value, so if the
+        # cas_column value is 'C1433' then cas_number will be '1433'
+        cas_number = str(re.findall(r'\d+', (element[cas_column]).strip())[0])
+        try:
+            lookup_dict[cas_number].append(element)
+        except KeyError:
+            lookup_dict[cas_number] = [element]
+    return lookup_dict
+
+
+def _preferred(hits, pref, ctype, polar, model, method):
+    '''
+    Returns all entries in the lookup_dict that have the same column name, type and polarity
+    as given by the user, uses regression if selected given the model and method to use. The
+    regression is applied on the column with the best R-squared value in the model
+    @param hits: all entries in the lookup_dict for the given CAS number
+    @param pref: preferred GC-column, can be one or more names
+    @param ctype: column type (capillary etc.)
+    @param polar: polarity (polar / non-polar etc.)
+    @param model: data loaded from file containing regression models
+    @param method: supported regression method (i.e. poly(nomial) or linear)
+    '''
+    match = []
+    for column in pref:
+        for hit in hits:
+            if hit[4] == ctype and hit[5] == polar and hit[6] == column:
+                # Create copy of found hit since it will be altered downstream
+                match.extend(hit)
+                return match, False
+
+    # No hit found for current CAS number, return if not performing regression
+    if not model:
+        return False, False
+
+    # Perform regression
+    for column in pref:
+        if column not in model:
+            break
+        # Order regression candidates by R-squared value (last element)
+        order = sorted(model[column].items(), key=lambda col: col[1][-1])
+        # Create list of regression candidate column names
+        regress_columns = list(reversed([column for (column, _) in order]))
+        # Names of available columns
+        available = [hit[6] for hit in hits]
+
+        # TODO: combine Rsquared and number of datapoints to get the best regression match
+        '''
+        # Iterate regression columns (in order) and retrieve their models
+        models = {}
+        for col in regress_columns:
+            if col in available:
+                hit = list(hits[available.index(col)])
+                if hit[4] == ctype:
+                    # models contains all model data including residuals [-2] and rsquared [-1]
+                    models[pref[0]] = model[pref[0]][hit[6]]
+        # Get the combined maximum for residuals and rsquared
+        best_match = models[]
+        # Apply regression
+        if method == 'poly':
+            regressed = _apply_poly_regression(best_match, hit[6], float(hit[3]), model)
+            if regressed:
+                hit[3] = regressed
+            else:
+                return False, False
+            else:
+                hit[3] = _apply_linear_regression(best_match, hit[6], float(hit[3]), model)
+                match.extend(hit)
+            return match, hit[6]
+        '''
+
+        for col in regress_columns:
+            if col in available:
+                hit = list(hits[available.index(col)])
+                if hit[4] == ctype:
+                    # Perform regression using a column for which regression is possible
+                    if method == 'poly':
+                        # Polynomial is only possible within a set border, if the RI falls outside
+                        # of this border, skip this lookup
+                        regressed = _apply_poly_regression(pref[0], hit[6], float(hit[3]), model)
+                        if regressed:
+                            hit[3] = regressed
+                        else:
+                            return False, False
+                    else:
+                        hit[3] = _apply_linear_regression(pref[0], hit[6], float(hit[3]), model)
+                    match.extend(hit)
+                    return match, hit[6]
+
+    return False, False
+
+
+
+def default_hit(row, cas_nr, compound_id):
+    '''
+    This method will return a "default"/empty hit for cases where the
+    method _preferred() returns False (i.e. a RI could not be found
+    for the given cas nr, also not via regression.
+    '''
+    return [
+            #'CAS',
+            'C' + cas_nr,
+            #'NAME',
+            '',
+            #'FORMULA',
+            '',
+            #'RI',
+            '0.0',
+            #'Column.type',
+            '',
+            #'Column.phase.type',
+            '',
+            #'Column.name',
+            '',
+            #'phase.coding',
+            ' ',
+            #'CAS_column.Name',
+            '',
+            #'Centrotype', -> NOTE THAT compound_id is not ALWAYS centrotype...depends on MsClust algorithm used...for now only one MsClust algorithm is used so it is not an issue, but this should be updated/corrected once that changes
+            compound_id,
+            #'Regression.Column.Name',
+            '',
+            #'min',
+            '',
+            #'max',
+            '',
+            #'nr.duplicates',
+            '']
+
+
+def format_result(lookup_dict, nist_tabular_filename, pref, ctype, polar, model, method):
+    '''
+    Looks up the compounds in the library lookup table and formats the results
+    @param lookup_dict: dictionary containing the library to be searched
+    @param nist_tabular_filename: NIST output file to be matched
+    @param pref: (list of) column-name(s) to look for
+    @param ctype: column type of interest
+    @param polar: polarity of the used column
+    @param model: data loaded from file containing regression models
+    @param method: supported regression method (i.e. poly(nomial) or linear)
+    '''
+    (nist_tabular_list, header_clean) = match_library.read_library(nist_tabular_filename)
+    # Retrieve indices of the CAS and compound_id columns (exit if not present)
+    try:
+        casi = header_clean.index("cas")
+        idi = header_clean.index("id")
+    except:
+        raise IOError("'CAS' or 'compound_id' not found in header of library file")
+
+    data = []
+    for row in nist_tabular_list:
+        casf = str(row[casi].replace('-', '').strip())
+        compound_id = str(row[idi].split('-')[0])
+        if casf in lookup_dict:
+            found_hit, regress = _preferred(lookup_dict[casf], pref, ctype, polar, model, method)
+            if found_hit:
+                # Keep cas nr as 'C'+ numeric part:
+                found_hit[0] = 'C' + casf
+                # Add compound id
+                found_hit.insert(9, compound_id)
+                # Add information on regression process
+                found_hit.insert(10, regress if regress else 'None')
+                # Replace column index references with actual number of duplicates
+                dups = len(found_hit[-1].split(','))
+                if dups > 1:
+                    found_hit[-1] = str(dups + 1)
+                else:
+                    found_hit[-1] = '0'
+                data.append(found_hit)
+                found_hit = ''
+            else:
+                data.append(default_hit(row, casf, compound_id))
+        else:
+            data.append(default_hit(row, casf, compound_id))
+
+        casf = ''
+        compound_id = ''
+        found_hit = []
+        dups = []
+    return data
+
+
+def _save_data(content, outfile):
+    '''
+    Write to output file
+    @param content: content to write
+    @param outfile: file to write to
+    '''
+    # header
+    header = ['CAS',
+              'NAME',
+              'FORMULA',
+              'RI',
+              'Column.type',
+              'Column.phase.type',
+              'Column.name',
+              'phase.coding',
+              'CAS_column.Name',
+              'Centrotype',
+              'Regression.Column.Name',
+              'min',
+              'max',
+              'nr.duplicates']
+    output_handle = csv.writer(open(outfile, 'wb'), delimiter="\t")
+    output_handle.writerow(header)
+    for entry in content:
+        output_handle.writerow(entry)
+
+
+def _read_model(model_file):
+    '''
+    Creates an easy to search dictionary for getting the regression parameters
+    for each valid combination of GC-columns
+    @param model_file: filename containing the regression models
+    '''
+    regress = list(csv.reader(open(model_file, 'rU'), delimiter='\t'))
+    if len(regress.pop(0)) > 9:
+        method = 'poly'
+    else:
+        method = 'linear'
+
+    model = {}
+    # Create new dictionary for each GC-column
+    for line in regress:
+        model[line[0]] = {}
+
+    # Add data
+    for line in regress:
+        if method == 'poly':
+            model[line[0]][line[1]] = [float(col) for col in line[2:11]]
+        else:  # linear
+            model[line[0]][line[1]] = [float(col) for col in line[2:9]]
+
+    return model, method
+
+
+def _apply_poly_regression(column1, column2, retention_index, model):
+    '''
+    Calculates a new retention index (RI) value using a given 3rd-degree polynomial
+    model based on data from GC columns 1 and 2
+    @param column1: name of the selected GC-column
+    @param column2: name of the GC-column to use for regression
+    @param retention_index: RI to convert
+    @param model: dictionary containing model information for all GC-columns
+    '''
+    coeff = model[column1][column2]
+    # If the retention index to convert is within range of the data the model is based on, perform regression
+    if coeff[4] < retention_index < coeff[5]:
+        return (coeff[3] * (retention_index ** 3) + coeff[2] * (retention_index ** 2) +
+                (retention_index * coeff[1]) + coeff[0])
+    else:
+        return False
+
+
+def _apply_linear_regression(column1, column2, retention_index, model):
+    '''
+    Calculates a new retention index (RI) value using a given linear model based on data
+    from GC columns 1 and 2
+    @param column1: name of the selected GC-column
+    @param column2: name of the GC-column to use for regression
+    @param retention_index: RI to convert
+    @param model: dictionary containing model information for all GC-columns
+    '''
+    # TODO: No use of limits
+    coeff = model[column1][column2]
+    return coeff[1] * retention_index + coeff[0]
+
+
+def main():
+    '''
+    Library Lookup main function
+    '''
+    library_file = sys.argv[1]
+    nist_tabular_filename = sys.argv[2]
+    ctype = sys.argv[3]
+    polar = sys.argv[4]
+    outfile = sys.argv[5]
+    pref = sys.argv[6:-1]
+    regress = sys.argv[-1]
+
+    if regress != 'False':
+        model, method = _read_model(regress)
+    else:
+        model, method = False, None
+
+    lookup_dict = create_lookup_table(library_file, ctype, polar)
+    data = format_result(lookup_dict, nist_tabular_filename, pref, ctype, polar, model, method)
+
+    _save_data(data, outfile)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GCMS/library_lookup.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,75 @@
+<tool id="lookup_library" name="RIQC-Lookup RI for CAS numbers in library" version="1.0.2">
+  <description>Lookup or estimate the RI using a "known RI values" CAS numbers library</description>
+  <command interpreter="python">
+    library_lookup.py
+    $library_file
+    $input
+    "$col_type"
+    "$polarity"
+    $output
+    #for $ctype in $pref
+      ${ctype.columntype}
+    #end for
+    $regression.model
+  </command>
+  <inputs>
+  <!-- Regarding the <page> items: this blocks the use of this tool in Galaxy workflows. However,
+       alternatives like wrapping this in conditionals, repeats (to force a refresh_on_change as this option
+       is not working on its own) failed since the workflow editor does not support refreshes...not does the
+       workflow runtime support conditionals or repeats to be set at runtime. See also
+       galaxy-dev mail thread "when else" in <conditional> ? RE: refresh_on_change : is this a valid attribute? Any other ideas/options??"  -->
+    <page>
+      <param format="tabular" name="input" type="data" label="NIST identifications as tabular file"
+      		 help="Select a tab delimited NIST metabolite identifications file (converted from PDF)" />
+      <param name="library_file" type="select" label="CAS x RI Library file"
+      		 help="Select a library/lookup file containing RI values for CAS numbers on various chromatography columns "
+      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RI_DB_libraries")'/>
+    </page>
+    <page>
+      <param name="col_type" type="select" label="Select column type" refresh_on_change="true"
+	         display="radio" dynamic_options='get_column_type(library_file)'
+	         help="" />
+	</page>
+    <page>
+      <param name="polarity" type="select" label="Select polarity" refresh_on_change="true"
+             display="radio" dynamic_options='filter_column(library_file,col_type)'
+	         help="" />
+    </page>
+    <page>
+	  <conditional name="regression">
+		  <param name="regression_select" type="boolean" checked="false" label="Apply regression method"
+		  		 help="If no data for the selected column is present in the database, selecting this option will try
+		  		 	   to convert Retention Indices using data from other GC-columns with a regression method. Please
+		  		 	   note that only the first given GC-column above will be used for this, any alternatives will be
+		  		 	   ignored" />
+		  <when value="true">
+		  	<param name="model" format="tabular" type="data" label="Tabular file containing regression model"
+		  		   help="This file contains the coefficients used to perform the regression from one GC-column
+		  		         to another GC-column"/>
+		  </when>
+          <when value="false">
+            <param name="model" type="hidden" value="False" />
+          </when>
+	  </conditional>
+      <repeat name="pref" title="Select column name preference">
+		<param name="columntype" type="select" label="Column name" refresh_on_change="true"
+               dynamic_options='filter_column2(library_file, col_type, polarity)'
+	       	   help="Select one or more column names for filtering. The order defines the priority." />
+      </repeat>
+    </page>
+  </inputs>
+  <outputs>
+    <data format="tabular" label="${tool.name} on" name="output" />
+</outputs>
+<code file="match_library.py" />
+  <help>
+Performs a lookup of the RI values by matching CAS numbers from the given NIST identifications file to a library.
+If a direct match is NOT found for the preferred column name, a regression can be done to find
+the theoretical RI value based on known RI values for the CAS number on other column types (see step 4).
+If there is no match for the CAS number on any column type, then the record is not given a RI.
+
+
+
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GCMS/readme.txt	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,25 @@
+==Description==
+Galaxy tools for analyzing a PDF-result file generated by NIST-MSSearch by calculating ranks for
+each putative metabolite and matching with a Retention Index Database by CAS-number
+
+
+==Dependencies==
+- Galaxy
+- pdftotext (part of the 'poppler-utils' package (Ubuntu and Debian))
+- Python (>= 2.6) with the following package(s):
+  - numpy
+- R
+
+
+==Installation==
+
+- Optionally run the included Python unittests to verify the dependencies:
+      nosetests galaxy-dist/tools/GCMS
+
+
+
+==License==
+
+Licensing info can be found at the /LICENSE_AND_README folder files located
+in the root of this (PRIMS-metabolomics) project.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/METEXPtools/export_to_metexp_tabular.py	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,247 @@
+#!/usr/bin/env python
+# encoding: utf-8
+'''
+Module to combine output from the GCMS Galaxy tools RankFilter, CasLookup and MsClust
+into a tabular file that can be uploaded to the MetExp database.
+
+RankFilter, CasLookup are already combined by combine_output.py so here we will use
+this result. Furthermore here one of the MsClust
+quantification files containing the respective spectra details are to be combined as well.
+
+Extra calculations performed:
+- The column MW is also added here and is derived from the column FORMULA found
+  in RankFilter, CasLookup combined result.
+
+So in total here we merge 2 files and calculate one new column.
+'''
+from pkg_resources import resource_filename  # @UnresolvedImport # pylint: disable=E0611
+import csv
+import re
+import sys
+from collections import OrderedDict
+
+__author__ = "Pieter Lukasse"
+__contact__ = "pieter.lukasse@wur.nl"
+__copyright__ = "Copyright, 2013, Plant Research International, WUR"
+__license__ = "Apache v2"
+
+def _process_data(in_csv, delim='\t'):
+    '''
+    Generic method to parse a tab-separated file returning a dictionary with named columns
+    @param in_csv: input filename to be parsed
+    '''
+    data = list(csv.reader(open(in_csv, 'rU'), delimiter=delim))
+    header = data.pop(0)
+    # Create dictionary with column name as key
+    output = OrderedDict()
+    for index in xrange(len(header)):
+        output[header[index]] = [row[index] for row in data]
+    return output
+
+ONE_TO_ONE = 'one_to_one'
+N_TO_ONE = 'n_to_one'
+
+def _merge_data(set1, link_field_set1, set2, link_field_set2, compare_function, merge_function, metadata, relation_type=ONE_TO_ONE):
+    '''
+    Merges data from both input dictionaries based on the link fields. This method will
+    build up a new list containing the merged hits as the items.
+    @param set1: dictionary holding set1 in the form of N lists (one list per attribute name)
+    @param set2: dictionary holding set2 in the form of N lists (one list per attribute name)
+    '''
+    # TODO test for correct input files -> same link_field values should be there
+    # (test at least number of unique link_field values):
+    #
+    # if (len(set1[link_field_set1]) != len(set2[link_field_set2])):
+    #    raise Exception('input files should have the same nr of key values  ')
+
+
+    merged = []
+    processed = {}
+    for link_field_set1_idx in xrange(len(set1[link_field_set1])):
+        link_field_set1_value = set1[link_field_set1][link_field_set1_idx]
+        if not link_field_set1_value in processed :
+            # keep track of processed items to not repeat them
+            processed[link_field_set1_value] = link_field_set1_value
+
+            # Get the indices for current link_field_set1_value in both data-structures for proper matching
+            set1index = [index for index, value in enumerate(set1[link_field_set1]) if value == link_field_set1_value]
+            set2index = [index for index, value in enumerate(set2[link_field_set2]) if compare_function(value, link_field_set1_value)==True ]
+            # Validation :
+            if len(set2index) == 0:
+                # means that corresponding data could not be found in set2, then throw error
+                raise Exception("Datasets not compatible, merge not possible. " + link_field_set1 + "=" +
+                                link_field_set1_value + " only found in first dataset. ")
+
+            merged_hits = []
+            # Combine hits
+            for hit in xrange(len(set1index)):
+                # Create records of hits to be merged ("keys" are the attribute names, so what the lines below do
+                # is create a new "dict" item with same "keys"/attributes, with each attribute filled with its
+                # corresponding value in the sets; i.e.
+                # set1[key] => returns the list/array with size = nrrows, with the values for the attribute
+                #                    represented by "key".
+                # set1index[hit] => points to the row nr=hit (hit is a rownr/index)
+                # So set1[x][set1index[n]] = set1.attributeX.instanceN
+                #
+                # It just ensures the entry is made available as a plain named array for easy access.
+                rf_record = OrderedDict(zip(set1.keys(), [set1[key][set1index[hit]] for key in set1.keys()]))
+                if relation_type == ONE_TO_ONE :
+                    cl_record = OrderedDict(zip(set2.keys(), [set2[key][set2index[hit]] for key in set2.keys()]))
+                else:
+                    # is N to 1:
+                    cl_record = OrderedDict(zip(set2.keys(), [set2[key][set2index[0]] for key in set2.keys()]))
+
+                merged_hit = merge_function(rf_record, cl_record, metadata)
+                merged_hits.append(merged_hit)
+
+            merged.append(merged_hits)
+
+    return merged
+
+
+def _compare_records(key1, key2):
+    '''
+    in this case the compare method is really simple as both keys are expected to contain
+    same value when records are the same
+    '''
+    if key1 == key2:
+        return True
+    else:
+        return False
+
+
+
+def _merge_records(rank_caslookup_combi, msclust_quant_record, metadata):
+    '''
+    Combines single records from both the RankFilter+CasLookup combi file and from MsClust file
+
+    @param rank_caslookup_combi: rankfilter and caslookup combined record (see combine_output.py)
+    @param msclust_quant_record: msclust quantification + spectrum record
+    '''
+    record = []
+    for column in rank_caslookup_combi:
+        record.append(rank_caslookup_combi[column])
+
+    for column in msclust_quant_record:
+        record.append(msclust_quant_record[column])
+
+    for column in metadata:
+        record.append(metadata[column])
+
+    # add MOLECULAR MASS (MM)
+    molecular_mass = get_molecular_mass(rank_caslookup_combi['FORMULA'])
+    # limit to two decimals:
+    record.append("{0:.2f}".format(molecular_mass))
+
+    # add MOLECULAR WEIGHT (MW) - TODO - calculate this
+    record.append('0.0')
+
+    # level of identification and Location of reference standard
+    record.append('0')
+    record.append('')
+
+    return record
+
+
+def get_molecular_mass(formula):
+    '''
+    Calculates the molecular mass (MM).
+    E.g. MM of H2O = (relative)atomic mass of H x2 + (relative)atomic mass of O
+    '''
+
+    # Each element is represented by a capital letter, followed optionally by
+    # lower case, with one or more digits as for how many elements:
+    element_pattern = re.compile("([A-Z][a-z]?)(\d*)")
+
+    total_mass = 0
+    for (element_name, count) in element_pattern.findall(formula):
+        if count == "":
+            count = 1
+        else:
+            count = int(count)
+        element_mass = float(elements_and_masses_map[element_name])  # "found: Python's built-in float type has double precision " (? check if really correct ?)
+        total_mass += element_mass * count
+
+    return total_mass
+
+
+
+def _save_data(data, headers, out_csv):
+    '''
+    Writes tab-separated data to file
+    @param data: dictionary containing merged dataset
+    @param out_csv: output csv file
+    '''
+
+    # Open output file for writing
+    outfile_single_handle = open(out_csv, 'wb')
+    output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
+
+    # Write headers
+    output_single_handle.writerow(headers)
+
+    # Write
+    for item_idx in xrange(len(data)):
+        for hit in data[item_idx]:
+            output_single_handle.writerow(hit)
+
+
+def _get_map_for_elements_and_masses(elements_and_masses):
+    '''
+    This method will read out the column 'Chemical symbol' and make a map
+    of this, storing the column 'Relative atomic mass' as its value
+    '''
+    resultMap = {}
+    index = 0
+    for entry in elements_and_masses['Chemical symbol']:
+        resultMap[entry] = elements_and_masses['Relative atomic mass'][index]
+        index += 1
+
+    return resultMap
+
+
+def init_elements_and_masses_map():
+    '''
+    Initializes the lookup map containing the elements and their respective masses
+    '''
+    elements_and_masses = _process_data(resource_filename(__name__, "static_resources/elements_and_masses.tab"))
+    global elements_and_masses_map
+    elements_and_masses_map = _get_map_for_elements_and_masses(elements_and_masses)
+
+
+def main():
+    '''
+    Combine Output main function
+
+    RankFilter, CasLookup are already combined by combine_output.py so here we will use
+    this result. Furthermore here the MsClust spectra file (.MSP) and one of the MsClust
+    quantification files are to be combined with combine_output.py result as well.
+    '''
+    rankfilter_and_caslookup_combined_file = sys.argv[1]
+    msclust_quantification_and_spectra_file = sys.argv[2]
+    output_csv = sys.argv[3]
+    # metadata
+    metadata = OrderedDict()
+    metadata['organism'] = sys.argv[4]
+    metadata['tissue'] = sys.argv[5]
+    metadata['experiment_name'] = sys.argv[6]
+    metadata['user_name'] = sys.argv[7]
+    metadata['column_type'] = sys.argv[8]
+
+    # Read RankFilter and CasLookup output files
+    rankfilter_and_caslookup_combined = _process_data(rankfilter_and_caslookup_combined_file)
+    msclust_quantification_and_spectra = _process_data(msclust_quantification_and_spectra_file, ',')
+
+    # Read elements and masses to use for the MW/MM calculation :
+    init_elements_and_masses_map()
+
+    merged = _merge_data(rankfilter_and_caslookup_combined, 'Centrotype',
+                                msclust_quantification_and_spectra, 'centrotype',
+                                _compare_records, _merge_records, metadata,
+                                N_TO_ONE)
+    headers = rankfilter_and_caslookup_combined.keys() + msclust_quantification_and_spectra.keys() + metadata.keys() + ['MM','MW', 'Level of identification', 'Location of reference standard']
+    _save_data(merged, headers, output_csv)
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/METEXPtools/export_to_metexp_tabular.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,75 @@
+<tool id="export_to_metexp_tabular"
+    name="METEXP - Tabular file"
+    version="0.2.0">
+  <description>Create tabular file for loading into METabolomics EXPlorer database</description>
+  <command interpreter="python">
+    export_to_metexp_tabular.py
+    $rankfilter_and_caslookup_combi
+    $msclust_quant_file
+    $output_result
+    "$organism"
+    "$tissue"
+    "$experiment_name"
+    "$user_name"
+    "$column_type"
+  </command>
+  <inputs>
+    <param format="tabular" name="rankfilter_and_caslookup_combi" type="data" label="RIQC-Combine RankFilter and CasLookup output"
+    	help="Select the (multi) output file from the 'Combine RankFilter and CasLookup' tool"/>
+    <param format="tabular" name="msclust_quant_file" type="data" label="MusClust-quantification file output"
+    	help="Select the output file from MsClust (centrotype, mic or sim) which also contain respective spectrum details"/>
+
+
+   <param name="organism" type="text" size="80"
+           label="Organism(s) info"
+           help="Metadata information to accompany the results when stored in MetExp DB." >
+           <validator type="empty_field" message="A value is required."></validator><!-- attribute optional="False" does not seem to work for params so validator is added -->
+    </param>
+
+   <param name="tissue" type="text" size="80"
+           label="Tissue(s) info"
+           help="Metadata information to accompany the results when stored in MetExp DB."  >
+           <validator type="empty_field" message="A value is required."></validator>
+    </param>
+
+   <param name="experiment_name" type="text" size="80"
+           label="Experiment name/code"
+           help="Name or code to store the results under. This can help you find the results back in MetExpDB."  >
+           <validator type="empty_field" message="A value is required."></validator>
+    </param>
+
+   <param name="user_name" type="text" size="80"
+           label="User name"
+           help="User name or code to store the results under. This can help you find the results back in MetExpDB."  >
+           <validator type="empty_field" message="A value is required."></validator>
+    </param>
+
+    <param name="column_type" type="text" size="80"
+           label="Column type"
+           help="Column type to report with the results. This can help you find the results back in MetExpDB."  >
+           <validator type="empty_field" message="A value is required."></validator>
+    </param>
+
+  </inputs>
+  <outputs>
+    <data format="tabular" label="${tool.name} on ${on_string}" name="output_result" />
+  </outputs>
+  <help>
+.. class:: infomark
+
+Tool to combine output from the tools RankFilter, CasLookup and MsClust
+into a tabular file that can be uploaded to the METabolomics EXPlorer (MetExp) database.
+
+RankFilter, CasLookup are already combined by 'RIQC-Combine RankFilter and CasLookup' tool so here we will use
+this result.
+
+**Notes**
+
+Extra calculations performed:
+- The columns MM and MW are also added here and are derived from the column FORMULA found in RankFilter, CasLookup combined result.
+
+So in total here we merge 2 files and calculate one new column.
+
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/METEXPtools/query_metexp.py	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+# encoding: utf-8
+'''
+Module to query a set of identifications against the METabolomics EXPlorer database.
+
+It will take the input file and for each record it will query the
+molecular mass in the selected MetExp DB. If one or more compounds are found in the
+MetExp DB then extra information regarding these compounds is added to the output file.
+
+The output file is thus the input file enriched with information about
+related items found in the selected MetExp DB.
+'''
+import csv
+import sys
+import fileinput
+import urllib2
+import time
+from collections import OrderedDict
+
+__author__ = "Pieter Lukasse"
+__contact__ = "pieter.lukasse@wur.nl"
+__copyright__ = "Copyright, 2014, Plant Research International, WUR"
+__license__ = "Apache v2"
+
+def _process_file(in_xsv, delim='\t'):
+    '''
+    Generic method to parse a tab-separated file returning a dictionary with named columns
+    @param in_csv: input filename to be parsed
+    '''
+    data = list(csv.reader(open(in_xsv, 'rU'), delimiter=delim))
+    return _process_data(data)
+
+def _process_data(data):
+
+    header = data.pop(0)
+    # Create dictionary with column name as key
+    output = OrderedDict()
+    for index in xrange(len(header)):
+        output[header[index]] = [row[index] for row in data]
+    return output
+
+
+def _query_and_add_data(input_data, casid_col, formula_col, molecular_mass_col, metexp_dblink, separation_method):
+    '''
+    This method will iterate over the record in the input_data and
+    will enrich them with the related information found (if any) in the
+    MetExp Database.
+
+    # TODO : could optimize this with multi-threading, see also nice example at http://stackoverflow.com/questions/2846653/python-multithreading-for-dummies
+    '''
+    merged = []
+
+    for i in xrange(len(input_data[input_data.keys()[0]])):
+        # Get the record in same dictionary format as input_data, but containing
+        # a value at each column instead of a list of all values of all records:
+        input_data_record = OrderedDict(zip(input_data.keys(), [input_data[key][i] for key in input_data.keys()]))
+
+        # read the molecular mass and formula:
+        cas_id = input_data_record[casid_col]
+        formula = input_data_record[formula_col]
+        molecular_mass = input_data_record[molecular_mass_col]
+
+        # search for related records in MetExp:
+        data_found = None
+        if cas_id != "undef":
+            # 1- search for other experiments where this CAS id has been found:
+            query_link = metexp_dblink + "/find_entries/query?cas_nr="+ cas_id + "&method=" + separation_method
+            data_found = _fire_query_and_return_dict(query_link + "&_format_result=tsv")
+            data_type_found = "CAS"
+        if data_found == None:
+            # 2- search for other experiments where this FORMULA has been found:
+            query_link = metexp_dblink + "/find_entries/query?molecule_formula="+ formula + "&method=" + separation_method
+            data_found = _fire_query_and_return_dict(query_link + "&_format_result=tsv")
+            data_type_found = "FORMULA"
+        if data_found == None:
+            # 3- search for other experiments where this MM has been found:
+            query_link = metexp_dblink + "/find_entries/query?molecule_mass="+ molecular_mass + "&method=" + separation_method
+            data_found = _fire_query_and_return_dict(query_link + "&_format_result=tsv")
+            data_type_found = "MM"
+
+        if data_found == None:
+            # If still nothing found, just add empty columns
+            extra_cols = ['', '','','','','','','']
+        else:
+            # Add info found:
+            extra_cols = _get_extra_info_and_link_cols(data_found, data_type_found, query_link)
+
+        # Take all data and merge it into a "flat"/simple array of values:
+        field_values_list = _merge_data(input_data_record, extra_cols)
+
+        merged.append(field_values_list)
+
+    # return the merged/enriched records:
+    return merged
+
+
+def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
+    '''
+    This method will go over the data found and will return a
+    list with the following items:
+    - Experiment details where hits have been found :
+        'organism', 'tissue','experiment_name','user_name','column_type'
+    - Link that executes same query
+
+    '''
+    # set() makes a unique list:
+    organism_set = []
+    tissue_set = []
+    experiment_name_set = []
+    user_name_set = []
+    column_type_set = []
+    cas_nr_set = []
+
+    if 'organism' in data_found:
+        organism_set = set(data_found['organism'])
+    if 'tissue' in data_found:
+        tissue_set = set(data_found['tissue'])
+    if 'experiment_name' in data_found:
+        experiment_name_set = set(data_found['experiment_name'])
+    if 'user_name' in data_found:
+        user_name_set = set(data_found['user_name'])
+    if 'column_type' in data_found:
+        column_type_set = set(data_found['column_type'])
+    if 'CAS' in data_found:
+        cas_nr_set = set(data_found['CAS'])
+
+
+    result = [data_type_found,
+              _to_xsv(organism_set),
+              _to_xsv(tissue_set),
+              _to_xsv(experiment_name_set),
+              _to_xsv(user_name_set),
+              _to_xsv(column_type_set),
+              _to_xsv(cas_nr_set),
+              #To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
+              "=HYPERLINK(\""+ query_link + "\", \"Link to entries found in DB \")"]
+    return result
+
+
+def _to_xsv(data_set):
+    result = ""
+    for item in data_set:
+        result = result + str(item) + "|"
+    return result
+
+
+def _fire_query_and_return_dict(url):
+    '''
+    This method will fire the query as a web-service call and
+    return the results as a list of dictionary objects
+    '''
+
+    try:
+        data = urllib2.urlopen(url).read()
+
+        # transform to dictionary:
+        result = []
+        data_rows = data.split("\n")
+
+        # check if there is any data in the response:
+        if len(data_rows) <= 1 or data_rows[1].strip() == '':
+            # means there is only the header row...so no hits:
+            return None
+
+        for data_row in data_rows:
+            if not data_row.strip() == '':
+                row_as_list = _str_to_list(data_row, delimiter='\t')
+                result.append(row_as_list)
+
+        # return result processed into a dict:
+        return _process_data(result)
+
+    except urllib2.HTTPError, e:
+        raise Exception( "HTTP error for URL: " + url + " : %s - " % e.code + e.reason)
+    except urllib2.URLError, e:
+        raise Exception( "Network error: %s" % e.reason.args[1] + ". Administrator: please check if MetExp service [" + url + "] is accessible from your Galaxy server. ")
+
+def _str_to_list(data_row, delimiter='\t'):
+    result = []
+    for column in data_row.split(delimiter):
+        result.append(column)
+    return result
+
+
+# alternative: ?
+#     s = requests.Session()
+#     s.verify = False
+#     #s.auth = (token01, token02)
+#     resp = s.get(url, params={'name': 'anonymous'}, stream=True)
+#     content = resp.content
+#     # transform to dictionary:
+
+
+
+
+def _merge_data(input_data_record, extra_cols):
+    '''
+    Adds the extra information to the existing data record and returns
+    the combined new record.
+    '''
+    record = []
+    for column in input_data_record:
+        record.append(input_data_record[column])
+
+
+    # add extra columns
+    for column in extra_cols:
+        record.append(column)
+
+    return record
+
+
+def _save_data(data_rows, headers, out_csv):
+    '''
+    Writes tab-separated data to file
+    @param data_rows: dictionary containing merged/enriched dataset
+    @param out_csv: output csv file
+    '''
+
+    # Open output file for writing
+    outfile_single_handle = open(out_csv, 'wb')
+    output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
+
+    # Write headers
+    output_single_handle.writerow(headers)
+
+    # Write one line for each row
+    for data_row in data_rows:
+        output_single_handle.writerow(data_row)
+
+def _get_metexp_URL(metexp_dblink_file):
+    '''
+    Read out and return the URL stored in the given file.
+    '''
+    file_input = fileinput.input(metexp_dblink_file)
+    try:
+        for line in file_input:
+            if line[0] != '#':
+                # just return the first line that is not a comment line:
+                return line
+    finally:
+        file_input.close()
+
+
+def main():
+    '''
+    MetExp Query main function
+
+    The input file can be any tabular file, as long as it contains a column for the molecular mass
+    and one for the formula of the respective identification. These two columns are then
+    used to query against MetExp Database.
+    '''
+    seconds_start = int(round(time.time()))
+
+    input_file = sys.argv[1]
+    casid_col = sys.argv[2]
+    formula_col = sys.argv[3]
+    molecular_mass_col = sys.argv[4]
+    metexp_dblink_file = sys.argv[5]
+    separation_method = sys.argv[6]
+    output_result = sys.argv[7]
+
+    # Parse metexp_dblink_file to find the URL to the MetExp service:
+    metexp_dblink = _get_metexp_URL(metexp_dblink_file)
+
+    # Parse tabular input file into dictionary/array:
+    input_data = _process_file(input_file)
+
+    # Query data against MetExp DB :
+    enriched_data = _query_and_add_data(input_data, casid_col, formula_col, molecular_mass_col, metexp_dblink, separation_method)
+    headers = input_data.keys() + ['METEXP hits for ','METEXP hits: organisms', 'METEXP hits: tissues',
+                                   'METEXP hits: experiments','METEXP hits: user names','METEXP hits: column types', 'METEXP hits: CAS nrs', 'Link to METEXP hits']
+
+    _save_data(enriched_data, headers, output_result)
+
+    seconds_end = int(round(time.time()))
+    print "Took " + str(seconds_end - seconds_start) + " seconds"
+
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/METEXPtools/query_metexp.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,69 @@
+<tool id="query_metexp"
+    name="METEXP - Query Database "
+    version="0.1.0">
+  <description>Query a set of identifications against the METabolomics EXPeriments database</description>
+  <command interpreter="python">
+    query_metexp.py
+    $input_file
+    "$casid_col"
+    "$formula_col"
+    "$molecular_mass_col"
+    "$metexp_dblink_file"
+    $separation_method
+    $output_result
+  </command>
+  <inputs>
+
+   <param name="input_file" format="tabular" type="data"
+        label="Input file"
+    	help="Select a tabular file containing the entries to be queried/verified in the MetExp DB"/>
+
+  <param name="separation_method" type="select" label="Data type to query">
+  	<option value="GC" selected="True">GC</option>
+    <option value="LC">LC</option>
+  </param>
+
+
+   <param name="casid_col" type="text" size="50"
+           label="CAS ID column name"
+           value="CAS"
+           help="Name of the column containing the CAS code information (in the given input file)" />
+   <param name="formula_col" type="text" size="50"
+           label="Formula ID column name"
+           value="FORMULA"
+           help="Name of the column containing the formula information (in the given input file)" />
+   <param name="molecular_mass_col" type="text" size="50"
+           label="Molecular mass column name"
+           value="MM"
+           help="Name of the column containing the molecular mass information (in the given input file)" />
+
+   <param name="metexp_dblink_file" type="select" label="MetExp DB to query"
+      		 help="Select the MetExp Database/backend which should be queried"
+      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/MetExp_Databases")'/>
+
+
+  </inputs>
+  <outputs>
+    <data name="output_result" format="tabular" label="${tool.name} on ${on_string}" />
+  </outputs>
+  <code file="match_library.py" /> <!-- file containing get_directory_files function used above-->
+  <help>
+.. class:: infomark
+
+This tool will Query a set of identifications against the METabolomics EXPlorer database.
+
+It will take the input file and for each record it will query the
+molecular mass in the selected MetExp DB. If one or more compounds are found in the
+MetExp DB then extra information regarding these compounds is added to the output file.
+
+The output file is thus the input file enriched with information about
+related items found in the selected MetExp DB.
+
+**Notes**
+
+The input file can be any tabular file, as long as it contains a column for the molecular mass
+and one for the formula of the respective identification.
+
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/METEXPtools/static_resources/README.txt	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,3 @@
+This folder and respective files should be deployed together with the following tools:
+
+ - ../export_to_metexp_tabular.py
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/METEXPtools/static_resources/elements_and_masses.tab	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,104 @@
+Name	Atomic number	Chemical symbol	Relative atomic mass
+Hydrogen	1	H	1.01
+Helium	2	He	4
+Lithium	3	Li	6.94
+Beryllium	4	Be	9.01
+Boron	5	B	10.81
+Carbon	6	C	12.01
+Nitrogen	7	N	14.01
+Oxygen	8	O	16
+Fluorine	9	F	19
+Neon	10	Ne	20.18
+Sodium	11	Na	22.99
+Magnesium	12	Mg	24.31
+Aluminum	13	Al	26.98
+Silicon	14	Si	28.09
+Phosphorus	15	P	30.98
+Sulfur	16	S	32.06
+Chlorine	17	Cl	35.45
+Argon	18	Ar	39.95
+Potassium	19	K	39.1
+Calcium	20	Ca	40.08
+Scandium	21	Sc	44.96
+Titanium	22	Ti	47.9
+Vanadium	23	V	50.94
+Chromium	24	Cr	52
+Manganese	25	Mn	54.94
+Iron	26	Fe	55.85
+Cobalt	27	Co	58.93
+Nickel	28	Ni	58.71
+Copper	29	Cu	63.54
+Zinc	30	Zn	65.37
+Gallium	31	Ga	69.72
+Germanium	32	Ge	72.59
+Arsenic	33	As	74.99
+Selenium	34	Se	78.96
+Bromine	35	Br	79.91
+Krypton	36	Kr	83.8
+Rubidium	37	Rb	85.47
+Strontium	38	Sr	87.62
+Yttrium	39	Y	88.91
+Zirconium	40	Zr	91.22
+Niobium	41	Nb	92.91
+Molybdenum	42	Mo	95.94
+Technetium	43	Tc	96.91
+Ruthenium	44	Ru	101.07
+Rhodium	45	Rh	102.9
+Palladium	46	Pd	106.4
+Silver	47	Ag	107.87
+Cadmium	48	Cd	112.4
+Indium	49	In	114.82
+Tin	50	Sn	118.69
+Antimony	51	Sb	121.75
+Tellurium	52	Te	127.6
+Iodine	53	I	126.9
+Xenon	54	Xe	131.3
+Cesium	55	Cs	132.9
+Barium	56	Ba	137.34
+Lanthanum	57	La	138.91
+Cerium	58	Ce	140.12
+Praseodymium	59	Pr	140.91
+Neodymium	60	Nd	144.24
+Promethium	61	Pm	144.91
+Samarium	62	Sm	150.35
+Europium	63	Eu	151.96
+Gadolinium	64	Gd	157.25
+Terbium	65	Tb	158.92
+Dysprosium	66	Dy	162.5
+Holmium	67	Ho	164.93
+Erbium	68	Er	167.26
+Thulium	69	Tm	168.93
+Ytterbium	70	Yb	173.04
+Lutetium	71	Lu	174.97
+Hafnium	72	Hf	178.49
+Tantalum	73	Ta	180.95
+Wolfram	74	W	183.85
+Rhenium	75	Re	186.2
+Osmium	76	Os	190.2
+Iridium	77	Ir	192.22
+Platinum	78	Pt	195.09
+Gold	79	Au	196.97
+Mercury	80	Hg	200.59
+Thallium	81	Tl	204.37
+Lead	82	Pb	207.19
+Bismuth	83	Bi	208.98
+Polonium	84	Po	208.98
+Astatine	85	At	209.99
+Radon	86	Rn	222.02
+Francium	87	Fr	223.02
+Radium	88	Ra	226
+Actinium	89	Ac	227.03
+Thorium	90	Th	232.04
+Protactinium	91	Pa	231.04
+Uranium	92	U	238.03
+Neptunium	93	Np	237
+Plutonium	94	Pu	242
+Americium	95	Am	243.06
+Curium	96	Cm	247.07
+Berkelium	97	Bk	247.07
+Californium	98	Cf	251.08
+Einsteinium	99	Es	254.09
+Fermium	100	Fm	257.1
+Mendelevium	101	Md	257.1
+Nobelium	102	No	255.09
+Lawrencium	103	Lr	256.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MS/__init__.py	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,6 @@
+'''
+Module containing Galaxy tools for the LC or GC/MS pipeline
+Created on Mar , 2014
+
+@author: pieter lukasse
+'''
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MS/query_mass_repos.py	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,289 @@
+#!/usr/bin/env python
+# encoding: utf-8
+'''
+Module to query a set of accurate mass values detected by high-resolution mass spectrometers
+against various repositories/services such as METabolomics EXPlorer database or the
+MFSearcher service (http://webs2.kazusa.or.jp/mfsearcher/).
+
+It will take the input file and for each record it will query the
+molecular mass in the selected repository/service. If one or more compounds are found
+then extra information regarding these compounds is added to the output file.
+
+The output file is thus the input file enriched with information about
+related items found in the selected repository/service.
+
+The service should implement the following interface:
+
+http://service_url/mass?targetMs=500&margin=1&marginUnit=ppm&output=txth   (txth means there is guaranteed to be a header line before the data)
+
+The output should be tab separated and should contain the following columns (in this order)
+db-name    molecular-formula    dbe    formula-weight    id    description
+
+
+'''
+import csv
+import sys
+import fileinput
+import urllib2
+import time
+from collections import OrderedDict
+
+__author__ = "Pieter Lukasse"
+__contact__ = "pieter.lukasse@wur.nl"
+__copyright__ = "Copyright, 2014, Plant Research International, WUR"
+__license__ = "Apache v2"
+
+def _process_file(in_xsv, delim='\t'):
+    '''
+    Generic method to parse a tab-separated file returning a dictionary with named columns
+    @param in_csv: input filename to be parsed
+    '''
+    data = list(csv.reader(open(in_xsv, 'rU'), delimiter=delim))
+    return _process_data(data)
+
+def _process_data(data):
+
+    header = data.pop(0)
+    # Create dictionary with column name as key
+    output = OrderedDict()
+    for index in xrange(len(header)):
+        output[header[index]] = [row[index] for row in data]
+    return output
+
+
+def _query_and_add_data(input_data, molecular_mass_col, repository_dblink, error_margin, margin_unit):
+
+    '''
+    This method will iterate over the record in the input_data and
+    will enrich them with the related information found (if any) in the
+    chosen repository/service
+
+    # TODO : could optimize this with multi-threading, see also nice example at http://stackoverflow.com/questions/2846653/python-multithreading-for-dummies
+    '''
+    merged = []
+
+    for i in xrange(len(input_data[input_data.keys()[0]])):
+        # Get the record in same dictionary format as input_data, but containing
+        # a value at each column instead of a list of all values of all records:
+        input_data_record = OrderedDict(zip(input_data.keys(), [input_data[key][i] for key in input_data.keys()]))
+
+        # read the molecular mass :
+        molecular_mass = input_data_record[molecular_mass_col]
+
+
+        # search for related records in repository/service:
+        data_found = None
+        if molecular_mass != "":
+            molecular_mass = float(molecular_mass)
+
+            # 1- search for data around this MM:
+            query_link = repository_dblink + "/mass?targetMs=" + str(molecular_mass) + "&margin=" + str(error_margin) + "&marginUnit=" + margin_unit + "&output=txth"
+
+            data_found = _fire_query_and_return_dict(query_link + "&_format_result=tsv")
+            data_type_found = "MM"
+
+
+        if data_found == None:
+            # If still nothing found, just add empty columns
+            extra_cols = ['', '','','','','']
+        else:
+            # Add info found:
+            extra_cols = _get_extra_info_and_link_cols(data_found, data_type_found, query_link)
+
+        # Take all data and merge it into a "flat"/simple array of values:
+        field_values_list = _merge_data(input_data_record, extra_cols)
+
+        merged.append(field_values_list)
+
+    # return the merged/enriched records:
+    return merged
+
+
+def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
+    '''
+    This method will go over the data found and will return a
+    list with the following items:
+    - details of hits found :
+         db-name    molecular-formula    dbe    formula-weight    id    description
+    - Link that executes same query
+
+    '''
+
+    # set() makes a unique list:
+    db_name_set = []
+    molecular_formula_set = []
+    id_set = []
+    description_set = []
+
+
+    if 'db-name' in data_found:
+        db_name_set = set(data_found['db-name'])
+    elif '# db-name' in data_found:
+        db_name_set = set(data_found['# db-name'])
+    if 'molecular-formula' in data_found:
+        molecular_formula_set = set(data_found['molecular-formula'])
+    if 'id' in data_found:
+        id_set = set(data_found['id'])
+    if 'description' in data_found:
+        description_set = set(data_found['description'])
+
+    result = [data_type_found,
+              _to_xsv(db_name_set),
+              _to_xsv(molecular_formula_set),
+              _to_xsv(id_set),
+              _to_xsv(description_set),
+              #To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
+              "=HYPERLINK(\""+ query_link + "\", \"Link to entries found in DB \")"]
+    return result
+
+
+def _to_xsv(data_set):
+    result = ""
+    for item in data_set:
+        result = result + str(item) + "|"
+    return result
+
+
+def _fire_query_and_return_dict(url):
+    '''
+    This method will fire the query as a web-service call and
+    return the results as a list of dictionary objects
+    '''
+
+    try:
+        data = urllib2.urlopen(url).read()
+
+        # transform to dictionary:
+        result = []
+        data_rows = data.split("\n")
+
+        # remove comment lines if any (only leave the one that has "molecular-formula" word in it...compatible with kazusa service):
+        data_rows_to_remove = []
+        for data_row in data_rows:
+            if data_row == "" or (data_row[0] == '#' and "molecular-formula" not in data_row):
+                data_rows_to_remove.append(data_row)
+
+        for data_row in data_rows_to_remove:
+            data_rows.remove(data_row)
+
+        # check if there is any data in the response:
+        if len(data_rows) <= 1 or data_rows[1].strip() == '':
+            # means there is only the header row...so no hits:
+            return None
+
+        for data_row in data_rows:
+            if not data_row.strip() == '':
+                row_as_list = _str_to_list(data_row, delimiter='\t')
+                result.append(row_as_list)
+
+        # return result processed into a dict:
+        return _process_data(result)
+
+    except urllib2.HTTPError, e:
+        raise Exception( "HTTP error for URL: " + url + " : %s - " % e.code + e.reason)
+    except urllib2.URLError, e:
+        raise Exception( "Network error: %s" % e.reason.args[1] + ". Administrator: please check if service [" + url + "] is accessible from your Galaxy server. ")
+
+def _str_to_list(data_row, delimiter='\t'):
+    result = []
+    for column in data_row.split(delimiter):
+        result.append(column)
+    return result
+
+
+# alternative: ?
+#     s = requests.Session()
+#     s.verify = False
+#     #s.auth = (token01, token02)
+#     resp = s.get(url, params={'name': 'anonymous'}, stream=True)
+#     content = resp.content
+#     # transform to dictionary:
+
+
+
+
+def _merge_data(input_data_record, extra_cols):
+    '''
+    Adds the extra information to the existing data record and returns
+    the combined new record.
+    '''
+    record = []
+    for column in input_data_record:
+        record.append(input_data_record[column])
+
+
+    # add extra columns
+    for column in extra_cols:
+        record.append(column)
+
+    return record
+
+
+def _save_data(data_rows, headers, out_csv):
+    '''
+    Writes tab-separated data to file
+    @param data_rows: dictionary containing merged/enriched dataset
+    @param out_csv: output csv file
+    '''
+
+    # Open output file for writing
+    outfile_single_handle = open(out_csv, 'wb')
+    output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
+
+    # Write headers
+    output_single_handle.writerow(headers)
+
+    # Write one line for each row
+    for data_row in data_rows:
+        output_single_handle.writerow(data_row)
+
+def _get_repository_URL(repository_file):
+    '''
+    Read out and return the URL stored in the given file.
+    '''
+    file_input = fileinput.input(repository_file)
+    try:
+        for line in file_input:
+            if line[0] != '#':
+                # just return the first line that is not a comment line:
+                return line
+    finally:
+        file_input.close()
+
+
+def main():
+    '''
+    Query main function
+
+    The input file can be any tabular file, as long as it contains a column for the molecular mass.
+    This column is then used to query against the chosen repository/service Database.
+    '''
+    seconds_start = int(round(time.time()))
+
+    input_file = sys.argv[1]
+    molecular_mass_col = sys.argv[2]
+    repository_file = sys.argv[3]
+    error_margin = float(sys.argv[4])
+    margin_unit = sys.argv[5]
+    output_result = sys.argv[6]
+
+    # Parse repository_file to find the URL to the service:
+    repository_dblink = _get_repository_URL(repository_file)
+
+    # Parse tabular input file into dictionary/array:
+    input_data = _process_file(input_file)
+
+    # Query data against repository :
+    enriched_data = _query_and_add_data(input_data, molecular_mass_col, repository_dblink, error_margin, margin_unit)
+    headers = input_data.keys() + ['SEARCH hits for ','SEARCH hits: db-names', 'SEARCH hits: molecular-formulas ',
+                                   'SEARCH hits: ids','SEARCH hits: descriptions', 'Link to SEARCH hits']  #TODO - add min and max formula weigth columns
+
+    _save_data(enriched_data, headers, output_result)
+
+    seconds_end = int(round(time.time()))
+    print "Took " + str(seconds_end - seconds_start) + " seconds"
+
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MS/query_mass_repos.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,106 @@
+<tool id="query_mass_repos"
+    name="METEXP - Find elemental composition formulas based on mass values "
+    version="0.1.0">
+  <description>Query multiple public repositories for elemental compositions from accurate mass values detected by high-resolution mass spectrometers</description>
+  <command interpreter="python">
+    query_mass_repos.py
+    $input_file
+    "$molecular_mass_col"
+    "$repository_file"
+    $error_margin
+    $margin_unit
+    $output_result
+  </command>
+  <inputs>
+
+   <param name="input_file" format="tabular" type="data"
+        label="Input file"
+    	help="Select a tabular file containing the entries to be queried/verified in the MetExp DB"/>
+
+   <param name="molecular_mass_col" type="text" size="50"
+           label="Molecular mass column name"
+           value="MM"
+           help="Name of the column containing the molecular mass information (in the given input file)" />
+
+   <param name="repository_file" type="select" label="Repository/service to query"
+      		 help="Select the repository/service which should be queried"
+      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/MetExp_MassSearch_Services")'/>
+
+   <param name="error_margin" type="float" size="10"
+           label="Error marging"
+           value="0.01"
+           help="Mass difference allowed when searching in the repositories for a mass match." />
+
+   <param name="margin_unit" type="select" label="Margin unit">
+	  	<option value="ms" selected="True">ms</option>
+	    <option value="ppm">ppm</option>
+   </param>
+   <!-- TODO
+   <param name="metexp_access_key" type="text" size="50"
+           label="(Optional)MetExp access key"
+           value=""
+           help="Key needed to get access to MetExp services. Fill in if MetExp service was selected" />    -->
+
+  </inputs>
+  <outputs>
+    <data name="output_result" format="tabular" label="${tool.name} on ${on_string}" />
+  </outputs>
+  <code file="match_library.py" /> <!-- file containing get_directory_files function used above-->
+  <help>
+.. class:: infomark
+
+This tool will query multiple public repositories such as PRI-MetExp or http://webs2.kazusa.or.jp/mfsearcher
+for elemental compositions from accurate mass values detected by high-resolution mass spectrometers.
+
+It will take the input file and for each record it will query the
+molecular mass in the selected repository. If one or more compounds are found in the
+repository then extra information regarding (mass based)matching elemental composition formulas is added to the output file.
+
+The output file is thus the input file enriched with information about
+related items found in the selected repository.
+
+**Notes**
+
+The input file can be any tabular file, as long as it contains a column for the molecular mass.
+
+**Services that can be queried**
+
+================= =========================================================================
+Database          Description
+----------------- -------------------------------------------------------------------------
+PRI-MetExp        LC-MS and GC-MS data from experiments from the metabolomics group at
+                  Plant Research International. NB: restricted access to employees with
+                  access key.
+ExactMassDB       A database of possible elemental compositions consits of C: 100,
+                  H: 200, O: 50, N: 10, P: 10, and S: 10, that satisfy the Senior and
+                  the Lewis valence rules.
+                  (via /mfsearcher/exmassdb/)
+ExactMassDB-HR2   HR2, which is one of the fastest tools for calculation of elemental
+                  compositions, filters some elemental compositions according to
+                  the Seven Golden Rules (Kind and Fiehn, 2007). The ExactMassDB-HR2
+                  database returns the same result as does HR2 with the same atom kind
+                  and number condition as that used in construction of the ExactMassDB.
+                  (via /mfsearcher/exmassdb-hr2/)
+Pep1000           A database of possible linear polypeptides that are
+                  constructed with 20 kinds of amino acids and having molecular
+                  weights smaller than 1000.
+                  (via /mfsearcher/pep1000/)
+KEGG              Re-calculated compound data from KEGG. Weekly updated.
+                  (via /mfsearcher/kegg/)
+KNApSAcK          Re-calculated compound data from KNApSAcK.
+                  (via /mfsearcher/knapsack/)
+Flavonoid Viewer  Re-calculated compound data from Flavonoid Viewer .
+                  (via /mfsearcher/flavonoidviewer/
+LipidMAPS         Re-calculated compound data from LIPID MAPS.
+                  (via /mfsearcher/lipidmaps/)
+HMDB              Re-calculated compound data from Human Metabolome Database (HMDB)
+                  Version 3.5.
+                  (via /mfsearcher/hmdb/)
+PubChem           Re-calculated compound data from PubChem. Monthly updated.
+                  (via /mfsearcher/pubchem/)
+================= =========================================================================
+
+Sources for table above: PRI-MetExp and http://webs2.kazusa.or.jp/mfsearcher
+
+  </help>
+</tool>
--- a/combine_output.py	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,253 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-'''
-Module to combine output from two GCMS Galaxy tools (RankFilter and CasLookup)
-'''
-
-import csv
-import re
-import sys
-import math
-import pprint
-
-__author__ = "Marcel Kempenaar"
-__contact__ = "brs@nbic.nl"
-__copyright__ = "Copyright, 2012, Netherlands Bioinformatics Centre"
-__license__ = "MIT"
-
-def _process_data(in_csv):
-    '''
-    Generic method to parse a tab-separated file returning a dictionary with named columns
-    @param in_csv: input filename to be parsed
-    '''
-    data = list(csv.reader(open(in_csv, 'rU'), delimiter='\t'))
-    header = data.pop(0)
-    # Create dictionary with column name as key
-    output = {}
-    for index in xrange(len(header)):
-        output[header[index]] = [row[index] for row in data]
-    return output
-
-
-def _merge_data(rankfilter, caslookup):
-    '''
-    Merges data from both input dictionaries based on the Centrotype field. This method will
-    build up a new list containing the merged hits as the items.
-    @param rankfilter: dictionary holding RankFilter output in the form of N lists (one list per attribute name)
-    @param caslookup: dictionary holding CasLookup output in the form of N lists (one list per attribute name)
-    '''
-    # TODO: test for correct input files -> rankfilter and caslookup internal lists should have the same lenghts:
-    if (len(rankfilter['ID']) != len(caslookup['Centrotype'])):
-        raise Exception('rankfilter and caslookup files should have the same nr of rows/records ')
-
-    merged = []
-    processed = {}
-    for compound_id_idx in xrange(len(rankfilter['ID'])):
-        compound_id = rankfilter['ID'][compound_id_idx]
-        if not compound_id in processed :
-            # keep track of processed items to not repeat them
-            processed[compound_id] = compound_id
-            # get centrotype nr
-            centrotype = compound_id.split('-')[0]
-            # Get the indices for current compound ID in both data-structures for proper matching
-            rindex = [index for index, value in enumerate(rankfilter['ID']) if value == compound_id]
-            cindex = [index for index, value in enumerate(caslookup['Centrotype']) if value == centrotype]
-
-            merged_hits = []
-            # Combine hits
-            for hit in xrange(len(rindex)):
-                # Create records of hits to be merged ("keys" are the attribute names, so what the lines below do
-                # is create a new "dict" item with same "keys"/attributes, with each attribute filled with its
-                # corresponding value in the rankfilter or caslookup tables; i.e.
-                # rankfilter[key] => returns the list/array with size = nrrows, with the values for the attribute
-                #                    represented by "key". rindex[hit] => points to the row nr=hit (hit is a rownr/index)
-                rf_record = dict(zip(rankfilter.keys(), [rankfilter[key][rindex[hit]] for key in rankfilter.keys()]))
-                cl_record = dict(zip(caslookup.keys(), [caslookup[key][cindex[hit]] for key in caslookup.keys()]))
-
-                merged_hit = _add_hit(rf_record, cl_record)
-                merged_hits.append(merged_hit)
-
-            merged.append(merged_hits)
-
-    return merged, len(rindex)
-
-
-def _add_hit(rankfilter, caslookup):
-    '''
-    Combines single records from both the RankFilter- and CasLookup-tools
-    @param rankfilter: record (dictionary) of one compound in the RankFilter output
-    @param caslookup: matching record (dictionary) of one compound in the CasLookup output
-    '''
-    # The ID in the RankFilter output contains the following 5 fields:
-    rf_id = rankfilter['ID'].split('-')
-    try:
-        name, formula = _remove_formula(rankfilter['Name'])
-        hit = [rf_id[0], # Centrotype
-               rf_id[1], # cent.Factor
-               rf_id[2], # scan nr
-               rf_id[3], # R.T. (umin)
-               rf_id[4], # nr. Peaks
-               # Appending other fields
-               rankfilter['R.T.'],
-               name,
-               caslookup['FORMULA'] if not formula else formula,
-               rankfilter['Library'].strip(),
-               rankfilter['CAS'].strip(),
-               rankfilter['Forward'],
-               rankfilter['Reverse'],
-               ((float(rankfilter['Forward']) + float(rankfilter['Reverse'])) / 2),
-               rankfilter['RIexp'],
-               caslookup['RI'],
-               rankfilter['RIsvr'],
-               # Calculate absolute differences
-               math.fabs(float(rankfilter['RIexp']) - float(rankfilter['RIsvr'])),
-               math.fabs(float(caslookup['RI']) - float(rankfilter['RIexp'])),
-               caslookup['Regression.Column.Name'],
-               caslookup['min'],
-               caslookup['max'],
-               caslookup['nr.duplicates'],
-               caslookup['Column.phase.type'],
-               caslookup['Column.name'],
-               rankfilter['Rank'],
-               rankfilter['%rel.err'],
-               rankfilter['Synonyms']]
-    except KeyError as error:
-        print "Problem reading in data from input file(s):\n",
-        print "Respective CasLookup entry: \n", pprint.pprint(caslookup), "\n"
-        print "Respective RankFilter entry: \n", pprint.pprint(rankfilter), "\n"
-        raise error
-
-    return hit
-
-
-def _remove_formula(name):
-    '''
-    The RankFilter Name field often contains the Formula as well, this function removes it from the Name
-    @param name: complete name of the compound from the RankFilter output
-    '''
-    name = name.split()
-    poss_formula = name[-1]
-    match = re.match("^(([A-Z][a-z]{0,2})(\d*))+$", poss_formula)
-    if match:
-        return ' '.join(name[:-1]), poss_formula
-    else:
-        return ' '.join(name), False
-
-
-def _get_default_caslookup():
-    '''
-    The Cas Lookup tool might not have found all compounds in the library searched,
-    this default dict will be used to combine with the Rank Filter output
-    '''
-    return {'FORMULA': 'N/A',
-            'RI': '0.0',
-            'Regression.Column.Name': 'None',
-            'min': '0.0',
-            'max': '0.0',
-            'nr.duplicates': '0',
-            'Column.phase.type': 'N/A',
-            'Column.name': 'N/A'}
-
-
-def _save_data(data, nhits, out_csv_single, out_csv_multi):
-    '''
-    Writes tab-separated data to file
-    @param data: dictionary containing merged dataset
-    @param out_csv: output csv file
-    '''
-    # Columns we don't repeat:
-    header_part1 = ['Centrotype',
-              'cent.Factor',
-              'scan nr.',
-              'R.T. (umin)',
-              'nr. Peaks',
-              'R.T.']
-    # These are the headers/columns we repeat in case of
-    # combining hits in one line (see alternative_headers method below):
-    header_part2 = [
-              'Name',
-              'FORMULA',
-              'Library',
-              'CAS',
-              'Forward',
-              'Reverse',
-              'Avg. (Forward, Reverse)',
-              'RIexp',
-              'RI',
-              'RIsvr',
-              'RIexp - RIsvr',
-              'RI - RIexp',
-              'Regression.Column.Name',
-              'min',
-              'max',
-              'nr.duplicates',
-              'Column.phase.type',
-              'Column.name',
-              'Rank',
-              '%rel.err',
-              'Synonyms']
-
-    # Open output file for writing
-    outfile_single_handle = open(out_csv_single, 'wb')
-    outfile_multi_handle = open(out_csv_multi, 'wb')
-    output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
-    output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t")
-
-    # Write headers
-    output_single_handle.writerow(header_part1 + header_part2)
-    output_multi_handle.writerow(header_part1 + header_part2 + alternative_headers(header_part2, nhits-1))
-    # Combine all hits for each centrotype into one line
-    line = []
-    for centrotype_idx in xrange(len(data)):
-        i = 0
-        for hit in data[centrotype_idx]:
-            if i==0:
-                line.extend(hit)
-            else:
-                line.extend(hit[6:])
-            i = i+1
-        # small validation (if error, it is a programming error):
-        if i > nhits:
-            raise Exception('Error: more hits that expected for  centrotype_idx ' + centrotype_idx)
-        output_multi_handle.writerow(line)
-        line = []
-
-    # Write one line for each centrotype
-    for centrotype_idx in xrange(len(data)):
-        for hit in data[centrotype_idx]:
-            output_single_handle.writerow(hit)
-
-def alternative_headers(header_part2, nr_alternative_hits):
-    '''
-    This method will iterate over the header names and add the string 'ALT#_' before each,
-    where # is the number of the alternative, according to number of alternative hits we want to add
-    to final csv/tsv
-    '''
-    result = []
-    for i in xrange(nr_alternative_hits):
-        for header_name in header_part2:
-            result.append("ALT" + str(i+1) + "_" + header_name)
-    return result
-
-def main():
-    '''
-    Combine Output main function
-    It will merge the result files from "RankFilter"  and "Lookup RI for CAS numbers"
-    NB: the caslookup_result_file will typically have fewer lines than
-    rankfilter_result_file, so the merge has to consider this as well. The final file
-    should have the same nr of lines as rankfilter_result_file.
-    '''
-    rankfilter_result_file = sys.argv[1]
-    caslookup_result_file = sys.argv[2]
-    output_single_csv = sys.argv[3]
-    output_multi_csv = sys.argv[4]
-
-    # Read RankFilter and CasLookup output files
-    rankfilter = _process_data(rankfilter_result_file)
-    caslookup = _process_data(caslookup_result_file)
-    merged, nhits = _merge_data(rankfilter, caslookup)
-    _save_data(merged, nhits, output_single_csv, output_multi_csv)
-
-
-if __name__ == '__main__':
-    main()
--- a/combine_output.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-<tool id="combine_output" name="RIQC-Combine RankFilter and CasLookup output" version="1.0.2">
-  <description>Perform a combination of output data from the RankFilter and CasLookup tools</description>
-  <command interpreter="python">
-    combine_output.py $rankfilter_in $caslookup_in $out_single $out_multi
-  </command>
-  <inputs>
-    <param format="tabular" name="rankfilter_in" type="data" label="RIQC-RankFilter output (Estimated RI)"
-    	help="Select the output file from the RankFilter tool"/>
-    <param format="tabular" name="caslookup_in" type="data" label="RIQC-Lookup RI for CAS output ('Known' RI)"
-    	help="Select the output file from the CasLookup tool"/>
-    <!--   <param TODO : could add "tolerance for ERI-KRI"(Estimated RI-Known RI)-->
-  </inputs>
-  <outputs>
-    <data format="tabular" label="${tool.name} (Single) on ${on_string}" name="out_single" />
-    <data format="tabular" label="${tool.name} (Multi) on ${on_string}" name="out_multi" />
-  </outputs>
-  <help>
-Performs a combination of output files from the 'RankFilter' and 'Lookup RI for CAS' tools into two tab-separated files.
-
-The files produced are contain either all hits for a compound on a single line (Single) or on separate lines
-(Multi).
-
-.. class:: infomark
-
-**Notes**
-
-The input data should be produced by the RankFilter and 'Lookup RI for CAS' tools provided on this Galaxy server with the
-original headers kept intact. Processing steps include:
-
-   - Added columns showing the average Forward/Reverse values, RIexp - RIsvr and RI - RIexp values
-   - The ID column of the RankFilter tool output is split into 'Centrotype', 'cent.Factor', 'scan nr.', 'R.T. (umin)'
-     and 'nr. Peaks' fields.
-   - The formula is split off the 'Name' field in the RankFilter output
-
-  </help>
-</tool>
--- a/create_model.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-<tool id="create_poly_model" name="RIQC-Create Regression Model" version="1.0.2">
-  <description>Generate coefficients to enable the regression from one GC-column
-		  		         to another GC-column</description>
-  <command interpreter="Rscript">Rscripts/ridb-regression.R
-               $ridb
-               $out_model
-               $out_log
-               $min_residuals
-               $range_mod
-               $pvalue
-               $rsquared
-               $method
-               $plot
-               #if $plot
-                   $model_graphics
-               #end if
-  </command>
-  <inputs>
-    <param format="tabular" name="ridb" type="select" label="Retention Index (RI) and GC columns Library file"
-           help="Select the RI library file of which all GC columns and their RI values
-                 will be used to create a model"
-      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RI_DB_libraries")'/>
-
-    <param name="method" type="select" label="Select regression method"
-           help="Method to use for calculating the model" >
-           <option value="poly" selected="True">Polynomial (3rd degree)</option>
-           <option value="linear">Linear</option>
-    </param>
-    <param name="min_residuals" type="integer" value="10" optional="False"
-           label="Minimum number of residuals" help="The minimum number of residuals
-                 (datapoints) that both columns should have in common when calculating
-                 the model" />
-    <param name="range_mod" type="integer" value="0" optional="False"
-           label="Range modifier" help="Moves the range of the usable RI space by the
-                  given percentage. Set to 0 to use the full range of available data." />
-    <param name="pvalue" type="float" value="0.05" optional="False" min="0" max="1"
-           label="Pvalue to filter on" help="Set the upper limit for the pvalue (calculated)
-                  by performing an ANOVA analysis on the created model). All models with higher
-                  pvalues are discarded." />
-    <param name="rsquared" type="float" value="0.95" optional="False" min="0" max="1"
-           label="R-squared to filter on" help="Set the lower limit for the R-squared,
-                  all models with lower values are discarded." />
-    <param name="plot" type="boolean" label="Create a separate plot for each model"
-           help="This will create a ZIP file in the history containing PDF plots" />
-  </inputs>
-  <code file="match_library.py" />
-  <outputs>
-  	<data format="zip" label="Model Graphics of ${on_string}" name="model_graphics" >
-  	    <filter>(plot)</filter>
-  	</data>
-    <data format="tabular" label="Regression logfile of ${on_string}"  name="out_log" />
-    <data format="tabular" label="Regression model of ${on_string}"  name="out_model" />
-  </outputs>
-  <help>
-Calculates regression models for a permutation of all GC columns contained in the selected
-RI database file. The method used for creating the model is either based on a 3rd degree
-polynomial or a standard linear model.
-
-The *Minimum number of residuals* option will only allow regression if the columns it is based
-on has at least that number of datapoints on the same compound.
-
-Filtering is possible by setting an upper limit for the *p-value* and / or a lower limit for
-the *R squared* value. The produced logfile will state how many models have been discarded due
-to this filtering. The output model file also includes the p-value and R squared value for
-each created model.
-
-Graphical output of the models is available by selecting the plot option which shows the
-data points used for the model as well as the fit itself and the range of data that will
-be usable.
-
-.. class:: infomark
-
-**Notes**
-
-The output file produced by this tool is required as input for the CasLookup tool when
-selecting to apply regression when finding hits in the RIDB.
-  </help>
-</tool>
--- a/export_to_metexp_tabular.py	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,247 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-'''
-Module to combine output from the GCMS Galaxy tools RankFilter, CasLookup and MsClust
-into a tabular file that can be uploaded to the MetExp database.
-
-RankFilter, CasLookup are already combined by combine_output.py so here we will use
-this result. Furthermore here one of the MsClust
-quantification files containing the respective spectra details are to be combined as well.
-
-Extra calculations performed:
-- The column MW is also added here and is derived from the column FORMULA found
-  in RankFilter, CasLookup combined result.
-
-So in total here we merge 2 files and calculate one new column.
-'''
-from pkg_resources import resource_filename  # @UnresolvedImport # pylint: disable=E0611
-import csv
-import re
-import sys
-from collections import OrderedDict
-
-__author__ = "Pieter Lukasse"
-__contact__ = "pieter.lukasse@wur.nl"
-__copyright__ = "Copyright, 2013, Plant Research International, WUR"
-__license__ = "Apache v2"
-
-def _process_data(in_csv, delim='\t'):
-    '''
-    Generic method to parse a tab-separated file returning a dictionary with named columns
-    @param in_csv: input filename to be parsed
-    '''
-    data = list(csv.reader(open(in_csv, 'rU'), delimiter=delim))
-    header = data.pop(0)
-    # Create dictionary with column name as key
-    output = OrderedDict()
-    for index in xrange(len(header)):
-        output[header[index]] = [row[index] for row in data]
-    return output
-
-ONE_TO_ONE = 'one_to_one'
-N_TO_ONE = 'n_to_one'
-
-def _merge_data(set1, link_field_set1, set2, link_field_set2, compare_function, merge_function, metadata, relation_type=ONE_TO_ONE):
-    '''
-    Merges data from both input dictionaries based on the link fields. This method will
-    build up a new list containing the merged hits as the items.
-    @param set1: dictionary holding set1 in the form of N lists (one list per attribute name)
-    @param set2: dictionary holding set2 in the form of N lists (one list per attribute name)
-    '''
-    # TODO test for correct input files -> same link_field values should be there
-    # (test at least number of unique link_field values):
-    #
-    # if (len(set1[link_field_set1]) != len(set2[link_field_set2])):
-    #    raise Exception('input files should have the same nr of key values  ')
-
-
-    merged = []
-    processed = {}
-    for link_field_set1_idx in xrange(len(set1[link_field_set1])):
-        link_field_set1_value = set1[link_field_set1][link_field_set1_idx]
-        if not link_field_set1_value in processed :
-            # keep track of processed items to not repeat them
-            processed[link_field_set1_value] = link_field_set1_value
-
-            # Get the indices for current link_field_set1_value in both data-structures for proper matching
-            set1index = [index for index, value in enumerate(set1[link_field_set1]) if value == link_field_set1_value]
-            set2index = [index for index, value in enumerate(set2[link_field_set2]) if compare_function(value, link_field_set1_value)==True ]
-            # Validation :
-            if len(set2index) == 0:
-                # means that corresponding data could not be found in set2, then throw error
-                raise Exception("Datasets not compatible, merge not possible. " + link_field_set1 + "=" +
-                                link_field_set1_value + " only found in first dataset. ")
-
-            merged_hits = []
-            # Combine hits
-            for hit in xrange(len(set1index)):
-                # Create records of hits to be merged ("keys" are the attribute names, so what the lines below do
-                # is create a new "dict" item with same "keys"/attributes, with each attribute filled with its
-                # corresponding value in the sets; i.e.
-                # set1[key] => returns the list/array with size = nrrows, with the values for the attribute
-                #                    represented by "key".
-                # set1index[hit] => points to the row nr=hit (hit is a rownr/index)
-                # So set1[x][set1index[n]] = set1.attributeX.instanceN
-                #
-                # It just ensures the entry is made available as a plain named array for easy access.
-                rf_record = OrderedDict(zip(set1.keys(), [set1[key][set1index[hit]] for key in set1.keys()]))
-                if relation_type == ONE_TO_ONE :
-                    cl_record = OrderedDict(zip(set2.keys(), [set2[key][set2index[hit]] for key in set2.keys()]))
-                else:
-                    # is N to 1:
-                    cl_record = OrderedDict(zip(set2.keys(), [set2[key][set2index[0]] for key in set2.keys()]))
-
-                merged_hit = merge_function(rf_record, cl_record, metadata)
-                merged_hits.append(merged_hit)
-
-            merged.append(merged_hits)
-
-    return merged
-
-
-def _compare_records(key1, key2):
-    '''
-    in this case the compare method is really simple as both keys are expected to contain
-    same value when records are the same
-    '''
-    if key1 == key2:
-        return True
-    else:
-        return False
-
-
-
-def _merge_records(rank_caslookup_combi, msclust_quant_record, metadata):
-    '''
-    Combines single records from both the RankFilter+CasLookup combi file and from MsClust file
-
-    @param rank_caslookup_combi: rankfilter and caslookup combined record (see combine_output.py)
-    @param msclust_quant_record: msclust quantification + spectrum record
-    '''
-    record = []
-    for column in rank_caslookup_combi:
-        record.append(rank_caslookup_combi[column])
-
-    for column in msclust_quant_record:
-        record.append(msclust_quant_record[column])
-
-    for column in metadata:
-        record.append(metadata[column])
-
-    # add MOLECULAR MASS (MM)
-    molecular_mass = get_molecular_mass(rank_caslookup_combi['FORMULA'])
-    # limit to two decimals:
-    record.append("{0:.2f}".format(molecular_mass))
-
-    # add MOLECULAR WEIGHT (MW) - TODO - calculate this
-    record.append('0.0')
-
-    # level of identification and Location of reference standard
-    record.append('0')
-    record.append('')
-
-    return record
-
-
-def get_molecular_mass(formula):
-    '''
-    Calculates the molecular mass (MM).
-    E.g. MM of H2O = (relative)atomic mass of H x2 + (relative)atomic mass of O
-    '''
-
-    # Each element is represented by a capital letter, followed optionally by
-    # lower case, with one or more digits as for how many elements:
-    element_pattern = re.compile("([A-Z][a-z]?)(\d*)")
-
-    total_mass = 0
-    for (element_name, count) in element_pattern.findall(formula):
-        if count == "":
-            count = 1
-        else:
-            count = int(count)
-        element_mass = float(elements_and_masses_map[element_name])  # "found: Python's built-in float type has double precision " (? check if really correct ?)
-        total_mass += element_mass * count
-
-    return total_mass
-
-
-
-def _save_data(data, headers, out_csv):
-    '''
-    Writes tab-separated data to file
-    @param data: dictionary containing merged dataset
-    @param out_csv: output csv file
-    '''
-
-    # Open output file for writing
-    outfile_single_handle = open(out_csv, 'wb')
-    output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
-
-    # Write headers
-    output_single_handle.writerow(headers)
-
-    # Write
-    for item_idx in xrange(len(data)):
-        for hit in data[item_idx]:
-            output_single_handle.writerow(hit)
-
-
-def _get_map_for_elements_and_masses(elements_and_masses):
-    '''
-    This method will read out the column 'Chemical symbol' and make a map
-    of this, storing the column 'Relative atomic mass' as its value
-    '''
-    resultMap = {}
-    index = 0
-    for entry in elements_and_masses['Chemical symbol']:
-        resultMap[entry] = elements_and_masses['Relative atomic mass'][index]
-        index += 1
-
-    return resultMap
-
-
-def init_elements_and_masses_map():
-    '''
-    Initializes the lookup map containing the elements and their respective masses
-    '''
-    elements_and_masses = _process_data(resource_filename(__name__, "static_resources/elements_and_masses.tab"))
-    global elements_and_masses_map
-    elements_and_masses_map = _get_map_for_elements_and_masses(elements_and_masses)
-
-
-def main():
-    '''
-    Combine Output main function
-
-    RankFilter, CasLookup are already combined by combine_output.py so here we will use
-    this result. Furthermore here the MsClust spectra file (.MSP) and one of the MsClust
-    quantification files are to be combined with combine_output.py result as well.
-    '''
-    rankfilter_and_caslookup_combined_file = sys.argv[1]
-    msclust_quantification_and_spectra_file = sys.argv[2]
-    output_csv = sys.argv[3]
-    # metadata
-    metadata = OrderedDict()
-    metadata['organism'] = sys.argv[4]
-    metadata['tissue'] = sys.argv[5]
-    metadata['experiment_name'] = sys.argv[6]
-    metadata['user_name'] = sys.argv[7]
-    metadata['column_type'] = sys.argv[8]
-
-    # Read RankFilter and CasLookup output files
-    rankfilter_and_caslookup_combined = _process_data(rankfilter_and_caslookup_combined_file)
-    msclust_quantification_and_spectra = _process_data(msclust_quantification_and_spectra_file, ',')
-
-    # Read elements and masses to use for the MW/MM calculation :
-    init_elements_and_masses_map()
-
-    merged = _merge_data(rankfilter_and_caslookup_combined, 'Centrotype',
-                                msclust_quantification_and_spectra, 'centrotype',
-                                _compare_records, _merge_records, metadata,
-                                N_TO_ONE)
-    headers = rankfilter_and_caslookup_combined.keys() + msclust_quantification_and_spectra.keys() + metadata.keys() + ['MM','MW', 'Level of identification', 'Location of reference standard']
-    _save_data(merged, headers, output_csv)
-
-
-if __name__ == '__main__':
-    main()
--- a/export_to_metexp_tabular.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="export_to_metexp_tabular"
-    name="METEXP - Tabular file"
-    version="0.2.0">
-  <description>Create tabular file for loading into METabolomics EXPlorer database</description>
-  <command interpreter="python">
-    export_to_metexp_tabular.py
-    $rankfilter_and_caslookup_combi
-    $msclust_quant_file
-    $output_result
-    "$organism"
-    "$tissue"
-    "$experiment_name"
-    "$user_name"
-    "$column_type"
-  </command>
-  <inputs>
-    <param format="tabular" name="rankfilter_and_caslookup_combi" type="data" label="RIQC-Combine RankFilter and CasLookup output"
-    	help="Select the (multi) output file from the 'Combine RankFilter and CasLookup' tool"/>
-    <param format="tabular" name="msclust_quant_file" type="data" label="MusClust-quantification file output"
-    	help="Select the output file from MsClust (centrotype, mic or sim) which also contain respective spectrum details"/>
-
-
-   <param name="organism" type="text" size="80"
-           label="Organism(s) info"
-           help="Metadata information to accompany the results when stored in MetExp DB." >
-           <validator type="empty_field" message="A value is required."></validator><!-- attribute optional="False" does not seem to work for params so validator is added -->
-    </param>
-
-   <param name="tissue" type="text" size="80"
-           label="Tissue(s) info"
-           help="Metadata information to accompany the results when stored in MetExp DB."  >
-           <validator type="empty_field" message="A value is required."></validator>
-    </param>
-
-   <param name="experiment_name" type="text" size="80"
-           label="Experiment name/code"
-           help="Name or code to store the results under. This can help you find the results back in MetExpDB."  >
-           <validator type="empty_field" message="A value is required."></validator>
-    </param>
-
-   <param name="user_name" type="text" size="80"
-           label="User name"
-           help="User name or code to store the results under. This can help you find the results back in MetExpDB."  >
-           <validator type="empty_field" message="A value is required."></validator>
-    </param>
-
-    <param name="column_type" type="text" size="80"
-           label="Column type"
-           help="Column type to report with the results. This can help you find the results back in MetExpDB."  >
-           <validator type="empty_field" message="A value is required."></validator>
-    </param>
-
-  </inputs>
-  <outputs>
-    <data format="tabular" label="${tool.name} on ${on_string}" name="output_result" />
-  </outputs>
-  <help>
-.. class:: infomark
-
-Tool to combine output from the tools RankFilter, CasLookup and MsClust
-into a tabular file that can be uploaded to the METabolomics EXPlorer (MetExp) database.
-
-RankFilter, CasLookup are already combined by 'RIQC-Combine RankFilter and CasLookup' tool so here we will use
-this result.
-
-**Notes**
-
-Extra calculations performed:
-- The columns MM and MW are also added here and are derived from the column FORMULA found in RankFilter, CasLookup combined result.
-
-So in total here we merge 2 files and calculate one new column.
-
-
-  </help>
-</tool>
--- a/library_lookup.py	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,327 +0,0 @@
-'''
-Logic for searching a Retention Index database file given output from NIST
-'''
-import match_library
-import re
-import sys
-import csv
-
-__author__ = "Marcel Kempenaar"
-__contact__ = "brs@nbic.nl"
-__copyright__ = "Copyright, 2012, Netherlands Bioinformatics Centre"
-__license__ = "MIT"
-
-def create_lookup_table(library_file, column_type_name, statphase):
-    '''
-    Creates a dictionary holding the contents of the library to be searched
-    @param library_file: library to read
-    @param column_type_name: the columns type name
-    @param statphase: the columns stationary phase
-    '''
-    (data, header) = match_library.read_library(library_file)
-    # Test for presence of required columns
-    if ('columntype' not in header or
-        'columnphasetype' not in header or
-        'cas' not in header):
-        raise IOError('Missing columns in ', library_file)
-
-    column_type_column = header.index("columntype")
-    statphase_column = header.index("columnphasetype")
-    cas_column = header.index("cas")
-
-    filtered_library = [line for line in data if line[column_type_column] == column_type_name
-                        and line[statphase_column] == statphase]
-    lookup_dict = {}
-    for element in filtered_library:
-        # Here the cas_number is set to the numeric part of the cas_column value, so if the
-        # cas_column value is 'C1433' then cas_number will be '1433'
-        cas_number = str(re.findall(r'\d+', (element[cas_column]).strip())[0])
-        try:
-            lookup_dict[cas_number].append(element)
-        except KeyError:
-            lookup_dict[cas_number] = [element]
-    return lookup_dict
-
-
-def _preferred(hits, pref, ctype, polar, model, method):
-    '''
-    Returns all entries in the lookup_dict that have the same column name, type and polarity
-    as given by the user, uses regression if selected given the model and method to use. The
-    regression is applied on the column with the best R-squared value in the model
-    @param hits: all entries in the lookup_dict for the given CAS number
-    @param pref: preferred GC-column, can be one or more names
-    @param ctype: column type (capillary etc.)
-    @param polar: polarity (polar / non-polar etc.)
-    @param model: data loaded from file containing regression models
-    @param method: supported regression method (i.e. poly(nomial) or linear)
-    '''
-    match = []
-    for column in pref:
-        for hit in hits:
-            if hit[4] == ctype and hit[5] == polar and hit[6] == column:
-                # Create copy of found hit since it will be altered downstream
-                match.extend(hit)
-                return match, False
-
-    # No hit found for current CAS number, return if not performing regression
-    if not model:
-        return False, False
-
-    # Perform regression
-    for column in pref:
-        if column not in model:
-            break
-        # Order regression candidates by R-squared value (last element)
-        order = sorted(model[column].items(), key=lambda col: col[1][-1])
-        # Create list of regression candidate column names
-        regress_columns = list(reversed([column for (column, _) in order]))
-        # Names of available columns
-        available = [hit[6] for hit in hits]
-
-        # TODO: combine Rsquared and number of datapoints to get the best regression match
-        '''
-        # Iterate regression columns (in order) and retrieve their models
-        models = {}
-        for col in regress_columns:
-            if col in available:
-                hit = list(hits[available.index(col)])
-                if hit[4] == ctype:
-                    # models contains all model data including residuals [-2] and rsquared [-1]
-                    models[pref[0]] = model[pref[0]][hit[6]]
-        # Get the combined maximum for residuals and rsquared
-        best_match = models[]
-        # Apply regression
-        if method == 'poly':
-            regressed = _apply_poly_regression(best_match, hit[6], float(hit[3]), model)
-            if regressed:
-                hit[3] = regressed
-            else:
-                return False, False
-            else:
-                hit[3] = _apply_linear_regression(best_match, hit[6], float(hit[3]), model)
-                match.extend(hit)
-            return match, hit[6]
-        '''
-
-        for col in regress_columns:
-            if col in available:
-                hit = list(hits[available.index(col)])
-                if hit[4] == ctype:
-                    # Perform regression using a column for which regression is possible
-                    if method == 'poly':
-                        # Polynomial is only possible within a set border, if the RI falls outside
-                        # of this border, skip this lookup
-                        regressed = _apply_poly_regression(pref[0], hit[6], float(hit[3]), model)
-                        if regressed:
-                            hit[3] = regressed
-                        else:
-                            return False, False
-                    else:
-                        hit[3] = _apply_linear_regression(pref[0], hit[6], float(hit[3]), model)
-                    match.extend(hit)
-                    return match, hit[6]
-
-    return False, False
-
-
-
-def default_hit(row, cas_nr, compound_id):
-    '''
-    This method will return a "default"/empty hit for cases where the
-    method _preferred() returns False (i.e. a RI could not be found
-    for the given cas nr, also not via regression.
-    '''
-    return [
-            #'CAS',
-            'C' + cas_nr,
-            #'NAME',
-            '',
-            #'FORMULA',
-            '',
-            #'RI',
-            '0.0',
-            #'Column.type',
-            '',
-            #'Column.phase.type',
-            '',
-            #'Column.name',
-            '',
-            #'phase.coding',
-            ' ',
-            #'CAS_column.Name',
-            '',
-            #'Centrotype', -> NOTE THAT compound_id is not ALWAYS centrotype...depends on MsClust algorithm used...for now only one MsClust algorithm is used so it is not an issue, but this should be updated/corrected once that changes
-            compound_id,
-            #'Regression.Column.Name',
-            '',
-            #'min',
-            '',
-            #'max',
-            '',
-            #'nr.duplicates',
-            '']
-
-
-def format_result(lookup_dict, nist_tabular_filename, pref, ctype, polar, model, method):
-    '''
-    Looks up the compounds in the library lookup table and formats the results
-    @param lookup_dict: dictionary containing the library to be searched
-    @param nist_tabular_filename: NIST output file to be matched
-    @param pref: (list of) column-name(s) to look for
-    @param ctype: column type of interest
-    @param polar: polarity of the used column
-    @param model: data loaded from file containing regression models
-    @param method: supported regression method (i.e. poly(nomial) or linear)
-    '''
-    (nist_tabular_list, header_clean) = match_library.read_library(nist_tabular_filename)
-    # Retrieve indices of the CAS and compound_id columns (exit if not present)
-    try:
-        casi = header_clean.index("cas")
-        idi = header_clean.index("id")
-    except:
-        raise IOError("'CAS' or 'compound_id' not found in header of library file")
-
-    data = []
-    for row in nist_tabular_list:
-        casf = str(row[casi].replace('-', '').strip())
-        compound_id = str(row[idi].split('-')[0])
-        if casf in lookup_dict:
-            found_hit, regress = _preferred(lookup_dict[casf], pref, ctype, polar, model, method)
-            if found_hit:
-                # Keep cas nr as 'C'+ numeric part:
-                found_hit[0] = 'C' + casf
-                # Add compound id
-                found_hit.insert(9, compound_id)
-                # Add information on regression process
-                found_hit.insert(10, regress if regress else 'None')
-                # Replace column index references with actual number of duplicates
-                dups = len(found_hit[-1].split(','))
-                if dups > 1:
-                    found_hit[-1] = str(dups + 1)
-                else:
-                    found_hit[-1] = '0'
-                data.append(found_hit)
-                found_hit = ''
-            else:
-                data.append(default_hit(row, casf, compound_id))
-        else:
-            data.append(default_hit(row, casf, compound_id))
-
-        casf = ''
-        compound_id = ''
-        found_hit = []
-        dups = []
-    return data
-
-
-def _save_data(content, outfile):
-    '''
-    Write to output file
-    @param content: content to write
-    @param outfile: file to write to
-    '''
-    # header
-    header = ['CAS',
-              'NAME',
-              'FORMULA',
-              'RI',
-              'Column.type',
-              'Column.phase.type',
-              'Column.name',
-              'phase.coding',
-              'CAS_column.Name',
-              'Centrotype',
-              'Regression.Column.Name',
-              'min',
-              'max',
-              'nr.duplicates']
-    output_handle = csv.writer(open(outfile, 'wb'), delimiter="\t")
-    output_handle.writerow(header)
-    for entry in content:
-        output_handle.writerow(entry)
-
-
-def _read_model(model_file):
-    '''
-    Creates an easy to search dictionary for getting the regression parameters
-    for each valid combination of GC-columns
-    @param model_file: filename containing the regression models
-    '''
-    regress = list(csv.reader(open(model_file, 'rU'), delimiter='\t'))
-    if len(regress.pop(0)) > 9:
-        method = 'poly'
-    else:
-        method = 'linear'
-
-    model = {}
-    # Create new dictionary for each GC-column
-    for line in regress:
-        model[line[0]] = {}
-
-    # Add data
-    for line in regress:
-        if method == 'poly':
-            model[line[0]][line[1]] = [float(col) for col in line[2:11]]
-        else:  # linear
-            model[line[0]][line[1]] = [float(col) for col in line[2:9]]
-
-    return model, method
-
-
-def _apply_poly_regression(column1, column2, retention_index, model):
-    '''
-    Calculates a new retention index (RI) value using a given 3rd-degree polynomial
-    model based on data from GC columns 1 and 2
-    @param column1: name of the selected GC-column
-    @param column2: name of the GC-column to use for regression
-    @param retention_index: RI to convert
-    @param model: dictionary containing model information for all GC-columns
-    '''
-    coeff = model[column1][column2]
-    # If the retention index to convert is within range of the data the model is based on, perform regression
-    if coeff[4] < retention_index < coeff[5]:
-        return (coeff[3] * (retention_index ** 3) + coeff[2] * (retention_index ** 2) +
-                (retention_index * coeff[1]) + coeff[0])
-    else:
-        return False
-
-
-def _apply_linear_regression(column1, column2, retention_index, model):
-    '''
-    Calculates a new retention index (RI) value using a given linear model based on data
-    from GC columns 1 and 2
-    @param column1: name of the selected GC-column
-    @param column2: name of the GC-column to use for regression
-    @param retention_index: RI to convert
-    @param model: dictionary containing model information for all GC-columns
-    '''
-    # TODO: No use of limits
-    coeff = model[column1][column2]
-    return coeff[1] * retention_index + coeff[0]
-
-
-def main():
-    '''
-    Library Lookup main function
-    '''
-    library_file = sys.argv[1]
-    nist_tabular_filename = sys.argv[2]
-    ctype = sys.argv[3]
-    polar = sys.argv[4]
-    outfile = sys.argv[5]
-    pref = sys.argv[6:-1]
-    regress = sys.argv[-1]
-
-    if regress != 'False':
-        model, method = _read_model(regress)
-    else:
-        model, method = False, None
-
-    lookup_dict = create_lookup_table(library_file, ctype, polar)
-    data = format_result(lookup_dict, nist_tabular_filename, pref, ctype, polar, model, method)
-
-    _save_data(data, outfile)
-
-
-if __name__ == "__main__":
-    main()
--- a/library_lookup.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="lookup_library" name="RIQC-Lookup RI for CAS numbers in library" version="1.0.2">
-  <description>Lookup or estimate the RI using a "known RI values" CAS numbers library</description>
-  <command interpreter="python">
-    library_lookup.py
-    $library_file
-    $input
-    "$col_type"
-    "$polarity"
-    $output
-    #for $ctype in $pref
-      ${ctype.columntype}
-    #end for
-    $regression.model
-  </command>
-  <inputs>
-  <!-- Regarding the <page> items: this blocks the use of this tool in Galaxy workflows. However,
-       alternatives like wrapping this in conditionals, repeats (to force a refresh_on_change as this option
-       is not working on its own) failed since the workflow editor does not support refreshes...not does the
-       workflow runtime support conditionals or repeats to be set at runtime. See also
-       galaxy-dev mail thread "when else" in <conditional> ? RE: refresh_on_change : is this a valid attribute? Any other ideas/options??"  -->
-    <page>
-      <param format="tabular" name="input" type="data" label="NIST identifications as tabular file"
-      		 help="Select a tab delimited NIST metabolite identifications file (converted from PDF)" />
-      <param name="library_file" type="select" label="CAS x RI Library file"
-      		 help="Select a library/lookup file containing RI values for CAS numbers on various chromatography columns "
-      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RI_DB_libraries")'/>
-    </page>
-    <page>
-      <param name="col_type" type="select" label="Select column type" refresh_on_change="true"
-	         display="radio" dynamic_options='get_column_type(library_file)'
-	         help="" />
-	</page>
-    <page>
-      <param name="polarity" type="select" label="Select polarity" refresh_on_change="true"
-             display="radio" dynamic_options='filter_column(library_file,col_type)'
-	         help="" />
-    </page>
-    <page>
-	  <conditional name="regression">
-		  <param name="regression_select" type="boolean" checked="false" label="Apply regression method"
-		  		 help="If no data for the selected column is present in the database, selecting this option will try
-		  		 	   to convert Retention Indices using data from other GC-columns with a regression method. Please
-		  		 	   note that only the first given GC-column above will be used for this, any alternatives will be
-		  		 	   ignored" />
-		  <when value="true">
-		  	<param name="model" format="tabular" type="data" label="Tabular file containing regression model"
-		  		   help="This file contains the coefficients used to perform the regression from one GC-column
-		  		         to another GC-column"/>
-		  </when>
-          <when value="false">
-            <param name="model" type="hidden" value="False" />
-          </when>
-	  </conditional>
-      <repeat name="pref" title="Select column name preference">
-		<param name="columntype" type="select" label="Column name" refresh_on_change="true"
-               dynamic_options='filter_column2(library_file, col_type, polarity)'
-	       	   help="Select one or more column names for filtering. The order defines the priority." />
-      </repeat>
-    </page>
-  </inputs>
-  <outputs>
-    <data format="tabular" label="${tool.name} on" name="output" />
-</outputs>
-<code file="match_library.py" />
-  <help>
-Performs a lookup of the RI values by matching CAS numbers from the given NIST identifications file to a library.
-If a direct match is NOT found for the preferred column name, a regression can be done to find
-the theoretical RI value based on known RI values for the CAS number on other column types (see step 4).
-If there is no match for the CAS number on any column type, then the record is not given a RI.
-
-
-
-  </help>
-
-</tool>
--- a/match_library.py	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,133 +0,0 @@
-'''
-Containing functions are called from Galaxy to populate lists/checkboxes with selectable items
-'''
-import csv
-import glob
-import os
-
-
-__author__ = "Marcel Kempenaar"
-__contact__ = "brs@nbic.nl"
-__copyright__ = "Copyright, 2012, Netherlands Bioinformatics Centre"
-__license__ = "MIT"
-
-def get_column_type(library_file):
-    '''
-    Returns a Galaxy formatted list of tuples containing all possibilities for the
-    GC-column types. Used by the library_lookup.xml tool
-    @param library_file: given library file from which the list of GC-column types is extracted
-    '''
-    if library_file == "":
-        galaxy_output = [("", "", False)]
-    else:
-        (data, header) = read_library(library_file)
-
-        if 'columntype' not in header:
-            raise IOError('Missing columns in ', library_file)
-
-        # Filter data on column type
-        column_type = header.index("columntype")
-        amounts_in_list_dict = count_occurrence([row[column_type] for row in data])
-        galaxy_output = [(str(a) + "(" + str(b) + ")", a, False) for a, b in amounts_in_list_dict.items()]
-
-    return(galaxy_output)
-
-
-def filter_column(library_file, column_type_name):
-    '''
-    Filters the Retention Index database on column type
-    @param library_file: file containing the database
-    @param column_type_name: column type to filter on
-    '''
-    if library_file == "":
-        galaxy_output = [("", "", False)]
-    else:
-        (data, header) = read_library(library_file)
-
-        if ('columntype' not in header or
-            'columnphasetype' not in header):
-            raise IOError('Missing columns in ', library_file)
-
-        column_type = header.index("columntype")
-        statphase = header.index("columnphasetype")
-
-        # Filter data on colunn type name
-        statphase_list = [line[statphase] for line in data if line[column_type] == column_type_name]
-        amounts_in_list_dict = count_occurrence(statphase_list)
-        galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()]
-
-    return(sorted(galaxy_output))
-
-
-def filter_column2(library_file, column_type_name, statphase):
-    '''
-    Filters the Retention Index database on column type
-    @param library_file: file containing the database
-    @param column_type_name: column type to filter on
-    @param statphase: stationary phase of the column to filter on
-    '''
-    if library_file == "":
-        galaxy_output = [("", "", False)]
-    else:
-        (data, header) = read_library(library_file)
-
-        if ('columntype' not in header or
-            'columnphasetype' not in header or
-            'columnname' not in header):
-            raise IOError('Missing columns in ', library_file)
-
-        column_type_column = header.index("columntype")
-        statphase_column = header.index("columnphasetype")
-        column_name_column = header.index("columnname")
-
-        # Filter data on given column type name and stationary phase
-        statphase_list = [line[column_name_column] for line in data if line[column_type_column] == column_type_name and
-                          line[statphase_column] == statphase]
-        amounts_in_list_dict = count_occurrence(statphase_list)
-        galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()]
-
-    return(sorted(galaxy_output))
-
-
-def read_library(filename):
-    '''
-    Reads a CSV file and returns its contents and a normalized header
-    @param filename: file to read
-    '''
-    data = list(csv.reader(open(filename, 'rU'), delimiter='\t'))
-    header_clean = [i.lower().strip().replace(".", "").replace("%", "") for i in data.pop(0)]
-    return(data, header_clean)
-
-
-
-def get_directory_files(dir_name):
-    '''
-    Reads the directory and
-    returns the list of .txt files found as a dictionary
-    with file name and full path so that it can
-    fill a Galaxy drop-down combo box.
-
-    '''
-    files = glob.glob(dir_name + "/*.*")
-    if len(files) == 0:
-        # Configuration error: no library files found in <galaxy-home-dir>/" + dir_name :
-        galaxy_output = [("Configuration error: expected file not found in <galaxy-home-dir>/" + dir_name, "", False)]
-    else:
-        galaxy_output = [(str(get_file_name_no_ext(file_name)), str(os.path.abspath(file_name)), False) for file_name in files]
-    return(galaxy_output)
-
-def get_file_name_no_ext(full_name):
-    '''
-    returns just the last part of the name
-    '''
-    simple_name = os.path.basename(full_name)
-    base, ext = os.path.splitext(simple_name)
-    return base
-
-
-def count_occurrence(data_list):
-    '''
-    Counts occurrences in a list and returns a dict with item:occurrence
-    @param data_list: list to count items from
-    '''
-    return dict((key, data_list.count(key)) for key in set(data_list))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/INFO_structure_of_LC.txt	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,532 @@
+str(LC)
+List of 4
+ $ PeakTable :'data.frame':     1828 obs. of  12 variables:
+  ..$ ChemSpiderID   : chr [1:1828] "" "" "" "" ...
+  ..$ compound       : chr [1:1828] "" "" "" "" ...
+  ..$ pcgroup        : num [1:1828] 187 226 226 226 226 226 226 306 154 154 ...
+  ..$ adduct         : chr [1:1828] "" "" "" "" ...
+  ..$ isotopes       : chr [1:1828] "" "" "" "" ...
+  ..$ mz             : num [1:1828] 253 233 214 215 298 ...
+  ..$ rt             : num [1:1828] 1.27 1.57 1.59 1.59 1.59 ...
+  ..$ CLASS2         : num [1:1828] 2 2 2 2 2 2 2 2 2 2 ...
+  ..$ STDmix_GC_01   : num [1:1828] 0 0 0 0 0 0 0 0 0 0 ...
+  ..$ STDmix_GC_02   : num [1:1828] 0 0 0 0 0 0 0 0 0 0 ...
+  ..$ STDmix_RP_pos01: num [1:1828] 144.5 32.6 58.5 38.4 152.3 ...
+  ..$ STDmix_RP_pos02: num [1:1828] 343.4 54.5 79.1 64.9 210.8 ...
+ $ xset      :Formal class 'xsAnnotate' [package "CAMERA"] with 15 slots
+  .. ..@ groupInfo        : num [1:1828, 1:13] 30.2 30.5 31.4 32.4 33.4 ...
+  .. .. ..- attr(*, "dimnames")=List of 2
+  .. .. .. ..$ : NULL
+  .. .. .. ..$ : chr [1:13] "mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "npeaks",  "CLASS1", "CLASS2", "STDmix_GC_01", "STDmix_GC_02", "STDmix_RP_pos01", "STDmix_RP_pos02"
+
+  and groups() function:
+  "mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",  "npeaks", "CLASS1", "CLASS2"
+
+  .. ..@ pspectra         :List of 340
+  .. .. ..$ : num [1:12] 2 4 5 10 11 13 14 15 16 38 ...
+  .. .. ..$ : num [1:11] 3 12 45 76 99 ...
+  .. .. ..$ : num [1:15] 7 8 9 113 162 ...
+  .. .. ..$ : Named num 1
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:5] 6 150 473 478 802
+  .. .. ..$ : num [1:5] 18 28 40 64 85
+  .. .. ..$ : num [1:5] 89 528 533 596 872
+  .. .. ..$ : num [1:2] 57 197
+  .. .. ..$ : num [1:3] 174 708 841
+  .. .. ..$ : Named num 23
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:4] 21 34 496 1497
+  .. .. ..$ : num [1:3] 29 30 44
+  .. .. ..$ : num [1:5] 283 423 452 486 527
+  .. .. ..$ : num [1:6] 93 126 251 296 349 406
+  .. .. ..$ : num [1:3] 301 479 664
+  .. .. ..$ : num [1:5] 47 105 111 191 237
+  .. .. ..$ : num [1:5] 185 562 683 923 1548
+  .. .. ..$ : num [1:2] 469 809
+  .. .. ..$ : num [1:9] 24 146 548 550 1009 ...
+  .. .. ..$ : num [1:33] 65 72 134 139 196 200 235 299 355 361 ...
+  .. .. ..$ : num [1:2] 135 234
+  .. .. ..$ : num [1:4] 95 326 330 465
+  .. .. ..$ : num [1:2] 384 699
+  .. .. ..$ : Named num 388
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 249
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:4] 425 485 636 698
+  .. .. ..$ : Named num 482
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:2] 310 429
+  .. .. ..$ : num [1:97] 33 51 67 96 103 106 114 129 136 140 ...
+  .. .. ..$ : Named num 436
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:3] 583 790 1029
+  .. .. ..$ : Named num 593
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:2] 558 745
+  .. .. ..$ : Named num 543
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:44] 43 60 101 121 124 179 211 214 257 263 ...
+  .. .. ..$ : num [1:2] 243 941
+  .. .. ..$ : num [1:4] 657 996 1286 1344
+  .. .. ..$ : num [1:21] 145 195 199 217 230 268 271 279 665 743 ...
+  .. .. ..$ : Named num 836
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 694
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 578
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 612
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 684
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:18] 31 163 194 395 441 ...
+  .. .. ..$ : num [1:2] 79 282
+  .. .. ..$ : Named num 297
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:10] 167 169 281 284 750 ...
+  .. .. ..$ : num [1:30] 165 796 812 821 829 ...
+  .. .. ..$ : num [1:4] 112 566 733 1552
+  .. .. ..$ : num [1:33] 42 63 68 90 92 102 125 130 133 137 ...
+  .. .. ..$ : num [1:14] 183 188 435 960 1036 ...
+  .. .. ..$ : Named num 763
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 704
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:2] 693 797
+  .. .. ..$ : Named num 643
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:45] 62 100 115 128 132 178 327 366 430 492 ...
+  .. .. ..$ : Named num 838
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 653
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:9] 182 186 1035 1040 1045 ...
+  .. .. ..$ : Named num 250
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:2] 144 935
+  .. .. ..$ : num [1:3] 32 1259 1394
+  .. .. ..$ : Named num 77
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:27] 19 46 49 107 110 157 159 233 265 314 ...
+  .. .. ..$ : num [1:4] 253 410 777 991
+  .. .. ..$ : Named num 1031
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 978
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:22] 887 894 1023 1028 1037 ...
+  .. .. ..$ : num [1:5] 258 312 1669 1670 1694
+  .. .. ..$ : num [1:19] 319 730 735 888 895 ...
+  .. .. ..$ : num [1:22] 55 164 541 617 649 656 661 668 753 766 ...
+  .. .. ..$ : num [1:7] 513 545 770 773 1080 ...
+  .. .. ..$ : num [1:4] 70 295 634 1665
+  .. .. ..$ : num [1:3] 625 1447 1453
+  .. .. ..$ : Named num 287
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:2] 964 970
+  .. .. ..$ : num [1:153] 20 22 36 37 39 53 56 58 59 61 ...
+  .. .. ..$ : num [1:9] 317 323 420 886 1022 ...
+  .. .. ..$ : Named num 149
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:6] 487 549 609 623 728 ...
+  .. .. ..$ : num [1:6] 315 476 1265 1272 1634 ...
+  .. .. ..$ : num [1:5] 378 383 495 734 1063
+  .. .. ..$ : num [1:10] 645 818 823 914 919 ...
+  .. .. ..$ : num [1:7] 815 1388 1457 1539 1555 ...
+  .. .. ..$ : Named num 1019
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:6] 391 398 491 742 982 ...
+  .. .. ..$ : Named num 546
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:12] 794 804 811 820 828 ...
+  .. .. ..$ : num [1:24] 1089 1475 1481 1484 1491 ...
+  .. .. ..$ : num [1:2] 148 151
+  .. .. ..$ : num [1:33] 74 104 108 147 181 189 224 232 260 298 ...
+  .. .. ..$ : num [1:17] 293 332 394 447 449 552 619 622 680 805 ...
+  .. .. ..$ : Named num 309
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 207
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : Named num 584
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:2] 637 865
+  .. .. ..$ : Named num 143
+  .. .. .. ..- attr(*, "names")= chr ""
+  .. .. ..$ : num [1:18] 1737 1738 1741 1757 1758 ...
+  .. .. ..$ : num [1:29] 261 352 360 363 402 408 416 419 456 507 ...
+  .. .. .. [list output truncated]
+  .. ..@ psSamples        : int [1:340] 1 1 1 2 2 2 2 1 2 1 ...
+  .. ..@ isotopes         :List of 1828
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ :List of 4
+  .. .. .. ..$ y     : num 1
+  .. .. .. ..$ iso   : num 0
+  .. .. .. ..$ charge: Named num 1
+  .. .. .. .. ..- attr(*, "names")= chr "charge"
+  .. .. .. ..$ val   : Named num 0
+  .. .. .. .. ..- attr(*, "names")= chr "intrinsic"
+  .. .. ..$ :List of 4
+  .. .. .. ..$ y     : num 1
+  .. .. .. ..$ iso   : Named num 1
+  .. .. .. .. ..- attr(*, "names")= chr "iso"
+  .. .. .. ..$ charge: Named num 1
+  .. .. .. .. ..- attr(*, "names")= chr "charge"
+  .. .. .. ..$ val   : Named num 0
+  .. .. .. .. ..- attr(*, "names")= chr "intrinsic"
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ :List of 4
+  .. .. .. ..$ y     : num 2
+  .. .. .. ..$ iso   : num 0
+  .. .. .. ..$ charge: Named num 1
+  .. .. .. .. ..- attr(*, "names")= chr "charge"
+  .. .. .. ..$ val   : Named num 0
+  .. .. .. .. ..- attr(*, "names")= chr "intrinsic"
+  .. .. ..$ : NULL
+  .. .. ..$ :List of 4
+  .. .. .. ..$ y     : num 2
+  .. .. .. ..$ iso   : Named num 1
+  .. .. .. .. ..- attr(*, "names")= chr "iso"
+  .. .. .. ..$ charge: Named num 1
+  .. .. .. .. ..- attr(*, "names")= chr "charge"
+  .. .. .. ..$ val   : Named num 0
+  .. .. .. .. ..- attr(*, "names")= chr "intrinsic"
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. ..$ : NULL
+  .. .. .. [list output truncated]
+  .. ..@ derivativeIons   : list()
+  .. ..@ formula          : list()
+  .. ..@ sample           : num NA
+  .. ..@ xcmsSet          :Formal class 'xcmsSet' [package "xcms"] with 12 slots
+  .. .. .. ..@ peaks           : num [1:9826, 1:13] 30.2 30.5 31.4 32.4 33.3 ...
+  .. .. .. .. ..- attr(*, "dimnames")=List of 2
+  .. .. .. .. .. ..$ : NULL
+  .. .. .. .. .. ..$ : chr [1:13] "mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intf", "maxo", "maxf", "i", "sn", "sample"  (found with dput)
+  From http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf:
+
+  mz weighted (by intensity) mean of peak m/z across scans
+mzmin m/z of minimum step
+mzmax m/z of maximum step
+rt retention time of peak midpoint
+rtmin leading edge of peak retention time
+rtmax trailing edge of peak retention time
+into integrated area of original (raw) peak
+intf integrated area of filtered peak
+maxo maximum intensity of original (raw) peak
+maxf maximum intensity of filtered peak
+i rank of peak identified in merged EIC (<= max)
+sn signal to noise ratio of the peak
+
+  .. .. .. ..@ groups          : num [1:1828, 1:9] 30.2 30.5 31.4 32.4 33.4 ...
+  .. .. .. .. ..- attr(*, "dimnames")=List of 2
+  .. .. .. .. .. ..$ : NULL
+  .. .. .. .. .. ..$ : chr [1:9] "mzmed" "mzmin" "mzmax" "rtmed" ...
+  .. .. .. ..@ groupidx        :List of 1828
+  .. .. .. .. ..$ : int [1:4] 1 674 9671 9749
+  .. .. .. .. ..$ : int [1:4] 2 675 9672 9750
+  .. .. .. .. ..$ : int [1:4] 3 676 9673 9751
+  .. .. .. .. ..$ : int [1:4] 4 677 9674 9752
+  .. .. .. .. ..$ : int [1:4] 6 678 9675 9753
+  .. .. .. .. ..$ : int [1:4] 9 683 9676 9754
+  .. .. .. .. ..$ : int [1:4] 11 684 9677 9755
+  .. .. .. .. ..$ : int [1:4] 13 686 9678 9756
+  .. .. .. .. ..$ : int [1:4] 17 688 9679 9757
+  .. .. .. .. ..$ : int [1:4] 18 689 9680 9758
+  .. .. .. .. ..$ : int [1:4] 19 691 9681 9759
+  .. .. .. .. ..$ : int [1:4] 21 693 9682 9760
+  .. .. .. .. ..$ : int [1:4] 23 695 9683 9761
+  .. .. .. .. ..$ : int [1:4] 24 699 9684 9762
+  .. .. .. .. ..$ : int [1:4] 26 700 9685 9763
+  .. .. .. .. ..$ : int [1:4] 28 702 9686 9764
+  .. .. .. .. ..$ : int [1:4] 1353 3930 6183 7926
+  .. .. .. .. ..$ : int [1:4] 36 707 9687 9765
+  .. .. .. .. ..$ : int [1:4] 1354 3931 6184 7927
+  .. .. .. .. ..$ : int [1:4] 1355 3932 6185 7928
+  .. .. .. .. ..$ : int [1:4] 39 712 9688 9766
+  .. .. .. .. ..$ : int [1:4] 1356 3934 6186 7929
+  .. .. .. .. ..$ : int [1:4] 40 714 9689 9767
+  .. .. .. .. ..$ : int [1:4] 1358 3935 6187 7930
+  .. .. .. .. ..$ : int [1:4] 1359 3936 6188 7931
+  .. .. .. .. ..$ : int [1:4] 1361 3938 6189 7932
+  .. .. .. .. ..$ : int [1:4] 1360 3937 6190 7933
+  .. .. .. .. ..$ : int [1:4] 45 722 9690 9768
+  .. .. .. .. ..$ : int [1:4] 54 735 9691 9769
+  .. .. .. .. ..$ : int [1:4] 57 737 9692 9770
+  .. .. .. .. ..$ : int [1:4] 1365 3939 6191 7934
+  .. .. .. .. ..$ : int [1:4] 1364 3940 6192 7935
+  .. .. .. .. ..$ : int [1:4] 1369 3941 6193 7936
+  .. .. .. .. ..$ : int [1:4] 58 739 9693 9771
+  .. .. .. .. ..$ : int [1:4] 1370 3942 6194 7937
+  .. .. .. .. ..$ : int [1:4] 1371 3943 6195 7938
+  .. .. .. .. ..$ : int [1:4] 1374 3944 6196 7939
+  .. .. .. .. ..$ : int [1:4] 65 748 9694 9772
+  .. .. .. .. ..$ : int [1:4] 1375 3946 6197 7940
+  .. .. .. .. ..$ : int [1:4] 71 755 9695 9773
+  .. .. .. .. ..$ : int [1:5] 72 756 1376 1377 3947
+  .. .. .. .. ..$ : int [1:4] 1378 3948 6198 7941
+  .. .. .. .. ..$ : int [1:4] 1379 3950 6199 7942
+  .. .. .. .. ..$ : int [1:4] 74 758 9696 9774
+  .. .. .. .. ..$ : int [1:4] 1381 3951 6200 7943
+  .. .. .. .. ..$ : int [1:4] 1382 3952 6201 7944
+  .. .. .. .. ..$ : int [1:4] 78 761 9697 9775
+  .. .. .. .. ..$ : int [1:4] 1383 3955 6202 7945
+  .. .. .. .. ..$ : int [1:4] 1384 3956 6203 7946
+  .. .. .. .. ..$ : int [1:4] 1385 3957 6204 7947
+  .. .. .. .. ..$ : int [1:4] 1387 3958 6205 7948
+  .. .. .. .. ..$ : int [1:4] 1388 3959 6206 7949
+  .. .. .. .. ..$ : int [1:4] 1389 3960 6207 7950
+  .. .. .. .. ..$ : int [1:4] 1390 3962 6208 7951
+  .. .. .. .. ..$ : int [1:4] 1391 3963 6209 7952
+  .. .. .. .. ..$ : int [1:4] 1392 3965 6210 7953
+  .. .. .. .. ..$ : int [1:4] 90 776 9698 9776
+  .. .. .. .. ..$ : int [1:4] 1393 3966 6211 7954
+  .. .. .. .. ..$ : int [1:4] 1394 3967 6212 7955
+  .. .. .. .. ..$ : int [1:4] 1395 3968 6213 7956
+  .. .. .. .. ..$ : int [1:4] 1397 3970 6214 7957
+  .. .. .. .. ..$ : int [1:4] 1399 3972 6215 7958
+  .. .. .. .. ..$ : int [1:4] 1398 3971 6216 7959
+  .. .. .. .. ..$ : int [1:4] 101 788 9699 9777
+  .. .. .. .. ..$ : int [1:4] 100 790 9700 9778
+  .. .. .. .. ..$ : int [1:4] 1401 3973 6217 7960
+  .. .. .. .. ..$ : int [1:4] 1403 3975 6218 7961
+  .. .. .. .. ..$ : int [1:4] 1402 3974 6219 7962
+  .. .. .. .. ..$ : int [1:4] 1404 3976 6220 7963
+  .. .. .. .. ..$ : int [1:4] 1405 3977 6221 7964
+  .. .. .. .. ..$ : int [1:4] 1406 3978 6222 7965
+  .. .. .. .. ..$ : int [1:4] 1407 3979 6223 7966
+  .. .. .. .. ..$ : int [1:4] 1408 3980 6224 7967
+  .. .. .. .. ..$ : int [1:4] 1409 3981 6225 7968
+  .. .. .. .. ..$ : int [1:4] 1412 3982 6226 7969
+  .. .. .. .. ..$ : int [1:4] 1413 3983 6227 7970
+  .. .. .. .. ..$ : int [1:4] 1414 3984 6228 7971
+  .. .. .. .. ..$ : int [1:4] 1415 3985 6229 7972
+  .. .. .. .. ..$ : int [1:4] 1417 3987 6230 7973
+  .. .. .. .. ..$ : int [1:4] 1416 3988 6231 7974
+  .. .. .. .. ..$ : int [1:4] 1419 3989 6232 7975
+  .. .. .. .. ..$ : int [1:4] 1421 3991 6233 7976
+  .. .. .. .. ..$ : int [1:4] 1420 3990 6234 7977
+  .. .. .. .. ..$ : int [1:4] 1422 3992 6235 7978
+  .. .. .. .. ..$ : int [1:4] 115 802 9701 9779
+  .. .. .. .. ..$ : int [1:4] 1423 3993 6236 7979
+  .. .. .. .. ..$ : int [1:4] 1425 3995 6237 7980
+  .. .. .. .. ..$ : int [1:4] 1426 3996 6238 7981
+  .. .. .. .. ..$ : int [1:4] 120 807 9702 9780
+  .. .. .. .. ..$ : int [1:4] 1427 3997 6239 7982
+  .. .. .. .. ..$ : int [1:4] 1430 3998 6240 7983
+  .. .. .. .. ..$ : int [1:4] 1431 3999 6241 7984
+  .. .. .. .. ..$ : int [1:4] 123 812 9703 9781
+  .. .. .. .. ..$ : int [1:4] 1432 4000 6242 7985
+  .. .. .. .. ..$ : int [1:4] 1433 4001 6243 7986
+  .. .. .. .. ..$ : int [1:4] 1434 4002 6244 7987
+  .. .. .. .. ..$ : int [1:4] 1436 4005 6245 7988
+  .. .. .. .. ..$ : int [1:4] 1435 4004 6246 7989
+  .. .. .. .. ..$ : int [1:4] 1437 4006 6247 7990
+  .. .. .. .. .. [list output truncated]
+  .. .. .. ..@ filled          : int [1:3644] 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 ...
+  .. .. .. ..@ phenoData       :'data.frame':   4 obs. of  1 variable:
+  .. .. .. .. ..$ class: Factor w/ 2 levels "CLASS1","CLASS2": 1 1 2 2
+  .. .. .. ..@ rt              :List of 2
+  .. .. .. .. ..$ raw      :List of 4
+  .. .. .. .. .. ..$ : num [1:10136] 194 194 194 194 195 ...
+  .. .. .. .. .. ..$ : num [1:10138] 194 194 194 194 195 ...
+  .. .. .. .. .. ..$ : num [1:2614] 3.46 4.75 6.04 7.32 8.61 ...
+  .. .. .. .. .. ..$ : num [1:2613] 3.47 4.77 6.06 7.37 8.66 ...
+  .. .. .. .. ..$ corrected:List of 4
+  .. .. .. .. .. ..$ : num [1:10136] 168 169 169 169 169 ...
+  .. .. .. .. .. ..$ : num [1:10138] 157 158 158 158 158 ...
+  .. .. .. .. .. ..$ : num [1:2614] 40.5 41.8 43.1 44.4 45.7 ...
+  .. .. .. .. .. ..$ : num [1:2613] 40.7 42 43.3 44.6 45.9 ...
+  .. .. .. ..@ filepaths       : chr [1:4] "E:/workspace/PRIMS-metabolomics/python-tools/tools/metaMS/test/zipOut/CLASS1/STDmix_GC_01.CDF" "E:/workspace/PRIMS-metabolomics/python-tools/tools/metaMS/test/zipOut/CLASS1/STDmix_GC_02.CDF" "E:/workspace/PRIMS-metabolomics/python-tools/tools/metaMS/test/zipOut/CLASS2/STDmix_RP_pos01.CDF" "E:/workspace/PRIMS-metabolomics/python-tools/tools/metaMS/test/zipOut/CLASS2/STDmix_RP_pos02.CDF"
+  .. .. .. ..@ profinfo        :List of 2
+  .. .. .. .. ..$ method: chr "bin"
+  .. .. .. .. ..$ step  : num 0.05
+  .. .. .. ..@ dataCorrection  : int(0)
+  .. .. .. ..@ polarity        : chr(0)
+  .. .. .. ..@ progressInfo    :List of 12
+  .. .. .. .. ..$ group.density          : num 0
+  .. .. .. .. ..$ group.mzClust          : num 0
+  .. .. .. .. ..$ group.nearest          : num 0
+  .. .. .. .. ..$ findPeaks.centWave     : num 0
+  .. .. .. .. ..$ findPeaks.massifquant  : num 0
+  .. .. .. .. ..$ findPeaks.matchedFilter: num 0
+  .. .. .. .. ..$ findPeaks.MS1          : num 0
+  .. .. .. .. ..$ findPeaks.MSW          : num 0
+  .. .. .. .. ..$ retcor.obiwarp         : num 0
+  .. .. .. .. ..$ retcor.peakgroups      : num 0
+  .. .. .. .. ..$ fillPeaks.chrom        : num 0
+  .. .. .. .. ..$ fillPeaks.MSW          : num 0
+  .. .. .. ..@ progressCallback:function (progress)
+  .. ..@ ruleset          : NULL
+  .. ..@ annoID           : logi[0 , 1:4]
+  .. .. ..- attr(*, "dimnames")=List of 2
+  .. .. .. ..$ : NULL
+  .. .. .. ..$ : chr [1:4] "id" "grpID" "ruleID" "parentID"
+  .. ..@ annoGrp          : logi[0 , 1:4]
+  .. .. ..- attr(*, "dimnames")=List of 2
+  .. .. .. ..$ : NULL
+  .. .. .. ..$ : chr [1:4] "id" "mass" "ips" "psgrp"
+  .. ..@ isoID            : num [1:315, 1:4] 14 90 107 121 134 137 148 152 157 170 ...
+  .. .. ..- attr(*, "dimnames")=List of 2
+  .. .. .. ..$ : NULL
+  .. .. .. ..$ : chr [1:4] "mpeak" "isopeak" "iso" "charge"
+  .. ..@ polarity         : chr ""
+  .. ..@ runParallel      :List of 1
+  .. .. ..$ enable: num 0
+  .. ..@ .__classVersion__:Formal class 'Versions' [package "Biobase"] with 1 slots
+  .. .. .. ..@ .Data:List of 1
+  .. .. .. .. ..$ : int [1:3] 1 13 333
+ $ Annotation:List of 5
+  ..$ annotation.table    :'data.frame':        27 obs. of  13 variables:
+  .. ..$ feature     : int [1:27] 190 193 195 206 208 209 212 367 368 371 ...
+  .. ..$ db_position : int [1:27] 21 22 23 21 23 24 22 1 2 4 ...
+  .. ..$ ChemSpiderID: int [1:27] 10463792 10463792 10463792 10463792 10463792 10463792 10463792 8711 8711 8711 ...
+  .. ..$ mz          : num [1:27] 137 348 696 137 696 ...
+  .. ..$ rt          : num [1:27] 10.8 10.8 10.8 11.2 11.2 ...
+  .. ..$ I           : num [1:27] 10660 3754.5 32.4 5127 58.2 ...
+  .. ..$ compound    : Factor w/ 4 levels "D-(+)-Catechin",..: 4 4 4 4 4 4 4 1 1 1 ...
+  .. ..$ db_mz       : num [1:27] 137 348 696 137 696 ...
+  .. ..$ db_rt       : num [1:27] 10.8 10.8 10.8 10.8 10.8 ...
+  .. ..$ db_I        : num [1:27] 33.4 4478.7 58.2 33.4 58.2 ...
+  .. ..$ db_ann      : Factor w/ 1 level "automatic": 1 1 1 1 1 1 1 1 1 1 ...
+  .. ..$ mz.err      : num [1:27] 0.005 0.005 0.005 0.005 0.005 0.005 0.005 0.005 0.005 0.005 ...
+  .. ..$ clid        : int [1:27] 1 1 1 2 2 2 2 2 2 2 ...
+  ..$ compounds           : chr [1:4] "adenosine 2'-monophosphate" "D-(+)-Catechin" "malvidin-3-glucoside" "rutin "
+  ..$ ChemSpiderIDs       : int [1:4] 10463792 8711 391785 4444362
+  ..$ multiple.annotations: num(0)
+  ..$ ann.features        : int [1:27] 190 193 195 206 208 209 212 367 368 371 ...
+ $ Settings  :Formal class 'metaMSsettings' [package "metaMS"] with 30 slots
+  .. ..@ protocolName                      : chr "Synapt.QTOF.RP"
+  .. ..@ chrom                             : chr "LC"
+  .. ..@ PeakPicking                       :List of 5
+  .. .. ..$ method  : chr "matchedFilter"
+  .. .. ..$ step    : num 0.05
+  .. .. ..$ fwhm    : num 20
+  .. .. ..$ snthresh: num 4
+  .. .. ..$ max     : num 50
+  .. ..@ Alignment                         :List of 9
+  .. .. ..$ min.class.fraction: num 0.3
+  .. .. ..$ min.class.size    : num 3
+  .. .. ..$ mzwid             : num 0.1
+  .. .. ..$ bws               : num [1:2] 30 10
+  .. .. ..$ missingratio      : num 0.2
+  .. .. ..$ extraratio        : num 0.1
+  .. .. ..$ retcormethod      : chr "linear"
+  .. .. ..$ retcorfamily      : chr "symmetric"
+  .. .. ..$ fillPeaks         : logi TRUE
+  .. ..@ CAMERA                            :List of 3
+  .. .. ..$ perfwhm   : num 0.6
+  .. .. ..$ cor_eic_th: num 0.7
+  .. .. ..$ ppm       : num 5
+  .. ..@ match2DB.rtdiff                   : num 1.5
+  .. ..@ match2DB.minfeat                  : num 2
+  .. ..@ match2DB.rtval                    : num 0.1
+  .. ..@ match2DB.mzdiff                   : num 0.005
+  .. ..@ match2DB.ppm                      : num 5
+  .. ..@ match2DB.simthresh                : num(0)
+  .. ..@ match2DB.timeComparison           : chr(0)
+  .. ..@ match2DB.RIdiff                   : num(0)
+  .. ..@ DBconstruction.minfeat            : num(0)
+  .. ..@ DBconstruction.rttol              : num(0)
+  .. ..@ DBconstruction.mztol              : num(0)
+  .. ..@ DBconstruction.minintens          : num(0)
+  .. ..@ DBconstruction.intensityMeasure   : chr(0)
+  .. ..@ DBconstruction.DBthreshold        : num(0)
+  .. ..@ matchIrrelevants.irrelevantClasses: chr(0)
+  .. ..@ matchIrrelevants.simthresh        : num(0)
+  .. ..@ matchIrrelevants.timeComparison   : chr(0)
+  .. ..@ matchIrrelevants.RIdiff           : num(0)
+  .. ..@ matchIrrelevants.rtdiff           : num(0)
+  .. ..@ betweenSamples.min.class.fraction : num(0)
+  .. ..@ betweenSamples.min.class.size     : num(0)
+  .. ..@ betweenSamples.timeComparison     : chr(0)
+  .. ..@ betweenSamples.rtdiff             : num(0)
+  .. ..@ betweenSamples.RIdiff             : num(0)
+  .. ..@ betweenSamples.simthresh          : num(0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/README.txt	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,70 @@
+Wrappers for:
+- the metaMS R package by Ron Wehrens.
+- the xcms package.
+
+Wrappers written by Pieter Lukasse.
+
+
+Installation  (when updating: close all vignettes [i.e. pdfs/manuals] !)
+
+In R:
+source("http://bioconductor.org/biocLite.R")
+biocLite("metaMS")
+biocLite("multtest")
+#biocLite("R2HTML")
+# for "multi-threading"  (actually starts multiple R processes for parallel processing):
+install.packages("snow")
+install.packages("Cairo")
+
+======Run metaMS_cmd_pick_and_group.r with:=================
+
+E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript metaMS_cmd_pick_and_group.r test/extdata.zip test/example_settings.txt test/out/peakTable.txt test/out/xsAnnotatePrep.rdata positive test/out/html_peaks.html test/out
+
+
+======Run metaMS_cmd_annotate.r with:=================
+
+E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript metaMS_cmd_annotate.r test/LCDBtest.RData test/out/xsAnnotatePrep.rdata test/example_settings.txt test/out/annotationTable.txt "0" test/out/html_annot.html test/out
+
+
+======Run xcms_differential_analysis.r with:=================
+
+E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript xcms_differential_analysis.r test/out/xsAnnotatePrep.rdata "CLASS1" "CLASS2" 10 test/out2/outtable.tsv test/out2/html/html.html test/out2/html
+
+
+======Run xcms_get_eic.r with:=================
+
+
+E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript xcms_get_alignment_eic.r test/out/xsAnnotatePrep.rdata 10 300 3 STDmix_GC_01,STDmix_GC_02 test/out3/html/html.html test/out3/html
+
+OR
+
+E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript xcms_get_mass_eic.r test/out/xsAnnotatePrep.rdata 10 3000 -1 -1 "77.98,231.96" 5 STDmix_GC_01,STDmix_GC_02 Yes raw test/out4/html/html.html test/out4/html
+
+
+
+!!!!!!!!!!!!!!!!Troubleshooting:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+NetCDF is required. If the following is found in the installation.log
+
+In file included from rnetCDF.c:2:0:
+rnetCDF.h:1:20: fatal error: netcdf.h: No such file or directory
+compilation terminated.
+
+
+then metaMS will not have been installed and running the tool will result in error:
+<simpleError in library(metaMS): there is no package called 'metaMS'>
+
+Possible solution:
+> Install the -dev of those packages to get the headers that are
+  required to compile the package. In this case, you need libnetcdf-dev, udunits-bin and libudunits2-dev
+  (from http://stackoverflow.com/questions/11319698/how-to-install-r-packages-rnetcdf-and-ncdf-on-ubuntu)
+So
+>>sudo apt-get install libnetcdf-dev, udunits-bin and libudunits2-dev
+
+Cairo / no X11 mode is required.
+
+Possible solution:
+> install of cairo (http://www.cairographics.org/) and/or set CAIRO_CFLAGS/LIBS correspondingly.
+So
+>>sudo apt-get install libcairo2-dev
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/metaMS_cmd_annotate.r	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,86 @@
+## read args:
+args <- commandArgs(TRUE)
+## the constructed DB, e.g. "E:/Rworkspace/metaMS/data/LCDBtest.RData"
+args.constructedDB <- args[1]
+## data file in xset format:
+args.xsetData <- args[2]
+## settings file, e.g. "E:/Rworkspace/metaMS/data/settings.r", should contain assignment to an object named "customMetaMSsettings"
+args.settings <- args[3]
+
+## output file names, e.g. "E:/Rworkspace/metaMS/data/out.txt"
+args.outAnnotationTable <- args[4]
+
+args.mass_error_function <- args[5]
+if (args.mass_error_function == "0")
+	args.mass_error_function <- NULL
+## report files
+args.htmlReportFile <- args[6]
+args.htmlReportFile.files_path <- args[7]
+
+if (length(args) == 8)
+{
+	args.outLogFile <- args[8]
+	# suppress messages:
+	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
+	msg <- file(args.outLogFile, open="wt")
+	sink(msg, type="message")
+	sink(msg, type="output")
+}
+
+cat("\nSettings used===============:\n")
+cat(readChar(args.settings, 1e5))
+
+
+tryCatch(
+        {
+	        library(metaMS)
+
+			## load the constructed DB :
+			tempEnv <- new.env()
+			testDB <- load(args.constructedDB, envir=tempEnv)
+			xsetData <- readRDS(args.xsetData)
+
+			## load settings "script" into "customMetaMSsettings"
+			source(args.settings, local=tempEnv)
+			message(paste(" loaded : ", args.settings))
+
+			# Just to highlight: if you want to use more than one
+			# trigger runLC:
+			LC <- runLC(xset=xsetData, settings = tempEnv[["customMetaMSsettings"]], DB = tempEnv[[testDB[1]]]$DB, errf=args.mass_error_function, nSlaves=20, returnXset = TRUE)
+
+			# write out runLC annotation results:
+			write.table(LC$PeakTable, args.outAnnotationTable, sep="\t", row.names=FALSE)
+
+			# the used constructed DB (write to log):
+			cat("\nConstructed DB info===============:\n")
+			str(tempEnv[[testDB[1]]]$Info)
+			cat("\nConstructed DB table===============:\n")
+			if (length(args) == 8)
+			{
+				write.table(tempEnv[[testDB[1]]]$DB, args.outLogFile, append=TRUE, row.names=FALSE)
+				write.table(tempEnv[[testDB[1]]]$Reftable, args.outLogFile, sep="\t", append=TRUE, row.names=FALSE)
+			}
+
+			message("\nGenerating report.........")
+			# report
+			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
+			html <- "<html><body><h1>Summary of annotation results:</h1>"
+			nrTotalFeatures <- nrow(LC$PeakTable)
+			nrAnnotatedFeatures <- nrow(LC$Annotation$annotation.table)
+			html <- paste(html,"<p>Total nr of features: ", nrTotalFeatures,"</p>", sep="")
+			html <- paste(html,"<p>Total nr of annotated features: ", nrAnnotatedFeatures,"</p>", sep="")
+
+			html <- paste(html,"</body><html>")
+			message("finished generating report")
+			write(html,file=args.htmlReportFile)
+			# unlink(args.htmlReportFile)
+			cat("\nWarnings================:\n")
+			str( warnings() )
+		},
+        error=function(cond) {
+            sink(NULL, type="message") # default setting
+			sink(stderr(), type="output")
+            message("\nERROR: ===========\n")
+            print(cond)
+        }
+    )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/metaMS_cmd_pick_and_group.r	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,92 @@
+## read args:
+args <- commandArgs(TRUE)
+## data files, e.g. "E:/Rworkspace/metaMS/data/data.zip" (with e.g. .CDF files) and unzip output dir, e.g. "E:/"
+args.dataZip <- args[1]
+args.zipExtrDir <- sub("\\.","_",paste(args[1],"dir", sep=""))
+dir.create(file.path(args.zipExtrDir), showWarnings = FALSE, recursive = TRUE)
+## settings file, e.g. "E:/Rworkspace/metaMS/data/settings.r", should contain assignment to an object named "customMetaMSsettings"
+args.settings <- args[2]
+
+## output file names, e.g. "E:/Rworkspace/metaMS/data/out.txt"
+args.outPeakTable <- args[3]
+args.xsetOut <- args[4]
+
+# polarity as explicit parameter:
+args.runLC_polarity <- args[5]
+
+## report files
+args.htmlReportFile <- args[6]
+args.htmlReportFile.files_path <- args[7]
+
+
+if (length(args) == 8)
+{
+	args.outLogFile <- args[8]
+	# suppress messages:
+	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
+	msg <- file(args.outLogFile, open="wt")
+	sink(msg, type="message")
+	sink(msg, type="output")
+}
+
+cat("\nSettings used===============:\n")
+cat(readChar(args.settings, 1e5))
+
+
+tryCatch(
+        {
+	        library(metaMS)
+
+			## load the data files from a zip file
+			files <- unzip(args.dataZip, exdir=args.zipExtrDir)
+
+			## load settings "script" into "customMetaMSsettings"
+			tempEnv <- new.env()
+			source(args.settings, local=tempEnv)
+			message(paste(" loaded : ", args.settings))
+			allSettings <- tempEnv[["customMetaMSsettings"]]
+
+			# trigger runLC:
+			LC <- runLC(files, settings = allSettings, polarity=args.runLC_polarity, nSlaves=20, returnXset = TRUE)
+
+			# write out runLC annotation results:
+			write.table(LC$PeakTable, args.outPeakTable, sep="\t", row.names=FALSE)
+
+			# save xset as rdata:
+			xsAnnotatePreparedData <- LC$xset
+			saveRDS(xsAnnotatePreparedData, file=args.xsetOut)
+
+			message("\nGenerating report.........")
+			# report
+			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
+			html <- "<html><body><h1>Info on alignment quality </h1>"
+			# TODO add (nr and mass error) and group size
+
+			message("\nPlotting figures... ")
+			figureName <- paste(args.htmlReportFile.files_path, "/figure_retcor.png", sep="")
+			html <- paste(html,"<img src='figure_retcor.png' /><br/>", sep="")
+			png( figureName, type="cairo", width=1100,height=600 )
+			retcor(LC$xset@xcmsSet, method="peakgroups", plottype = "mdevden")
+			html <- paste(html,"<a>*NB: retention time correction plot based on 'peakgroups' option with default settings. This is not the plot matching the exact settings used in the run,
+									but just intended to give a rough estimate of the retention time shifts present in the data. A more accurate plot will be available once
+                                    this option is added in metaMS API. </a><br/>", sep="")
+			devname = dev.off()
+
+
+			gt <- groups(LC$xset@xcmsSet)
+			groupidx1 <- which(gt[,"rtmed"] > 0 & gt[,"rtmed"] < 3000 & gt[,"npeaks"] > 3)
+
+			html <- paste(html,"</body><html>")
+			message("finished generating report")
+			write(html,file=args.htmlReportFile)
+			# unlink(args.htmlReportFile)
+			cat("\nWarnings================:\n")
+			str( warnings() )
+		},
+        error=function(cond) {
+            sink(NULL, type="message") # default setting
+			sink(stderr(), type="output")
+            message("\nERROR: ===========\n")
+            print(cond)
+        }
+    )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/metams_lcms_annotate.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,121 @@
+<tool id="metams_lcms_annotate" name="METAMS-LC/MS Annotate"  version="0.0.4">
+	<description> Runs metaMS process for LC/MS feature annotation</description>
+	<requirements>
+		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
+	</requirements>
+	<command interpreter="Rscript">
+		metaMS_cmd_annotate.r
+	    $constructed_db
+	    $xsetData
+	    $customMetaMSsettings
+	    $outputFile
+	    #if $mzTol.mzTolType == "fixed"
+			0
+		#else
+            "$mzTol.mass_error_function"
+		#end if
+	    $htmlReportFile
+	    $htmlReportFile.files_path
+	    $outputLog
+	</command>
+<inputs>
+	<param name="constructed_db" type="select" label="Constructed DB" help="Reference annotation database generated from matching measurements of a mixture of chemical standards
+	against a manually validated reference table which contains the key analytical information for each standard."
+      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/metaMS")'/>
+
+	<param name="xsetData" type="data" format="rdata" label="xcmsSet data file (xset RDATA)" help="E.g. output data file resulting from METAMS 'feature picking, aligning and grouping' run"/>
+
+	<param name="protocolName" type="text" size="30" label="protocolName" value="e.g. Synapt.QTOF.RP"
+		help="Choose a name to give for the specific settings in the parameters below"/>
+
+	<param name="rtdiff" type="float" size="10" value="1.5" label="rtdiff" help="(Annotation) Allowed rt difference (in minutes)"/>
+
+	<conditional name="mzTol">
+		<param name="mzTolType" type="select" size="30" label="(Annotation) m/z tolerance type">
+			<option value="fixed" selected="true">Fixed tolerance</option>
+			<option value="adaptive" >Adaptive tolerance</option>
+		</param>
+		<when value="fixed">
+			<param name="mzdiff" type="float" size="10" value="0.005" label="mzdiff" help="(Annotation) Fixed mass tolerance" />
+		</when>
+		<when value="adaptive">
+			<param name="ppm" type="float" size="10" value="5.0" label="ppm" help="(Annotation) Tolerance in ppm" />
+			<param name="mass_error_function" type="text" area="true" size="3x70" label="(Annotation) Mass error function"/>
+		</when>
+	</conditional>
+
+	<param name="rtval" type="float" size="10" value="0.1" label="(max)rtval" help="(Validation) Group items are clustered once more with hierarchical clustering ('complete' method)
+	          based on their rt distances. Here one can specify the rt threshold for removing the items that have too diverging rt (the ones with rt difference
+	          larger than rtval). " />
+	<param name="minfeat" type="integer" size="10" value="2" label="minfeat"
+	           help="(Validation) Threshold for the minimum number of features a
+	           cluster/group should have (after rtval filtering above). Other clusters/groups are filtered out." />
+
+</inputs>
+<configfiles>
+
+<configfile name="customMetaMSsettings">## start comment
+		## metaMS process settings
+		customMetaMSsettings &lt;- metaMSsettings(protocolName = "${protocolName}",
+                            chrom = "LC")
+metaSetting(customMetaMSsettings, "match2DB") &lt;- list(
+            rtdiff = ${rtdiff},
+            rtval = ${rtval},
+		#if $mzTol.mzTolType == "fixed"
+			mzdiff = ${mzTol.mzdiff},
+		#else
+            ppm = ${mzTol.ppm},
+		#end if
+            minfeat = ${minfeat})</configfile>
+
+</configfiles>
+
+<outputs>
+	<data name="outputFile" format="tabular" label="${tool.name} on ${on_string} - metaMS annotated file (TSV)"/>
+	<data name="outputLog" format="txt" label="${tool.name} on ${on_string} - metaMS LOG" hidden="True"/>
+	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - metaMS report (HTML)"/>
+</outputs>
+<tests>
+	<test>
+	</test>
+</tests>
+<code file="../match_library.py" /> <!-- file containing get_directory_files function used above-->
+<help>
+
+.. class:: infomark
+
+Runs metaMS process for LC/MS feature annotation based on matching to an existing 'standards' DB.
+The figure below shows the main parts of this metaMS process.
+
+.. image:: $PATH_TO_IMAGES/metaMS_annotate.png
+
+
+.. class:: infomark
+
+The implemented annotation strategy can be broken down in the following steps:
+
+1. *Feature wise Annotation:* Each feature detected by runLC is matched against the database. If
+the mass error function is provided, the appropriate m/z tolerance is calculated, otherwise a fixed
+tolerance is used (mzdiff). The retention time tolerance is fixed and should be selected on the
+bases of the characteristics of each chromatographic method (rtdiff). Multiple annotations - i.e.
+features which are associated to more than one compound - are possible. This outcome does not
+indicate a problem per se, but is an inherent drawback of co-elution.
+
+2. *Annotation Validation:* The annotated features are organized in 'pseudospectra' collecting all
+the experimental features which are assigned to a specific compound. A specific annotation is
+confirmed only if more than minfeat features which differ in retention time less than rtval are
+present in a pseudospectrum. As a general rule rtval should be narrower than rtdiff. The
+latter, indeed, accounts for shifts in retention time between the injection of the standards and the
+metabolomics experiment under investigation. This time can be rather long, considering that the
+standards are not commonly re-analyzed each time. On the other hand, rtval represents the shift
+between the ions of the same compound within the same batch of injections and therefore it has
+only to account for the smaller shifts occurring during peak picking and alignment.
+
+
+  </help>
+  <citations>
+        <citation type="doi">10.1016/j.jchromb.2014.02.051</citation> <!-- example
+        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
+        -->
+   </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/metams_lcms_pick_and_group.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,301 @@
+<tool id="metams_lcms_pick_and_group" name="METAMS-LC/MS Pick, Align and Group"  version="0.0.4">
+	<description> Runs metaMS process for LC/MS feature picking, aligning and grouping</description>
+	<requirements>
+		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
+	</requirements>
+	<command interpreter="Rscript">
+		metaMS_cmd_pick_and_group.r
+	    $data_files
+	    $customMetaMSsettings
+	    $outputFile
+	    $xsetOut
+	    $polarity
+	    $htmlReportFile
+	    $htmlReportFile.files_path
+	    $outputLog
+	</command>
+<inputs>
+	<param name="data_files" type="data" format="prims.fileset.zip" label="Data files (.zip file with CDF, mzML or mzXML files)" help=".zip file containing the CDF, mzML or mzXML files of the new measurements"/>
+
+	<param name="protocolName" type="text" size="30" label="protocolName" value="e.g. Synapt.QTOF.RP"
+		help="Choose a name to give for the specific settings in the parameters below"/><!-- TODO - let user choose this -->
+
+	<param name="polarity" type="select" size="30" label="polarity"
+		help="Which polarity mode was used for measuring of the ms sample">
+		<option value="positive" selected="true">positive</option>
+		<option value="negative" >negative</option>
+	</param>
+
+
+	<!-- ===========NB : if peak picking, alignment OR CAMERA settings have to be reused for runGC wrapper in the future, we can use Galaxy macro expansions here
+	                     to avoid defining these parameters again in the runGC wrapper ========================= -->
+	<conditional name="peakPicking">
+		<param name="method" type="select" size="30" label="PEAK PICKING method ====================================================="
+		help="matchedFilter=Feature detection in the chromatographic time domain ; centWave=Feature detection for high resolution LC/MS data">
+			<option value="matchedFilter" selected="true">matchedFilter</option>
+			<option value="centWave" >centWave</option>
+		</param>
+		<when value="matchedFilter">
+			<param name="fwhm" type="integer" size="10" value="20" label="fwhm"
+			help="full width at half maximum of matched filtration gaussian model peak. Only used to calculate the actual sigma" />
+			<param name="sigma_denom" type="float" size="10" value="2.3548" label="sigma_denominator"
+			help="denominator for standard deviation (width) of matched filtration model peak (e.g. sigma = fwhm/2.3548)" />
+			<param name="max" type="integer" size="10" value="50" label="max"
+			help="maximum number of peaks per extracted ion chromatogram" />
+			<param name="snthresh" type="integer" size="10" value="4" label="snthresh"
+			help="signal to noise ratio cutoff" />
+			<param name="step" type="float" size="10" value="0.05" label="step"
+			help="step size to use for profile generation"/>
+			<param name="steps" type="integer" size="10" value="2" label="steps"
+			help="number of steps to merge prior to filtration"/>
+			<param name="mzdiff" type="float" size="10" value="0.8" label="mzdiff"
+			help="minimum difference in m/z for peaks with overlapping retention times"/>
+		</when>
+		<when value="centWave">
+			<param name="ppm" type="integer" size="10" value="25" label="ppm"
+			help="maxmial tolerated m/z deviation in consecutive scans, in ppm" />
+			<param name="peakwidth" type="text" size="10" value="20,50" label="peakwidth"
+			help="Chromatographic peak width, given as range (min,max) in seconds" />
+			<param name="snthresh" type="integer" size="10" value="10" label="snthresh"
+			help="signal to noise ratio cutoff" />
+			<param name="prefilter" type="text" size="10" value="3,100" label="prefilter=c(k,I)"
+				help="Prefilter step for the first phase. Mass traces are only retained if
+				they contain at least k peaks with intensity &gt; = I" />
+			<param name="mzCenterFun" type="select" size="30" label="mzCenterFun"
+				help="Function to calculate the m/z center of the feature: wMean intensity weighted mean of the
+				feature m/z values, mean mean of the feature m/z values, apex use m/z value at peak apex,
+				wMeanApex3 intensity weighted mean of the m/z value at peak apex and the m/z value left and
+				right of it, meanApex3 mean of the m/z value at peak apex and the m/z value left and right of it">
+				<option value="wMean" selected="true">wMean</option>
+				<option value="mean" >mean</option>
+				<option value="apex" >apex</option>
+				<option value="wMeanApex3" >wMeanApex3</option>
+				<option value="meanApex3" >meanApex3</option>
+			</param>
+			<param name="integrate" type="select" size="30" label="integrate"
+				help="Integration method. If =1 peak limits are found through descent
+				on the mexican hat filtered data, if =2 the descent is done on the real data.
+				Method 2 is very accurate but prone to noise, while method 1 is more robust to noise but less exact">
+				<option value="1" selected="true">1</option>
+				<option value="2" >2</option>
+			</param>
+			<param name="mzdiff" type="float" size="10" value="-0.001" label="mzdiff"
+				help="minimum difference in m/z for peaks with overlapping retention times, can be negative to allow overlap" />
+			<param name="fitgauss" type="integer" size="10" value="20" label="fitgauss"
+				help="logical, if Yes: a Gaussian is fitted to each peak" >
+				<option value="TRUE" selected="true">Yes</option>
+				<option value="FALSE" >No</option>
+			</param>
+			<param name="noise" type="integer" size="10" value="0" label="noise"
+				help="optional argument which is useful for data that was centroided without any intensity
+				 threshold, centroids with intensity &lt; noise are omitted from ROI detection" />
+		</when>
+	</conditional>
+
+
+	<param name="min_class_fraction" type="float" size="10" value="0.3" label="ALIGNMENT min.class.fraction ====================================================="
+		    help="Minimum fraction of samples necessary in the alignment to make it a valid alignment/group"/>
+	<param name="min_class_size" type="integer" size="10" value="3" label="min.class.size"
+			help="Minimum number of samples necessary in the alignment to make it a valid alignment/group. The lowest criteria
+			between this and min.class.fraction will be used." />
+	<param name="mzwid" type="float" size="10" value="0.1" label="mzwid"
+			help="width of overlapping m/z slices to use for creating peak density chromatograms and grouping peaks across samples"/>
+	<param name="bws" type="text" size="10" value="30,10" label="bws"
+			help="bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel
+			to apply to the peak density chromatogram. Fill in two values separated by comma. First value is used for
+			first grouping round, seccond value is used for last grouping/alignment round after retention time
+			correction. "/>
+
+	<conditional name="retcor">
+		<param name="retcormethod" type="select" size="30" label="retcormethod"
+			help="retention time correction method. 'peakgroups' is the default density based approach, 'obiwarp' is
+			  alignment data by Ordered Bijective Interpolated Warping ">
+			<option value="peakgroups" selected="true">peakgroups</option>
+			<option value="obiwarp" >obiwarp</option>
+		</param>
+		<when value="peakgroups">
+			<param name="retcorfamily" type="select" size="30" label="retcorfamily"
+				help="retention time correction method type/family">
+				<option value="symmetric" selected="true">symmetric</option>
+				<option value="gaussian">gaussian</option>
+			</param>
+			<param name="smooth" type="select" size="30" label="smooth"
+				help="either 'loess' for non-linear alignment or 'linear' for linear alignment">
+				<option value="linear" selected="true">linear</option>
+				<option value="loess">loess (TODO - waiting for metaMS to add/parse this option)</option>
+			</param>
+			<param name="missingratio" type="float" size="10" value="0.2" label="missingratio"
+				help="ratio of missing samples to allow in retention time correction groups"/>
+			<param name="extraratio" type="float" size="10" value="0.1" label="extraratio"
+				help="ratio of extra peaks to allow in retention time correction correction groups"/>
+		</when>
+		<when value="obiwarp">
+			<param name="profStep" type="integer" size="10" value="1" label="profStep"
+				help="step size (in m/z) to use for profile generation from the raw data files" />
+		</when>
+	</conditional>
+
+	<param name="fillPeaks" type="select" size="30" label="fillPeaks"
+		help="Fill missing peaks in peak groups/alignments that do not include peaks from every sample.
+		This method produces intensity values for those missing samples by integrating raw data in peak group region.">
+		<option value="TRUE" selected="true">Yes</option>
+		<option value="FALSE">No</option>
+	</param>
+	<param name="perfwhm" type="float" size="10" value="0.6" label="CAMERA perfwhm ====================================================="
+		help="percentage of FWHM width"/>
+	<param name="cor_eic_th" type="float" size="10" value="0.7" label="cor_eic_th"
+		help="correlation threshold (0..1)" />
+	<param name="ppm" type="float" size="10" value="5.0" label="ppm"
+		help="General ppm error" />
+
+	<param name="groupCorr_graphMethod" type="select" size="30" label="(groupCorr)graphMethod"
+		help="Method selection for grouping peaks after correlation analysis into pseudospectra.">
+		<option value="hcs" selected="true">hcs</option>
+	</param>
+
+	<param name="groupCorr_pval" type="float" size="10" value="0.05" label="(groupCorr)pval"
+		help="significant correlation threshold" />
+
+	<param name="groupCorr_calcCiS" type="select" size="30" label="(groupCorr)calcCiS"
+		help="Use correlation inside samples for peak grouping">
+		<option value="TRUE" selected="true">Yes</option>
+		<option value="FALSE">No</option>
+	</param>
+
+	<param name="groupCorr_calcIso" type="select" size="30" label="(groupCorr)calcIso"
+		help="Use isotopic relationship for peak grouping">
+		<option value="TRUE" >Yes</option>
+		<option value="FALSE" selected="true">No</option>
+	</param>
+
+	<param name="groupCorr_calcCaS" type="select" size="30" label="(groupCorr)calcCaS"
+		help="Use correlation across samples for peak grouping">
+		<option value="TRUE" >Yes</option>
+		<option value="FALSE" selected="true">No</option>
+	</param>
+
+
+	<param name="findIsotopes_maxcharge" type="integer" size="10" value="3" label="(findIsotopes)maxcharge"
+			help="max. ion charge" />
+
+	<param name="findIsotopes_maxiso" type="integer" size="10" value="4" label="(findIsotopes)maxiso"
+			help="max. number of expected isotopes" />
+
+	<param name="findIsotopes_minfrac" type="float" size="10" value="0.5" label="(findIsotopes)minfrac"
+		help="The ratio for the number of samples, which must satisfy the C12/C13 rule for isotope annotation" />
+
+
+	<param name="findAdducts_multiplier" type="integer" size="10" value="3" label="(findAdducts)multiplier"
+			help="If no ruleset is provided, calculate ruleset with max. number n of [nM+x] clusterions" />
+
+
+
+</inputs>
+<configfiles>
+
+<configfile name="customMetaMSsettings">## ====================================
+		## metaMS process settings
+		customMetaMSsettings &lt;- metaMSsettings(protocolName = "${protocolName}",
+                            chrom = "LC",
+                            PeakPicking = list(
+                              method = "${peakPicking.method}",
+							#if $peakPicking.method == "matchedFilter"
+								fwhm = ${peakPicking.fwhm},
+								sigma = ${peakPicking.fwhm}/${peakPicking.sigma_denom},
+								max = ${peakPicking.max},
+								snthresh = ${peakPicking.snthresh},
+								step = ${peakPicking.step},
+								steps = ${peakPicking.steps},
+								mzdiff = ${peakPicking.mzdiff}),
+							#else
+								ppm = ${peakPicking.ppm},
+								peakwidth = c(${peakPicking.peakwidth}),
+								snthresh = ${peakPicking.snthresh},
+								prefilter = c(${peakPicking.prefilter}),
+								mzCenterFun = "${peakPicking.mzCenterFun}",
+								integrate = ${peakPicking.integrate},
+								mzdiff = ${peakPicking.mzdiff},
+								fitgauss = ${peakPicking.fitgauss},
+								noise = ${peakPicking.noise}),
+							#end if
+                            Alignment = list(
+                              min.class.fraction = ${min_class_fraction},
+                              min.class.size = ${min_class_size},
+                              mzwid = ${mzwid},
+                              bws = c(${bws}),
+                              retcormethod = "${retcor.retcormethod}",
+							#if $retcor.retcormethod == "peakgroups"
+                           		smooth = "${retcor.smooth}",
+                            	missingratio = ${retcor.missingratio},
+                            	extraratio = ${retcor.extraratio},
+                            	retcorfamily = "${retcor.retcorfamily}",
+							#else
+								##repeating the method as workaround/ backwards compatibility (can remove this one after fix from metaMS):
+								method = "${retcor.retcormethod}",
+								profStep = ${retcor.profStep},
+							#end if
+                              fillPeaks = ${fillPeaks}),
+                            CAMERA = list(
+                              perfwhm = ${perfwhm},
+                              cor_eic_th = ${cor_eic_th},
+                              ppm= ${ppm},
+                              graphMethod= "${groupCorr_graphMethod}",
+                              pval= ${groupCorr_pval},
+                              calcCiS= ${groupCorr_calcCiS},
+                              calcIso= ${groupCorr_calcIso},
+                              calcCaS= ${groupCorr_calcCaS},
+                              maxcharge= ${findIsotopes_maxcharge},
+                              maxiso= ${findIsotopes_maxiso},
+                              minfrac= ${findIsotopes_minfrac},
+                              multiplier= ${findAdducts_multiplier}
+                              ))</configfile>
+
+</configfiles>
+
+<outputs>
+	<data name="outputFile" format="tabular" label="${tool.name} on ${on_string} - peaks table (TSV)"/>
+	<data name="outputLog" format="txt" label="${tool.name} on ${on_string} - LOG" hidden="True"/>
+	<data name="xsetOut" format="rdata" label="${tool.name} on ${on_string} - xcmsSet (RDATA)"/>
+	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - report (HTML)"/>
+</outputs>
+<tests>
+	<test>
+	</test>
+</tests>
+<help>
+
+.. class:: infomark
+
+Runs metaMS process for LC/MS feature feature picking, aligning and grouping.
+This part of the metaMS process makes use of the XCMS and CAMERA tools and algorithms.
+CAMERA is used for automatic deconvolution/annotation of LC/ESI-MS data.
+The figure below shows the main parts of the metaMS process wrapped by this tool.
+
+.. image:: $PATH_TO_IMAGES/metaMS_pick_align_camera.png
+
+
+From CAMERA documentation:
+
+.. image:: $PATH_TO_IMAGES/CAMERA_results.png
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Wehrens, R.; Weingart, G.; Mattivi, F. (2014).
+metaMS: an open-source pipeline for GC-MS-based untargeted metabolomics.
+Journal of chromatography B: biomedical sciences and applications, 996 (1): 109-116.
+doi: 10.1016/j.jchromb.2014.02.051
+handle: http://hdl.handle.net/10449/24012
+
+Wrapper by Pieter Lukasse.
+
+
+  </help>
+  <citations>
+        <citation type="doi">10.1016/j.jchromb.2014.02.051</citation> <!-- example
+        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
+        -->
+   </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/next.r	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,9 @@
+# next script, useful for plotting the CAMERA groups pseudo-spectra:
+plotPsSpectrum( LC$xset,320,maxlabel=10)
+320 is group id
+plotEICs( LC$xset,35,maxlabel=5)
+
+
+could use this for the items identified by "annotate" step.
+could also integrate this to getEIC tool (add option to give group list instead of m/z list)...but then rt and other options
+   are also not so relevant...maybe a separate tool is more clear
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/tool_dependencies.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<tool_dependency>
+<!-- see also http://wiki.galaxyproject.org/ToolShedToolFeatures for syntax help
+   -->
+  	<package name="R_bioc_metams" version="3.1.1">
+		<repository changeset_revision="4b30bdaf4dbd" name="prims_metabolomics_r_dependencies" owner="pieterlukasse" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" />
+	</package>
+	<readme>
+				This dependency:
+				Ensures R 3.1.1 installation is triggered (via dependency).
+				Ensures Bioconductor 3.0 and package metaMS, multtest and snow are installed.
+      </readme>
+</tool_dependency>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/xcms_differential_analysis.r	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,85 @@
+## read args:
+args <- commandArgs(TRUE)
+#cat("args <- \"\"\n")
+## a xcms xset saved as .RData
+args.xsetData <- args[1]
+#cat(paste("args.xsetData <- \"", args[1], "\"\n", sep=""))
+
+args.class1 <- args[2]
+args.class2 <- args[3]
+#cat(paste("args.class1 <- \"", args[2], "\"\n", sep=""))
+#cat(paste("args.class2 <- \"", args[3], "\"\n", sep=""))
+
+args.topcount <- strtoi(args[4])
+#cat(paste("args.topcount <- ", args[4], "\n", sep=""))
+
+args.outTable <- args[5]
+
+## report files
+args.htmlReportFile <- args[6]
+args.htmlReportFile.files_path <- args[7]
+#cat(paste("args.htmlReportFile <- \"", args[6], "\"\n", sep=""))
+#cat(paste("args.htmlReportFile.files_path <- \"", args[7], "\"\n", sep=""))
+
+
+if (length(args) == 8)
+{
+	args.outLogFile <- args[8]
+	# suppress messages:
+	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
+	msg <- file(args.outLogFile, open="wt")
+	sink(msg, type="message")
+	sink(msg, type="output")
+}
+
+tryCatch(
+        {
+        	library(metaMS)
+        	library(xcms)
+	        #library("R2HTML")
+
+			# load the xset data :
+			xsetData <- readRDS(args.xsetData)
+			# if here to support both scenarios:
+			if ("xcmsSet" %in% slotNames(xsetData) )
+			{
+				xsetData <- xsetData@xcmsSet
+			}
+
+
+			# info: levels(xcmsSet@phenoData$class) also gives access to the class names
+			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
+			# set cairo as default for png plots:
+			png = function (...) grDevices::png(...,type='cairo')
+			# run diffreport
+			reporttab <- diffreport(xsetData, args.class1, args.class2, paste(args.htmlReportFile.files_path,"/fig", sep=""), args.topcount, metlin = 0.15, h=480, w=640)
+
+			# write out tsv table:
+			write.table(reporttab, args.outTable, sep="\t", row.names=FALSE)
+
+			message("\nGenerating report.........")
+
+			cat("<html><body><h1>Differential analysis report</h1>", file= args.htmlReportFile)
+			#HTML(reporttab[1:args.topcount,], file= args.htmlReportFile)
+			figuresPath <- paste(args.htmlReportFile.files_path, "/fig_eic", sep="")
+			message(figuresPath)
+			listOfFiles <- list.files(path = figuresPath)
+			for (i in 1:length(listOfFiles))
+			{
+				figureName <- listOfFiles[i]
+				# maybe we still need to copy the figures to the args.htmlReportFile.files_path
+				cat(paste("<img src='fig_eic/", figureName,"' />", sep=""), file= args.htmlReportFile, append=TRUE)
+				cat(paste("<img src='fig_box/", figureName,"' />", sep=""), file= args.htmlReportFile, append=TRUE)
+			}
+
+			message("finished generating report")
+			cat("\nWarnings================:\n")
+			str( warnings() )
+		},
+        error=function(cond) {
+            sink(NULL, type="message") # default setting
+			sink(stderr(), type="output")
+            message("\nERROR: ===========\n")
+            print(cond)
+        }
+    )
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/xcms_differential_analysis.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,55 @@
+<tool id="xcms_differential_analysis" name="XCMS Differential Analsysis"  version="0.0.4">
+	<description> Runs xcms diffreport function for differential Analsysis</description>
+	<requirements>
+		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
+	</requirements>
+	<command interpreter="Rscript">
+		xcms_differential_analysis.r
+	    $xsetData
+		"$class1"
+		"$class2"
+		$topcount
+		$outTable
+		$htmlReportFile
+		$htmlReportFile.files_path
+		$outLogFile
+	</command>
+<inputs>
+
+	<param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/>
+
+
+	<param name="class1" type="text" size="30" label="Class1 name" value="" help="Name of first class for the comparison"/>
+	<param name="class2" type="text" size="30" label="Class2 name" value="" help="Name of second class for the comparison"/>
+
+	<param name="topcount" type="integer" size="10" value="10" label="Number of items to return" help="Top X differential items. E.g. if 10, it will return top 10 differential items." />
+
+</inputs>
+<outputs>
+	<data name="outTable" format="tabular" label="${tool.name} on ${on_string} - Top differential items (TSV)"/>
+	<data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - log (LOG)" hidden="True"/>
+	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - differential report (HTML)"/>
+</outputs>
+<tests>
+	<test>
+	</test>
+</tests>
+<help>
+
+.. class:: infomark
+
+Runs xcms diffreport for showing the most significant differences between two sets/classes of samples. This tool also creates extracted ion chromatograms (EICs) for
+the most significant differences. The figure below shows an example of such an EIC.
+
+.. image:: $PATH_TO_IMAGES/diffreport.png
+
+
+
+
+  </help>
+  <citations>
+        <citation type="doi">10.1021/ac051437y</citation> <!-- example
+        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
+        -->
+   </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/xcms_get_alignment_eic.r	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,153 @@
+# shows all alignment results in a rt region
+## read args:
+args <- commandArgs(TRUE)
+# xset data:
+args.xsetData <- args[1]
+
+args.rtStart  <- strtoi(args[2])
+args.rtEnd <- strtoi(args[3])
+
+# limit max diff to 600 and minNrSamples to at least 2 :
+if (args.rtEnd - args.rtStart > 600)
+	stop("The RT window should be <= 600")
+
+args.minNrSamples <- strtoi(args[4]) #min nr samples
+if (args.minNrSamples < 2)
+	stop("Set 'Minimum number of samples' to 2 or higher")
+
+
+args.sampleNames <- strsplit(args[5], ",")[[1]]
+# trim leading and trailing spaces:
+args.sampleNames <- gsub("^\\s+|\\s+$", "", args.sampleNames)
+
+## report files
+args.htmlReportFile <- args[6]
+args.htmlReportFile.files_path <- args[7]
+
+
+if (length(args) == 8)
+{
+	args.outLogFile <- args[8]
+	# suppress messages:
+	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
+	msg <- file(args.outLogFile, open="wt")
+	sink(msg, type="message")
+	sink(msg, type="output")
+}
+
+
+
+tryCatch(
+        {
+	        library(metaMS)
+
+			# load the xset data :
+			xsetData <- readRDS(args.xsetData)
+			# if here to support both scenarios:
+			if ("xcmsSet" %in% slotNames(xsetData) )
+			{
+				xsetData <- xsetData@xcmsSet
+			}
+
+			# report
+			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
+			message(paste("\nGenerating report.........in ", args.htmlReportFile.files_path))
+
+			write(paste("<html><body><h1>Extracted Ion Chromatograms (EIC) of alignments with peaks in ",args.minNrSamples, " or more samples</h1>"),
+			      file=args.htmlReportFile)
+
+			gt <- groups(xsetData)
+			message("\nParsed groups... ")
+			groupidx1 <- which(gt[,"rtmed"] > args.rtStart & gt[,"rtmed"] < args.rtEnd & gt[,"npeaks"] >= args.minNrSamples)  # this should be only on samples selected
+
+			if (length(groupidx1) > 0)
+			{
+				message("\nGetting EIC... ")
+				eiccor <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames)
+				#eicraw <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames, rt = "raw")
+
+				#sampleNamesIdx <- which(sampnames(LC$xset@xcmsSet) %in% args.sampleNames, arr.ind = TRUE)
+				#or (from bioconductor code for getEIC):  NB: this is assuming sample indexes used in data are ordered after the order of sample names!!
+				sampNames <- sampnames(xsetData)
+				sampleNamesIdx <- match( args.sampleNames, sampNames)
+				message(paste("Samples: ", sampleNamesIdx))
+
+				#TODO html <- paste(html, "<table><tbody>")
+				message(paste("\nPlotting figures... "))
+
+				#get the mz list (interestingly, this [,"mz"] is relatively slow):
+				peakMzList <- xsetData@peaks[,"mz"]
+				peakSampleList <- xsetData@peaks[,"sample"]
+				#signal to noise list:
+				peakSnList <- xsetData@peaks[,"sn"]
+
+				message(paste("Total nr of peaks: ", length(peakMzList)))
+
+				for (i in 1:length(groupidx1))
+				{
+					groupMembers <- xsetData@groupidx[[groupidx1[i]]]
+
+					groupMzList <- ""
+					groupSampleList <- ""
+					finalGroupSize <- 0
+
+					for (j in 1:length(groupMembers))
+					{
+						#get only the peaks from the selected samples:
+						memberSample <- peakSampleList[groupMembers[j]]
+						memberSn <- peakSnList[groupMembers[j]]
+						if (!is.na(memberSn) && memberSample %in% sampleNamesIdx)
+						{
+							message(paste("Group: ", groupidx1[i], " / Member sample: ", memberSample))
+							memberMz <- peakMzList[groupMembers[j]]
+
+
+							if (finalGroupSize == 0)
+							{
+								groupMzList <- memberMz
+								groupSampleList <- sampNames[memberSample]
+							} else {
+								groupMzList <- paste(groupMzList,",",memberMz, sep="")
+								groupSampleList <- paste(groupSampleList,",",sampNames[memberSample], sep="")
+							}
+
+							finalGroupSize <- finalGroupSize +1
+						}
+					}
+					# here we do the real check on group size and only the groups that have enough
+					# members with signal to noise > 0 will be plotted here:
+					if (finalGroupSize >= args.minNrSamples)
+					{
+						message(paste("Plotting figure ",i, " of max ", length(groupidx1)," figures... "))
+
+						figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
+						write(paste("<img src='", "figure", i,".png' />", sep="") ,
+			      				file=args.htmlReportFile, append=TRUE)
+
+						png( figureName, type="cairo", width=800 )
+						plot(eiccor, xsetData, groupidx = i)
+						devname = dev.off()
+
+						write(paste("<p>Alignment id: [", groupidx1[i], "]. m/z list of peaks in alignment with signal/noise <> NA:<br/>", groupMzList,"</p>", sep="") ,
+			      				file=args.htmlReportFile, append=TRUE)
+			      		write(paste("<p>m/z values found in the following samples respectively: <br/>", groupSampleList,"</p>", sep="") ,
+			      				file=args.htmlReportFile, append=TRUE)
+					}
+				}
+
+			}
+
+			write("</body><html>",
+			      	file=args.htmlReportFile, append=TRUE)
+			message("finished generating report")
+			# unlink(args.htmlReportFile)
+			cat("\nWarnings================:\n")
+			str( warnings() )
+		},
+        error=function(cond) {
+            sink(NULL, type="message") # default setting
+			sink(stderr(), type="output")
+            message("\nERROR: ===========\n")
+            print(cond)
+        }
+    )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/xcms_get_alignment_eic.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,73 @@
+<tool id="xcms_get_alignment_eic" name="XCMS Get Alignment EICs"  version="0.0.4">
+	<description> Extracts alignment EICs from feature alignment data</description>
+	<requirements>
+		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
+	</requirements>
+	<command interpreter="Rscript">
+		xcms_get_alignment_eic.r
+	    $xsetData
+		$rtStart
+		$rtEnd
+		$minNrSamples
+		"$sampleNames"
+		$htmlReportFile
+		$htmlReportFile.files_path
+		$outLogFile
+	</command>
+<inputs>
+
+	<param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/>
+
+
+	<param name="rtStart" type="integer" value="" size="10" label="RT start" help="Start of Retention Time region to plot" />
+	<param name="rtEnd" type="integer" value="" size="10"  label="RT end" help="End of Retention Time region to plot" />
+
+	<param name="minNrSamples" type="integer" size="10" value="10" label="Minimum number of samples in which aligned feature should be found" help="Can also read this as: Minimum
+	number of features in alignment. E.g. if set to 10, it means the alignment should consist of at least 10 peaks from 10 different samples aligned together." />
+
+	<param name="sampleNames" type="text" area="true" size="10x70" label="List of sample names"
+	value="sampleName1,sampleName2,etc"
+	help="Comma or line-separated list of sample names. Optional field where you can specify the subset of samples
+	to use for the EIC plots. NB: if your dataset has many samples, specifying a subset here can significantly speed up the processing time." >
+		<sanitizer>
+			<!-- this translates from line-separated to comma separated list, removes quotes  -->
+			<valid/>
+			<mapping initial="none">
+		    	<add source="&#10;" target=","/>
+		    	<add source="&#13;" target=""/>
+		    	<add source="&quot;" target=""/>
+ 			</mapping>
+		</sanitizer>
+	</param>
+
+
+</inputs>
+<outputs>
+	<data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - log (LOG)" hidden="True"/>
+	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - EIC(s) report (HTML)"/>
+</outputs>
+<tests>
+	<test>
+	</test>
+</tests>
+<help>
+
+.. class:: infomark
+
+This tool finds the alignments in the specified RT window and extracts alignment EICs from feature alignment data using XCMS's getEIC method.
+It produces a HTML report showing the EIC plots and the mass list of each alignment. The figure below shows an example of such an EIC plot, showing also the difference between
+two classes, with extra alignment information beneath it.
+
+.. image:: $PATH_TO_IMAGES/diffreport.png
+
+Alignment id: 1709. m/z list of peaks in alignment:
+614.002922098482,613.998019830021,614.000382307519,613.998229980469,613.998229980469
+
+
+  </help>
+  <citations>
+        <citation type="doi">10.1021/ac051437y</citation> <!-- example
+        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
+        -->
+   </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/xcms_get_mass_eic.r	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,162 @@
+## read args:
+args <- commandArgs(TRUE)
+# xset data:
+args.xsetData <- args[1]
+
+args.rtStart  <- strtoi(args[2])
+args.rtEnd <- strtoi(args[3])
+
+args.mzStart <- as.double(args[4])
+args.mzEnd <- as.double(args[5])
+# there are 2 options: specify a mz range or a mz list:
+if (args.mzStart < 0)
+{
+	args.mzList <- as.double(strsplit(args[6], ",")[[1]])
+	cat(typeof(as.double(strsplit(args[6], ",")[[1]])))
+	args.mzTolPpm <- as.double(args[7])
+	# calculate mzends based on ppm tol:
+	mzListEnd <- c()
+	mzListStart <- c()
+	for (i in 1:length(args.mzList))
+	{
+		mzEnd <- args.mzList[i] + args.mzList[i]*args.mzTolPpm/1000000.0
+		mzStart <- args.mzList[i] - args.mzList[i]*args.mzTolPpm/1000000.0
+		mzListEnd <- c(mzListEnd, mzEnd)
+		mzListStart <- c(mzListStart, mzStart)
+	}
+	str(mzListStart)
+	str(mzListEnd)
+} else {
+	mzListEnd <- c(args.mzEnd)
+	mzListStart <- c(args.mzStart)
+}
+
+args.sampleNames <- strsplit(args[8], ",")[[1]]
+# trim leading and trailing spaces:
+args.sampleNames <- gsub("^\\s+|\\s+$", "", args.sampleNames)
+
+args.combineSamples <- args[9]
+args.rtPlotMode <- args[10]
+
+## report files
+args.htmlReportFile <- args[11]
+args.htmlReportFile.files_path <- args[12]
+
+
+if (length(args) == 13)
+{
+	args.outLogFile <- args[13]
+	# suppress messages:
+	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
+	msg <- file(args.outLogFile, open="wt")
+	sink(msg, type="message")
+	sink(msg, type="output")
+}
+
+# TODO - add option to do masses in same plot (if given in same line oid) or in separate plots
+# TODO2 - let it run in parallel
+
+tryCatch(
+        {
+	        library(metaMS)
+
+			# load the xset data :
+			xsetData <- readRDS(args.xsetData)
+			# if here to support both scenarios:
+			if ("xcmsSet" %in% slotNames(xsetData) )
+			{
+				xsetData <- xsetData@xcmsSet
+			}
+
+			# report
+			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
+			message(paste("\nGenerating report.........in ", args.htmlReportFile.files_path))
+
+			html <- "<html><body><h1>Extracted Ion Chromatograms (EIC) matching criteria</h1>"
+
+			if (args.combineSamples == "No")
+			{
+				if (length(args.sampleNames) > 1 && length(mzListStart) > 1 && length(args.sampleNames) != length(mzListStart))
+					stop(paste("The number of sample names should match the number of m/z values in the list. Found ", length(mzListStart),
+					          " masses while ",  length(args.sampleNames), " sample names were given."))
+
+		  		iterSize <- length(args.sampleNames)
+				# these can be set to 1 or 0 just as a trick to iterate OR not over the items. If the respective list is of length 1, only the first item should be used
+				fixSampleIdx <- 1
+				fixMzListIdx <- 1
+				if (length(args.sampleNames) == 1)
+				{
+					fixSampleIdx <- 0
+					iterSize <- length(mzListStart)
+				}
+				if (length(mzListStart) == 1)
+				{
+					fixMzListIdx <- 0
+				}
+				lineColors <- rainbow(iterSize)
+				for (i in 0:(iterSize-1))
+				{
+					message("\nGetting EIC... ")
+					eiccor <- getEIC(xsetData,
+										mzrange=matrix(c(mzListStart[i*fixMzListIdx+1],mzListEnd[i*fixMzListIdx+1]),nrow=1,ncol=2,byrow=TRUE),
+										rtrange=matrix(c(args.rtStart,args.rtEnd),nrow=1,ncol=2,byrow=TRUE),
+										sampleidx=c(args.sampleNames[i*fixSampleIdx+1]), rt=args.rtPlotMode)
+
+					message("\nPlotting figures... ")
+					figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
+					html <- paste(html,"<img src='", "figure", i,".png' /><br/>", sep="")
+					png( figureName, type="cairo", width=1100,height=250 )
+					#plot(eiccor, col=lineColors[i+1])
+					# black is better in this case:
+					plot(eiccor)
+					legend('topright', # places a legend at the appropriate place
+							legend=c(args.sampleNames[i*fixSampleIdx+1]), # puts text in the legend
+							lty=c(1,1), # gives the legend appropriate symbols (lines)
+							lwd=c(2.5,2.5))
+
+					devname = dev.off()
+				}
+
+			} else {
+				for (i in 1:length(mzListStart))
+				{
+					message("\nGetting EIC... ")
+					eiccor <- getEIC(xsetData,
+										mzrange=matrix(c(mzListStart[i],mzListEnd[i]),nrow=1,ncol=2,byrow=TRUE),
+										rtrange=matrix(c(args.rtStart,args.rtEnd),nrow=1,ncol=2,byrow=TRUE),
+										sampleidx=args.sampleNames, rt = args.rtPlotMode)
+
+										#set size, set option (plot per sample, plot per mass)
+
+					message("\nPlotting figures... ")
+					figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
+					html <- paste(html,"<img src='", "figure", i,".png' />", sep="")
+					png( figureName, type="cairo", width=1100,height=450 )
+					lineColors <- rainbow(length(args.sampleNames))
+					plot(eiccor, col=lineColors)
+					legend('topright', # places a legend at the appropriate place
+						  legend=args.sampleNames, # puts text in the legend
+						  lty=c(1,1), # gives the legend appropriate symbols (lines)
+						  lwd=c(2.5,2.5),
+				          col=lineColors)
+					devname = dev.off()
+				}
+			}
+			if (args.rtPlotMode == "corrected")
+			{
+				html <- paste(html,"<p>*rt values are corrected ones</p></body><html>")
+			}
+			html <- paste(html,"</body><html>")
+			message("finished generating report")
+			write(html,file=args.htmlReportFile)
+			# unlink(args.htmlReportFile)
+			cat("\nWarnings================:\n")
+			str( warnings() )
+		},
+        error=function(cond) {
+            sink(NULL, type="message") # default setting
+			sink(stderr(), type="output")
+            message("\nERROR: ===========\n")
+            print(cond)
+        }
+    )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS/xcms_get_mass_eic.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,117 @@
+<tool id="xcms_get_mass_eic" name="XCMS Get EICs"  version="0.0.4">
+	<description> Extracts EICs for a given list of masses</description>
+	<requirements>
+		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
+	</requirements>
+	<command interpreter="Rscript">
+		xcms_get_mass_eic.r
+	    $xsetData
+		$rtStart
+		$rtEnd
+		#if $massParameters.massParametersType == "window"
+			$massParameters.mzStart
+			$massParameters.mzEnd
+			-1
+			"."
+		#else
+			-1
+			-1
+			"$massParameters.mzList"
+			$massParameters.mzTolPpm
+		#end if
+		"$sampleNames"
+		$combineSamples
+		$rtPlotMode
+		$htmlReportFile
+		$htmlReportFile.files_path
+		$outLogFile
+	</command>
+<inputs>
+
+	<param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/>
+
+
+	<param name="rtStart" type="integer" value="" size="10" label="RT start" help="Start of Retention Time region to plot" />
+	<param name="rtEnd" type="integer" value="" size="10"  label="RT end" help="End of Retention Time region to plot" />
+
+	<conditional name="massParameters">
+		<param name="massParametersType" type="select" size="50" label="Give masses as" >
+			<option value="list" selected="true">m/z list</option>
+			<option value="window" >m/z window</option>
+		</param>
+		<when value="list">
+			<param name="mzList" type="text" area="true" size="7x70" label="m/z list"
+				help="Comma or line-separated list of m/z values for which to plot an EIC. One EIC will be plotted for each m/z given here.">
+				<sanitizer>
+					<!-- this translates from line-separated to comma separated list, removes quotes -->
+					<valid/>
+					<mapping initial="none">
+				    	<add source="&#10;" target=","/>
+				    	<add source="&#13;" target=""/>
+				    	<add source="&quot;" target=""/>
+		 			</mapping>
+				</sanitizer>
+			</param>
+			<param name="mzTolPpm" type="integer" size="10" value="5" label="m/z tolerance (ppm)"  />
+		</when>
+		<when value="window">
+			<param name="mzStart" type="float" value="" size="10" label="m/z start" help="Start of m/z window" />
+			<param name="mzEnd" type="float" value="" size="10"  label="m/z end" help="End of m/z window" />
+   		</when>
+	</conditional>
+
+
+	<param name="sampleNames" type="text" area="true" size="10x70" label="List of sample names"
+		value="sampleName1,sampleName2,etc"
+		help="Comma or line-separated list of sample names. Here you can specify the subset of samples
+		to use for the EIC plots. NB: if your dataset has many samples, specifying a subset here can significantly speed up the processing time." >
+		<sanitizer>
+			<!-- this translates from line-separated to comma separated list, removes quotes  -->
+			<valid/>
+			<mapping initial="none">
+		    	<add source="&#10;" target=","/>
+		    	<add source="&#13;" target=""/>
+		    	<add source="&quot;" target=""/>
+ 			</mapping>
+		</sanitizer>
+	</param>
+
+	<param name="combineSamples" type="select" size="50" label="Combine samples in EIC"
+	       help="Combining samples means plot contains the combined EIC of a m/z in the different samples. When NOT combining, each plot
+	       only contains the EIC of the m/z in the respectively given sample (the sample name from the sample list in the same position as the
+	       m/z in the m/z list.). Tip: use Yes for visualizing EIC of grouped masses (MsClust or CAMERA groups), use No for visualizing EICs of the same mass in
+	       the different samples.">
+		<option value="No" selected="true">No</option>
+		<option value="Yes" >Yes</option>
+	</param>
+	<param name="rtPlotMode" type="select" size="50" label="RT plot mode"
+	       help="Select whether EIC should be on original or raw Retention Time">
+		<option value="raw" selected="true">Raw</option>
+		<option value="corrected" >Corrected</option>
+	</param>
+</inputs>
+<outputs>
+	<data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - log (LOG)" hidden="True"/>
+	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - EIC(s) report (HTML)"/>
+</outputs>
+<tests>
+	<test>
+	</test>
+</tests>
+<help>
+
+.. class:: infomark
+
+This tool will plot EICs for a given list of masses (or a mass window) using XCMS's getEIC method.
+It produces a HTML report showing the EIC plots, one for each given mass. The figure below shows an example of such an EIC plot.
+
+.. image:: $PATH_TO_IMAGES/massEIC.png
+
+
+  </help>
+  <citations>
+        <citation type="doi">10.1021/ac051437y</citation> <!-- example
+        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
+        -->
+   </citations>
+</tool>
\ No newline at end of file
--- a/metaMS_cmd_annotate.r	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-## read args:
-args <- commandArgs(TRUE)
-## the constructed DB, e.g. "E:/Rworkspace/metaMS/data/LCDBtest.RData"
-args.constructedDB <- args[1]
-## data file in xset format:
-args.xsetData <- args[2]
-## settings file, e.g. "E:/Rworkspace/metaMS/data/settings.r", should contain assignment to an object named "customMetaMSsettings"
-args.settings <- args[3]
-
-## output file names, e.g. "E:/Rworkspace/metaMS/data/out.txt"
-args.outAnnotationTable <- args[4]
-
-args.mass_error_function <- args[5]
-if (args.mass_error_function == "0")
-	args.mass_error_function <- NULL
-## report files
-args.htmlReportFile <- args[6]
-args.htmlReportFile.files_path <- args[7]
-
-if (length(args) == 8)
-{
-	args.outLogFile <- args[8]
-	# suppress messages:
-	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
-	msg <- file(args.outLogFile, open="wt")
-	sink(msg, type="message")
-	sink(msg, type="output")
-}
-
-cat("\nSettings used===============:\n")
-cat(readChar(args.settings, 1e5))
-
-
-tryCatch(
-        {
-	        library(metaMS)
-
-			## load the constructed DB :
-			tempEnv <- new.env()
-			testDB <- load(args.constructedDB, envir=tempEnv)
-			xsetData <- readRDS(args.xsetData)
-
-			## load settings "script" into "customMetaMSsettings"
-			source(args.settings, local=tempEnv)
-			message(paste(" loaded : ", args.settings))
-
-			# Just to highlight: if you want to use more than one
-			# trigger runLC:
-			LC <- runLC(xset=xsetData, settings = tempEnv[["customMetaMSsettings"]], DB = tempEnv[[testDB[1]]]$DB, errf=args.mass_error_function, nSlaves=20, returnXset = TRUE)
-
-			# write out runLC annotation results:
-			write.table(LC$PeakTable, args.outAnnotationTable, sep="\t", row.names=FALSE)
-
-			# the used constructed DB (write to log):
-			cat("\nConstructed DB info===============:\n")
-			str(tempEnv[[testDB[1]]]$Info)
-			cat("\nConstructed DB table===============:\n")
-			if (length(args) == 8)
-			{
-				write.table(tempEnv[[testDB[1]]]$DB, args.outLogFile, append=TRUE, row.names=FALSE)
-				write.table(tempEnv[[testDB[1]]]$Reftable, args.outLogFile, sep="\t", append=TRUE, row.names=FALSE)
-			}
-
-			message("\nGenerating report.........")
-			# report
-			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
-			html <- "<html><body><h1>Summary of annotation results:</h1>"
-			nrTotalFeatures <- nrow(LC$PeakTable)
-			nrAnnotatedFeatures <- nrow(LC$Annotation$annotation.table)
-			html <- paste(html,"<p>Total nr of features: ", nrTotalFeatures,"</p>", sep="")
-			html <- paste(html,"<p>Total nr of annotated features: ", nrAnnotatedFeatures,"</p>", sep="")
-
-			html <- paste(html,"</body><html>")
-			message("finished generating report")
-			write(html,file=args.htmlReportFile)
-			# unlink(args.htmlReportFile)
-			cat("\nWarnings================:\n")
-			str( warnings() )
-		},
-        error=function(cond) {
-            sink(NULL, type="message") # default setting
-			sink(stderr(), type="output")
-            message("\nERROR: ===========\n")
-            print(cond)
-        }
-    )
--- a/metaMS_cmd_pick_and_group.r	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-## read args:
-args <- commandArgs(TRUE)
-## data files, e.g. "E:/Rworkspace/metaMS/data/data.zip" (with e.g. .CDF files) and unzip output dir, e.g. "E:/"
-args.dataZip <- args[1]
-args.zipExtrDir <- sub("\\.","_",paste(args[1],"dir", sep=""))
-dir.create(file.path(args.zipExtrDir), showWarnings = FALSE, recursive = TRUE)
-## settings file, e.g. "E:/Rworkspace/metaMS/data/settings.r", should contain assignment to an object named "customMetaMSsettings"
-args.settings <- args[2]
-
-## output file names, e.g. "E:/Rworkspace/metaMS/data/out.txt"
-args.outPeakTable <- args[3]
-args.xsetOut <- args[4]
-
-# polarity as explicit parameter:
-args.runLC_polarity <- args[5]
-
-## report files
-args.htmlReportFile <- args[6]
-args.htmlReportFile.files_path <- args[7]
-
-
-if (length(args) == 8)
-{
-	args.outLogFile <- args[8]
-	# suppress messages:
-	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
-	msg <- file(args.outLogFile, open="wt")
-	sink(msg, type="message")
-	sink(msg, type="output")
-}
-
-cat("\nSettings used===============:\n")
-cat(readChar(args.settings, 1e5))
-
-
-tryCatch(
-        {
-	        library(metaMS)
-
-			## load the data files from a zip file
-			files <- unzip(args.dataZip, exdir=args.zipExtrDir)
-
-			## load settings "script" into "customMetaMSsettings"
-			tempEnv <- new.env()
-			source(args.settings, local=tempEnv)
-			message(paste(" loaded : ", args.settings))
-			allSettings <- tempEnv[["customMetaMSsettings"]]
-
-			# trigger runLC:
-			LC <- runLC(files, settings = allSettings, polarity=args.runLC_polarity, nSlaves=20, returnXset = TRUE)
-
-			# write out runLC annotation results:
-			write.table(LC$PeakTable, args.outPeakTable, sep="\t", row.names=FALSE)
-
-			# save xset as rdata:
-			xsAnnotatePreparedData <- LC$xset
-			saveRDS(xsAnnotatePreparedData, file=args.xsetOut)
-
-			message("\nGenerating report.........")
-			# report
-			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
-			html <- "<html><body><h1>Info on alignment quality </h1>"
-			# TODO add (nr and mass error) and group size
-
-			message("\nPlotting figures... ")
-			figureName <- paste(args.htmlReportFile.files_path, "/figure_retcor.png", sep="")
-			html <- paste(html,"<img src='figure_retcor.png' /><br/>", sep="")
-			png( figureName, type="cairo", width=1100,height=600 )
-			retcor(LC$xset@xcmsSet, method="peakgroups", plottype = "mdevden")
-			html <- paste(html,"<a>*NB: retention time correction plot based on 'peakgroups' option with default settings. This is not the plot matching the exact settings used in the run,
-									but just intended to give a rough estimate of the retention time shifts present in the data. A more accurate plot will be available once
-                                    this option is added in metaMS API. </a><br/>", sep="")
-			devname = dev.off()
-
-
-			gt <- groups(LC$xset@xcmsSet)
-			groupidx1 <- which(gt[,"rtmed"] > 0 & gt[,"rtmed"] < 3000 & gt[,"npeaks"] > 3)
-
-			html <- paste(html,"</body><html>")
-			message("finished generating report")
-			write(html,file=args.htmlReportFile)
-			# unlink(args.htmlReportFile)
-			cat("\nWarnings================:\n")
-			str( warnings() )
-		},
-        error=function(cond) {
-            sink(NULL, type="message") # default setting
-			sink(stderr(), type="output")
-            message("\nERROR: ===========\n")
-            print(cond)
-        }
-    )
--- a/metams_lcms_annotate.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-<tool id="metams_lcms_annotate" name="METAMS-LC/MS Annotate"  version="0.0.4">
-	<description> Runs metaMS process for LC/MS feature annotation</description>
-	<requirements>
-		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
-	</requirements>
-	<command interpreter="Rscript">
-		metaMS_cmd_annotate.r
-	    $constructed_db
-	    $xsetData
-	    $customMetaMSsettings
-	    $outputFile
-	    #if $mzTol.mzTolType == "fixed"
-			0
-		#else
-            "$mzTol.mass_error_function"
-		#end if
-	    $htmlReportFile
-	    $htmlReportFile.files_path
-	    $outputLog
-	</command>
-<inputs>
-	<param name="constructed_db" type="select" label="Constructed DB" help="Reference annotation database generated from matching measurements of a mixture of chemical standards
-	against a manually validated reference table which contains the key analytical information for each standard."
-      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/metaMS")'/>
-
-	<param name="xsetData" type="data" format="rdata" label="xcmsSet data file (xset RDATA)" help="E.g. output data file resulting from METAMS 'feature picking, aligning and grouping' run"/>
-
-	<param name="protocolName" type="text" size="30" label="protocolName" value="e.g. Synapt.QTOF.RP"
-		help="Choose a name to give for the specific settings in the parameters below"/>
-
-	<param name="rtdiff" type="float" size="10" value="1.5" label="rtdiff" help="(Annotation) Allowed rt difference (in minutes)"/>
-
-	<conditional name="mzTol">
-		<param name="mzTolType" type="select" size="30" label="(Annotation) m/z tolerance type">
-			<option value="fixed" selected="true">Fixed tolerance</option>
-			<option value="adaptive" >Adaptive tolerance</option>
-		</param>
-		<when value="fixed">
-			<param name="mzdiff" type="float" size="10" value="0.005" label="mzdiff" help="(Annotation) Fixed mass tolerance" />
-		</when>
-		<when value="adaptive">
-			<param name="ppm" type="float" size="10" value="5.0" label="ppm" help="(Annotation) Tolerance in ppm" />
-			<param name="mass_error_function" type="text" area="true" size="3x70" label="(Annotation) Mass error function"/>
-		</when>
-	</conditional>
-
-	<param name="rtval" type="float" size="10" value="0.1" label="(max)rtval" help="(Validation) Group items are clustered once more with hierarchical clustering ('complete' method)
-	          based on their rt distances. Here one can specify the rt threshold for removing the items that have too diverging rt (the ones with rt difference
-	          larger than rtval). " />
-	<param name="minfeat" type="integer" size="10" value="2" label="minfeat"
-	           help="(Validation) Threshold for the minimum number of features a
-	           cluster/group should have (after rtval filtering above). Other clusters/groups are filtered out." />
-
-</inputs>
-<configfiles>
-
-<configfile name="customMetaMSsettings">## start comment
-		## metaMS process settings
-		customMetaMSsettings &lt;- metaMSsettings(protocolName = "${protocolName}",
-                            chrom = "LC")
-metaSetting(customMetaMSsettings, "match2DB") &lt;- list(
-            rtdiff = ${rtdiff},
-            rtval = ${rtval},
-		#if $mzTol.mzTolType == "fixed"
-			mzdiff = ${mzTol.mzdiff},
-		#else
-            ppm = ${mzTol.ppm},
-		#end if
-            minfeat = ${minfeat})</configfile>
-
-</configfiles>
-
-<outputs>
-	<data name="outputFile" format="tabular" label="${tool.name} on ${on_string} - metaMS annotated file (TSV)"/>
-	<data name="outputLog" format="txt" label="${tool.name} on ${on_string} - metaMS LOG" hidden="True"/>
-	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - metaMS report (HTML)"/>
-</outputs>
-<tests>
-	<test>
-	</test>
-</tests>
-<code file="match_library.py" /> <!-- file containing get_directory_files function used above-->
-<help>
-
-.. class:: infomark
-
-Runs metaMS process for LC/MS feature annotation based on matching to an existing 'standards' DB.
-The figure below shows the main parts of this metaMS process.
-
-.. image:: $PATH_TO_IMAGES/metaMS_annotate.png
-
-
-.. class:: infomark
-
-The implemented annotation strategy can be broken down in the following steps:
-
-1. *Feature wise Annotation:* Each feature detected by runLC is matched against the database. If
-the mass error function is provided, the appropriate m/z tolerance is calculated, otherwise a fixed
-tolerance is used (mzdiff). The retention time tolerance is fixed and should be selected on the
-bases of the characteristics of each chromatographic method (rtdiff). Multiple annotations - i.e.
-features which are associated to more than one compound - are possible. This outcome does not
-indicate a problem per se, but is an inherent drawback of co-elution.
-
-2. *Annotation Validation:* The annotated features are organized in 'pseudospectra' collecting all
-the experimental features which are assigned to a specific compound. A specific annotation is
-confirmed only if more than minfeat features which differ in retention time less than rtval are
-present in a pseudospectrum. As a general rule rtval should be narrower than rtdiff. The
-latter, indeed, accounts for shifts in retention time between the injection of the standards and the
-metabolomics experiment under investigation. This time can be rather long, considering that the
-standards are not commonly re-analyzed each time. On the other hand, rtval represents the shift
-between the ions of the same compound within the same batch of injections and therefore it has
-only to account for the smaller shifts occurring during peak picking and alignment.
-
-
-  </help>
-  <citations>
-        <citation type="doi">10.1016/j.jchromb.2014.02.051</citation> <!-- example
-        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
-        -->
-   </citations>
-</tool>
\ No newline at end of file
--- a/metams_lcms_pick_and_group.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,301 +0,0 @@
-<tool id="metams_lcms_pick_and_group" name="METAMS-LC/MS Pick, Align and Group"  version="0.0.4">
-	<description> Runs metaMS process for LC/MS feature picking, aligning and grouping</description>
-	<requirements>
-		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
-	</requirements>
-	<command interpreter="Rscript">
-		metaMS_cmd_pick_and_group.r
-	    $data_files
-	    $customMetaMSsettings
-	    $outputFile
-	    $xsetOut
-	    $polarity
-	    $htmlReportFile
-	    $htmlReportFile.files_path
-	    $outputLog
-	</command>
-<inputs>
-	<param name="data_files" type="data" format="prims.fileset.zip" label="Data files (.zip file with CDF, mzML or mzXML files)" help=".zip file containing the CDF, mzML or mzXML files of the new measurements"/>
-
-	<param name="protocolName" type="text" size="30" label="protocolName" value="e.g. Synapt.QTOF.RP"
-		help="Choose a name to give for the specific settings in the parameters below"/><!-- TODO - let user choose this -->
-
-	<param name="polarity" type="select" size="30" label="polarity"
-		help="Which polarity mode was used for measuring of the ms sample">
-		<option value="positive" selected="true">positive</option>
-		<option value="negative" >negative</option>
-	</param>
-
-
-	<!-- ===========NB : if peak picking, alignment OR CAMERA settings have to be reused for runGC wrapper in the future, we can use Galaxy macro expansions here
-	                     to avoid defining these parameters again in the runGC wrapper ========================= -->
-	<conditional name="peakPicking">
-		<param name="method" type="select" size="30" label="PEAK PICKING method ====================================================="
-		help="matchedFilter=Feature detection in the chromatographic time domain ; centWave=Feature detection for high resolution LC/MS data">
-			<option value="matchedFilter" selected="true">matchedFilter</option>
-			<option value="centWave" >centWave</option>
-		</param>
-		<when value="matchedFilter">
-			<param name="fwhm" type="integer" size="10" value="20" label="fwhm"
-			help="full width at half maximum of matched filtration gaussian model peak. Only used to calculate the actual sigma" />
-			<param name="sigma_denom" type="float" size="10" value="2.3548" label="sigma_denominator"
-			help="denominator for standard deviation (width) of matched filtration model peak (e.g. sigma = fwhm/2.3548)" />
-			<param name="max" type="integer" size="10" value="50" label="max"
-			help="maximum number of peaks per extracted ion chromatogram" />
-			<param name="snthresh" type="integer" size="10" value="4" label="snthresh"
-			help="signal to noise ratio cutoff" />
-			<param name="step" type="float" size="10" value="0.05" label="step"
-			help="step size to use for profile generation"/>
-			<param name="steps" type="integer" size="10" value="2" label="steps"
-			help="number of steps to merge prior to filtration"/>
-			<param name="mzdiff" type="float" size="10" value="0.8" label="mzdiff"
-			help="minimum difference in m/z for peaks with overlapping retention times"/>
-		</when>
-		<when value="centWave">
-			<param name="ppm" type="integer" size="10" value="25" label="ppm"
-			help="maxmial tolerated m/z deviation in consecutive scans, in ppm" />
-			<param name="peakwidth" type="text" size="10" value="20,50" label="peakwidth"
-			help="Chromatographic peak width, given as range (min,max) in seconds" />
-			<param name="snthresh" type="integer" size="10" value="10" label="snthresh"
-			help="signal to noise ratio cutoff" />
-			<param name="prefilter" type="text" size="10" value="3,100" label="prefilter=c(k,I)"
-				help="Prefilter step for the first phase. Mass traces are only retained if
-				they contain at least k peaks with intensity &gt; = I" />
-			<param name="mzCenterFun" type="select" size="30" label="mzCenterFun"
-				help="Function to calculate the m/z center of the feature: wMean intensity weighted mean of the
-				feature m/z values, mean mean of the feature m/z values, apex use m/z value at peak apex,
-				wMeanApex3 intensity weighted mean of the m/z value at peak apex and the m/z value left and
-				right of it, meanApex3 mean of the m/z value at peak apex and the m/z value left and right of it">
-				<option value="wMean" selected="true">wMean</option>
-				<option value="mean" >mean</option>
-				<option value="apex" >apex</option>
-				<option value="wMeanApex3" >wMeanApex3</option>
-				<option value="meanApex3" >meanApex3</option>
-			</param>
-			<param name="integrate" type="select" size="30" label="integrate"
-				help="Integration method. If =1 peak limits are found through descent
-				on the mexican hat filtered data, if =2 the descent is done on the real data.
-				Method 2 is very accurate but prone to noise, while method 1 is more robust to noise but less exact">
-				<option value="1" selected="true">1</option>
-				<option value="2" >2</option>
-			</param>
-			<param name="mzdiff" type="float" size="10" value="-0.001" label="mzdiff"
-				help="minimum difference in m/z for peaks with overlapping retention times, can be negative to allow overlap" />
-			<param name="fitgauss" type="integer" size="10" value="20" label="fitgauss"
-				help="logical, if Yes: a Gaussian is fitted to each peak" >
-				<option value="TRUE" selected="true">Yes</option>
-				<option value="FALSE" >No</option>
-			</param>
-			<param name="noise" type="integer" size="10" value="0" label="noise"
-				help="optional argument which is useful for data that was centroided without any intensity
-				 threshold, centroids with intensity &lt; noise are omitted from ROI detection" />
-		</when>
-	</conditional>
-
-
-	<param name="min_class_fraction" type="float" size="10" value="0.3" label="ALIGNMENT min.class.fraction ====================================================="
-		    help="Minimum fraction of samples necessary in the alignment to make it a valid alignment/group"/>
-	<param name="min_class_size" type="integer" size="10" value="3" label="min.class.size"
-			help="Minimum number of samples necessary in the alignment to make it a valid alignment/group. The lowest criteria
-			between this and min.class.fraction will be used." />
-	<param name="mzwid" type="float" size="10" value="0.1" label="mzwid"
-			help="width of overlapping m/z slices to use for creating peak density chromatograms and grouping peaks across samples"/>
-	<param name="bws" type="text" size="10" value="30,10" label="bws"
-			help="bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel
-			to apply to the peak density chromatogram. Fill in two values separated by comma. First value is used for
-			first grouping round, seccond value is used for last grouping/alignment round after retention time
-			correction. "/>
-
-	<conditional name="retcor">
-		<param name="retcormethod" type="select" size="30" label="retcormethod"
-			help="retention time correction method. 'peakgroups' is the default density based approach, 'obiwarp' is
-			  alignment data by Ordered Bijective Interpolated Warping ">
-			<option value="peakgroups" selected="true">peakgroups</option>
-			<option value="obiwarp" >obiwarp</option>
-		</param>
-		<when value="peakgroups">
-			<param name="retcorfamily" type="select" size="30" label="retcorfamily"
-				help="retention time correction method type/family">
-				<option value="symmetric" selected="true">symmetric</option>
-				<option value="gaussian">gaussian</option>
-			</param>
-			<param name="smooth" type="select" size="30" label="smooth"
-				help="either 'loess' for non-linear alignment or 'linear' for linear alignment">
-				<option value="linear" selected="true">linear</option>
-				<option value="loess">loess (TODO - waiting for metaMS to add/parse this option)</option>
-			</param>
-			<param name="missingratio" type="float" size="10" value="0.2" label="missingratio"
-				help="ratio of missing samples to allow in retention time correction groups"/>
-			<param name="extraratio" type="float" size="10" value="0.1" label="extraratio"
-				help="ratio of extra peaks to allow in retention time correction correction groups"/>
-		</when>
-		<when value="obiwarp">
-			<param name="profStep" type="integer" size="10" value="1" label="profStep"
-				help="step size (in m/z) to use for profile generation from the raw data files" />
-		</when>
-	</conditional>
-
-	<param name="fillPeaks" type="select" size="30" label="fillPeaks"
-		help="Fill missing peaks in peak groups/alignments that do not include peaks from every sample.
-		This method produces intensity values for those missing samples by integrating raw data in peak group region.">
-		<option value="TRUE" selected="true">Yes</option>
-		<option value="FALSE">No</option>
-	</param>
-	<param name="perfwhm" type="float" size="10" value="0.6" label="CAMERA perfwhm ====================================================="
-		help="percentage of FWHM width"/>
-	<param name="cor_eic_th" type="float" size="10" value="0.7" label="cor_eic_th"
-		help="correlation threshold (0..1)" />
-	<param name="ppm" type="float" size="10" value="5.0" label="ppm"
-		help="General ppm error" />
-
-	<param name="groupCorr_graphMethod" type="select" size="30" label="(groupCorr)graphMethod"
-		help="Method selection for grouping peaks after correlation analysis into pseudospectra.">
-		<option value="hcs" selected="true">hcs</option>
-	</param>
-
-	<param name="groupCorr_pval" type="float" size="10" value="0.05" label="(groupCorr)pval"
-		help="significant correlation threshold" />
-
-	<param name="groupCorr_calcCiS" type="select" size="30" label="(groupCorr)calcCiS"
-		help="Use correlation inside samples for peak grouping">
-		<option value="TRUE" selected="true">Yes</option>
-		<option value="FALSE">No</option>
-	</param>
-
-	<param name="groupCorr_calcIso" type="select" size="30" label="(groupCorr)calcIso"
-		help="Use isotopic relationship for peak grouping">
-		<option value="TRUE" >Yes</option>
-		<option value="FALSE" selected="true">No</option>
-	</param>
-
-	<param name="groupCorr_calcCaS" type="select" size="30" label="(groupCorr)calcCaS"
-		help="Use correlation across samples for peak grouping">
-		<option value="TRUE" >Yes</option>
-		<option value="FALSE" selected="true">No</option>
-	</param>
-
-
-	<param name="findIsotopes_maxcharge" type="integer" size="10" value="3" label="(findIsotopes)maxcharge"
-			help="max. ion charge" />
-
-	<param name="findIsotopes_maxiso" type="integer" size="10" value="4" label="(findIsotopes)maxiso"
-			help="max. number of expected isotopes" />
-
-	<param name="findIsotopes_minfrac" type="float" size="10" value="0.5" label="(findIsotopes)minfrac"
-		help="The ratio for the number of samples, which must satisfy the C12/C13 rule for isotope annotation" />
-
-
-	<param name="findAdducts_multiplier" type="integer" size="10" value="3" label="(findAdducts)multiplier"
-			help="If no ruleset is provided, calculate ruleset with max. number n of [nM+x] clusterions" />
-
-
-
-</inputs>
-<configfiles>
-
-<configfile name="customMetaMSsettings">## ====================================
-		## metaMS process settings
-		customMetaMSsettings &lt;- metaMSsettings(protocolName = "${protocolName}",
-                            chrom = "LC",
-                            PeakPicking = list(
-                              method = "${peakPicking.method}",
-							#if $peakPicking.method == "matchedFilter"
-								fwhm = ${peakPicking.fwhm},
-								sigma = ${peakPicking.fwhm}/${peakPicking.sigma_denom},
-								max = ${peakPicking.max},
-								snthresh = ${peakPicking.snthresh},
-								step = ${peakPicking.step},
-								steps = ${peakPicking.steps},
-								mzdiff = ${peakPicking.mzdiff}),
-							#else
-								ppm = ${peakPicking.ppm},
-								peakwidth = c(${peakPicking.peakwidth}),
-								snthresh = ${peakPicking.snthresh},
-								prefilter = c(${peakPicking.prefilter}),
-								mzCenterFun = "${peakPicking.mzCenterFun}",
-								integrate = ${peakPicking.integrate},
-								mzdiff = ${peakPicking.mzdiff},
-								fitgauss = ${peakPicking.fitgauss},
-								noise = ${peakPicking.noise}),
-							#end if
-                            Alignment = list(
-                              min.class.fraction = ${min_class_fraction},
-                              min.class.size = ${min_class_size},
-                              mzwid = ${mzwid},
-                              bws = c(${bws}),
-                              retcormethod = "${retcor.retcormethod}",
-							#if $retcor.retcormethod == "peakgroups"
-                           		smooth = "${retcor.smooth}",
-                            	missingratio = ${retcor.missingratio},
-                            	extraratio = ${retcor.extraratio},
-                            	retcorfamily = "${retcor.retcorfamily}",
-							#else
-								##repeating the method as workaround/ backwards compatibility (can remove this one after fix from metaMS):
-								method = "${retcor.retcormethod}",
-								profStep = ${retcor.profStep},
-							#end if
-                              fillPeaks = ${fillPeaks}),
-                            CAMERA = list(
-                              perfwhm = ${perfwhm},
-                              cor_eic_th = ${cor_eic_th},
-                              ppm= ${ppm},
-                              graphMethod= "${groupCorr_graphMethod}",
-                              pval= ${groupCorr_pval},
-                              calcCiS= ${groupCorr_calcCiS},
-                              calcIso= ${groupCorr_calcIso},
-                              calcCaS= ${groupCorr_calcCaS},
-                              maxcharge= ${findIsotopes_maxcharge},
-                              maxiso= ${findIsotopes_maxiso},
-                              minfrac= ${findIsotopes_minfrac},
-                              multiplier= ${findAdducts_multiplier}
-                              ))</configfile>
-
-</configfiles>
-
-<outputs>
-	<data name="outputFile" format="tabular" label="${tool.name} on ${on_string} - peaks table (TSV)"/>
-	<data name="outputLog" format="txt" label="${tool.name} on ${on_string} - LOG" hidden="True"/>
-	<data name="xsetOut" format="rdata" label="${tool.name} on ${on_string} - xcmsSet (RDATA)"/>
-	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - report (HTML)"/>
-</outputs>
-<tests>
-	<test>
-	</test>
-</tests>
-<help>
-
-.. class:: infomark
-
-Runs metaMS process for LC/MS feature feature picking, aligning and grouping.
-This part of the metaMS process makes use of the XCMS and CAMERA tools and algorithms.
-CAMERA is used for automatic deconvolution/annotation of LC/ESI-MS data.
-The figure below shows the main parts of the metaMS process wrapped by this tool.
-
-.. image:: $PATH_TO_IMAGES/metaMS_pick_align_camera.png
-
-
-From CAMERA documentation:
-
-.. image:: $PATH_TO_IMAGES/CAMERA_results.png
-
-**References**
-
-If you use this Galaxy tool in work leading to a scientific publication please
-cite the following papers:
-
-Wehrens, R.; Weingart, G.; Mattivi, F. (2014).
-metaMS: an open-source pipeline for GC-MS-based untargeted metabolomics.
-Journal of chromatography B: biomedical sciences and applications, 996 (1): 109-116.
-doi: 10.1016/j.jchromb.2014.02.051
-handle: http://hdl.handle.net/10449/24012
-
-Wrapper by Pieter Lukasse.
-
-
-  </help>
-  <citations>
-        <citation type="doi">10.1016/j.jchromb.2014.02.051</citation> <!-- example
-        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
-        -->
-   </citations>
-</tool>
\ No newline at end of file
--- a/next.r	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-# next script, useful for plotting the CAMERA groups pseudo-spectra:
-plotPsSpectrum( LC$xset,320,maxlabel=10)
-320 is group id
-plotEICs( LC$xset,35,maxlabel=5)
-
-
-could use this for the items identified by "annotate" step.
-could also integrate this to getEIC tool (add option to give group list instead of m/z list)...but then rt and other options
-   are also not so relevant...maybe a separate tool is more clear
\ No newline at end of file
--- a/query_mass_repos.py	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,289 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-'''
-Module to query a set of accurate mass values detected by high-resolution mass spectrometers
-against various repositories/services such as METabolomics EXPlorer database or the
-MFSearcher service (http://webs2.kazusa.or.jp/mfsearcher/).
-
-It will take the input file and for each record it will query the
-molecular mass in the selected repository/service. If one or more compounds are found
-then extra information regarding these compounds is added to the output file.
-
-The output file is thus the input file enriched with information about
-related items found in the selected repository/service.
-
-The service should implement the following interface:
-
-http://service_url/mass?targetMs=500&margin=1&marginUnit=ppm&output=txth   (txth means there is guaranteed to be a header line before the data)
-
-The output should be tab separated and should contain the following columns (in this order)
-db-name    molecular-formula    dbe    formula-weight    id    description
-
-
-'''
-import csv
-import sys
-import fileinput
-import urllib2
-import time
-from collections import OrderedDict
-
-__author__ = "Pieter Lukasse"
-__contact__ = "pieter.lukasse@wur.nl"
-__copyright__ = "Copyright, 2014, Plant Research International, WUR"
-__license__ = "Apache v2"
-
-def _process_file(in_xsv, delim='\t'):
-    '''
-    Generic method to parse a tab-separated file returning a dictionary with named columns
-    @param in_csv: input filename to be parsed
-    '''
-    data = list(csv.reader(open(in_xsv, 'rU'), delimiter=delim))
-    return _process_data(data)
-
-def _process_data(data):
-
-    header = data.pop(0)
-    # Create dictionary with column name as key
-    output = OrderedDict()
-    for index in xrange(len(header)):
-        output[header[index]] = [row[index] for row in data]
-    return output
-
-
-def _query_and_add_data(input_data, molecular_mass_col, repository_dblink, error_margin, margin_unit):
-
-    '''
-    This method will iterate over the record in the input_data and
-    will enrich them with the related information found (if any) in the
-    chosen repository/service
-
-    # TODO : could optimize this with multi-threading, see also nice example at http://stackoverflow.com/questions/2846653/python-multithreading-for-dummies
-    '''
-    merged = []
-
-    for i in xrange(len(input_data[input_data.keys()[0]])):
-        # Get the record in same dictionary format as input_data, but containing
-        # a value at each column instead of a list of all values of all records:
-        input_data_record = OrderedDict(zip(input_data.keys(), [input_data[key][i] for key in input_data.keys()]))
-
-        # read the molecular mass :
-        molecular_mass = input_data_record[molecular_mass_col]
-
-
-        # search for related records in repository/service:
-        data_found = None
-        if molecular_mass != "":
-            molecular_mass = float(molecular_mass)
-
-            # 1- search for data around this MM:
-            query_link = repository_dblink + "/mass?targetMs=" + str(molecular_mass) + "&margin=" + str(error_margin) + "&marginUnit=" + margin_unit + "&output=txth"
-
-            data_found = _fire_query_and_return_dict(query_link + "&_format_result=tsv")
-            data_type_found = "MM"
-
-
-        if data_found == None:
-            # If still nothing found, just add empty columns
-            extra_cols = ['', '','','','','']
-        else:
-            # Add info found:
-            extra_cols = _get_extra_info_and_link_cols(data_found, data_type_found, query_link)
-
-        # Take all data and merge it into a "flat"/simple array of values:
-        field_values_list = _merge_data(input_data_record, extra_cols)
-
-        merged.append(field_values_list)
-
-    # return the merged/enriched records:
-    return merged
-
-
-def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
-    '''
-    This method will go over the data found and will return a
-    list with the following items:
-    - details of hits found :
-         db-name    molecular-formula    dbe    formula-weight    id    description
-    - Link that executes same query
-
-    '''
-
-    # set() makes a unique list:
-    db_name_set = []
-    molecular_formula_set = []
-    id_set = []
-    description_set = []
-
-
-    if 'db-name' in data_found:
-        db_name_set = set(data_found['db-name'])
-    elif '# db-name' in data_found:
-        db_name_set = set(data_found['# db-name'])
-    if 'molecular-formula' in data_found:
-        molecular_formula_set = set(data_found['molecular-formula'])
-    if 'id' in data_found:
-        id_set = set(data_found['id'])
-    if 'description' in data_found:
-        description_set = set(data_found['description'])
-
-    result = [data_type_found,
-              _to_xsv(db_name_set),
-              _to_xsv(molecular_formula_set),
-              _to_xsv(id_set),
-              _to_xsv(description_set),
-              #To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
-              "=HYPERLINK(\""+ query_link + "\", \"Link to entries found in DB \")"]
-    return result
-
-
-def _to_xsv(data_set):
-    result = ""
-    for item in data_set:
-        result = result + str(item) + "|"
-    return result
-
-
-def _fire_query_and_return_dict(url):
-    '''
-    This method will fire the query as a web-service call and
-    return the results as a list of dictionary objects
-    '''
-
-    try:
-        data = urllib2.urlopen(url).read()
-
-        # transform to dictionary:
-        result = []
-        data_rows = data.split("\n")
-
-        # remove comment lines if any (only leave the one that has "molecular-formula" word in it...compatible with kazusa service):
-        data_rows_to_remove = []
-        for data_row in data_rows:
-            if data_row == "" or (data_row[0] == '#' and "molecular-formula" not in data_row):
-                data_rows_to_remove.append(data_row)
-
-        for data_row in data_rows_to_remove:
-            data_rows.remove(data_row)
-
-        # check if there is any data in the response:
-        if len(data_rows) <= 1 or data_rows[1].strip() == '':
-            # means there is only the header row...so no hits:
-            return None
-
-        for data_row in data_rows:
-            if not data_row.strip() == '':
-                row_as_list = _str_to_list(data_row, delimiter='\t')
-                result.append(row_as_list)
-
-        # return result processed into a dict:
-        return _process_data(result)
-
-    except urllib2.HTTPError, e:
-        raise Exception( "HTTP error for URL: " + url + " : %s - " % e.code + e.reason)
-    except urllib2.URLError, e:
-        raise Exception( "Network error: %s" % e.reason.args[1] + ". Administrator: please check if service [" + url + "] is accessible from your Galaxy server. ")
-
-def _str_to_list(data_row, delimiter='\t'):
-    result = []
-    for column in data_row.split(delimiter):
-        result.append(column)
-    return result
-
-
-# alternative: ?
-#     s = requests.Session()
-#     s.verify = False
-#     #s.auth = (token01, token02)
-#     resp = s.get(url, params={'name': 'anonymous'}, stream=True)
-#     content = resp.content
-#     # transform to dictionary:
-
-
-
-
-def _merge_data(input_data_record, extra_cols):
-    '''
-    Adds the extra information to the existing data record and returns
-    the combined new record.
-    '''
-    record = []
-    for column in input_data_record:
-        record.append(input_data_record[column])
-
-
-    # add extra columns
-    for column in extra_cols:
-        record.append(column)
-
-    return record
-
-
-def _save_data(data_rows, headers, out_csv):
-    '''
-    Writes tab-separated data to file
-    @param data_rows: dictionary containing merged/enriched dataset
-    @param out_csv: output csv file
-    '''
-
-    # Open output file for writing
-    outfile_single_handle = open(out_csv, 'wb')
-    output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
-
-    # Write headers
-    output_single_handle.writerow(headers)
-
-    # Write one line for each row
-    for data_row in data_rows:
-        output_single_handle.writerow(data_row)
-
-def _get_repository_URL(repository_file):
-    '''
-    Read out and return the URL stored in the given file.
-    '''
-    file_input = fileinput.input(repository_file)
-    try:
-        for line in file_input:
-            if line[0] != '#':
-                # just return the first line that is not a comment line:
-                return line
-    finally:
-        file_input.close()
-
-
-def main():
-    '''
-    Query main function
-
-    The input file can be any tabular file, as long as it contains a column for the molecular mass.
-    This column is then used to query against the chosen repository/service Database.
-    '''
-    seconds_start = int(round(time.time()))
-
-    input_file = sys.argv[1]
-    molecular_mass_col = sys.argv[2]
-    repository_file = sys.argv[3]
-    error_margin = float(sys.argv[4])
-    margin_unit = sys.argv[5]
-    output_result = sys.argv[6]
-
-    # Parse repository_file to find the URL to the service:
-    repository_dblink = _get_repository_URL(repository_file)
-
-    # Parse tabular input file into dictionary/array:
-    input_data = _process_file(input_file)
-
-    # Query data against repository :
-    enriched_data = _query_and_add_data(input_data, molecular_mass_col, repository_dblink, error_margin, margin_unit)
-    headers = input_data.keys() + ['SEARCH hits for ','SEARCH hits: db-names', 'SEARCH hits: molecular-formulas ',
-                                   'SEARCH hits: ids','SEARCH hits: descriptions', 'Link to SEARCH hits']  #TODO - add min and max formula weigth columns
-
-    _save_data(enriched_data, headers, output_result)
-
-    seconds_end = int(round(time.time()))
-    print "Took " + str(seconds_end - seconds_start) + " seconds"
-
-
-
-if __name__ == '__main__':
-    main()
--- a/query_mass_repos.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-<tool id="query_mass_repos"
-    name="METEXP - Find elemental composition formulas based on mass values "
-    version="0.1.0">
-  <description>Query multiple public repositories for elemental compositions from accurate mass values detected by high-resolution mass spectrometers</description>
-  <command interpreter="python">
-    query_mass_repos.py
-    $input_file
-    "$molecular_mass_col"
-    "$repository_file"
-    $error_margin
-    $margin_unit
-    $output_result
-  </command>
-  <inputs>
-
-   <param name="input_file" format="tabular" type="data"
-        label="Input file"
-    	help="Select a tabular file containing the entries to be queried/verified in the MetExp DB"/>
-
-   <param name="molecular_mass_col" type="text" size="50"
-           label="Molecular mass column name"
-           value="MM"
-           help="Name of the column containing the molecular mass information (in the given input file)" />
-
-   <param name="repository_file" type="select" label="Repository/service to query"
-      		 help="Select the repository/service which should be queried"
-      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/MetExp_MassSearch_Services")'/>
-
-   <param name="error_margin" type="float" size="10"
-           label="Error marging"
-           value="0.01"
-           help="Mass difference allowed when searching in the repositories for a mass match." />
-
-   <param name="margin_unit" type="select" label="Margin unit">
-	  	<option value="ms" selected="True">ms</option>
-	    <option value="ppm">ppm</option>
-   </param>
-   <!-- TODO
-   <param name="metexp_access_key" type="text" size="50"
-           label="(Optional)MetExp access key"
-           value=""
-           help="Key needed to get access to MetExp services. Fill in if MetExp service was selected" />    -->
-
-  </inputs>
-  <outputs>
-    <data name="output_result" format="tabular" label="${tool.name} on ${on_string}" />
-  </outputs>
-  <code file="match_library.py" /> <!-- file containing get_directory_files function used above-->
-  <help>
-.. class:: infomark
-
-This tool will query multiple public repositories such as PRI-MetExp or http://webs2.kazusa.or.jp/mfsearcher
-for elemental compositions from accurate mass values detected by high-resolution mass spectrometers.
-
-It will take the input file and for each record it will query the
-molecular mass in the selected repository. If one or more compounds are found in the
-repository then extra information regarding (mass based)matching elemental composition formulas is added to the output file.
-
-The output file is thus the input file enriched with information about
-related items found in the selected repository.
-
-**Notes**
-
-The input file can be any tabular file, as long as it contains a column for the molecular mass.
-
-**Services that can be queried**
-
-================= =========================================================================
-Database          Description
------------------ -------------------------------------------------------------------------
-PRI-MetExp        LC-MS and GC-MS data from experiments from the metabolomics group at
-                  Plant Research International. NB: restricted access to employees with
-                  access key.
-ExactMassDB       A database of possible elemental compositions consits of C: 100,
-                  H: 200, O: 50, N: 10, P: 10, and S: 10, that satisfy the Senior and
-                  the Lewis valence rules.
-                  (via /mfsearcher/exmassdb/)
-ExactMassDB-HR2   HR2, which is one of the fastest tools for calculation of elemental
-                  compositions, filters some elemental compositions according to
-                  the Seven Golden Rules (Kind and Fiehn, 2007). The ExactMassDB-HR2
-                  database returns the same result as does HR2 with the same atom kind
-                  and number condition as that used in construction of the ExactMassDB.
-                  (via /mfsearcher/exmassdb-hr2/)
-Pep1000           A database of possible linear polypeptides that are
-                  constructed with 20 kinds of amino acids and having molecular
-                  weights smaller than 1000.
-                  (via /mfsearcher/pep1000/)
-KEGG              Re-calculated compound data from KEGG. Weekly updated.
-                  (via /mfsearcher/kegg/)
-KNApSAcK          Re-calculated compound data from KNApSAcK.
-                  (via /mfsearcher/knapsack/)
-Flavonoid Viewer  Re-calculated compound data from Flavonoid Viewer .
-                  (via /mfsearcher/flavonoidviewer/
-LipidMAPS         Re-calculated compound data from LIPID MAPS.
-                  (via /mfsearcher/lipidmaps/)
-HMDB              Re-calculated compound data from Human Metabolome Database (HMDB)
-                  Version 3.5.
-                  (via /mfsearcher/hmdb/)
-PubChem           Re-calculated compound data from PubChem. Monthly updated.
-                  (via /mfsearcher/pubchem/)
-================= =========================================================================
-
-Sources for table above: PRI-MetExp and http://webs2.kazusa.or.jp/mfsearcher
-
-  </help>
-</tool>
--- a/query_metexp.py	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,282 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-'''
-Module to query a set of identifications against the METabolomics EXPlorer database.
-
-It will take the input file and for each record it will query the
-molecular mass in the selected MetExp DB. If one or more compounds are found in the
-MetExp DB then extra information regarding these compounds is added to the output file.
-
-The output file is thus the input file enriched with information about
-related items found in the selected MetExp DB.
-'''
-import csv
-import sys
-import fileinput
-import urllib2
-import time
-from collections import OrderedDict
-
-__author__ = "Pieter Lukasse"
-__contact__ = "pieter.lukasse@wur.nl"
-__copyright__ = "Copyright, 2014, Plant Research International, WUR"
-__license__ = "Apache v2"
-
-def _process_file(in_xsv, delim='\t'):
-    '''
-    Generic method to parse a tab-separated file returning a dictionary with named columns
-    @param in_csv: input filename to be parsed
-    '''
-    data = list(csv.reader(open(in_xsv, 'rU'), delimiter=delim))
-    return _process_data(data)
-
-def _process_data(data):
-
-    header = data.pop(0)
-    # Create dictionary with column name as key
-    output = OrderedDict()
-    for index in xrange(len(header)):
-        output[header[index]] = [row[index] for row in data]
-    return output
-
-
-def _query_and_add_data(input_data, casid_col, formula_col, molecular_mass_col, metexp_dblink, separation_method):
-    '''
-    This method will iterate over the record in the input_data and
-    will enrich them with the related information found (if any) in the
-    MetExp Database.
-
-    # TODO : could optimize this with multi-threading, see also nice example at http://stackoverflow.com/questions/2846653/python-multithreading-for-dummies
-    '''
-    merged = []
-
-    for i in xrange(len(input_data[input_data.keys()[0]])):
-        # Get the record in same dictionary format as input_data, but containing
-        # a value at each column instead of a list of all values of all records:
-        input_data_record = OrderedDict(zip(input_data.keys(), [input_data[key][i] for key in input_data.keys()]))
-
-        # read the molecular mass and formula:
-        cas_id = input_data_record[casid_col]
-        formula = input_data_record[formula_col]
-        molecular_mass = input_data_record[molecular_mass_col]
-
-        # search for related records in MetExp:
-        data_found = None
-        if cas_id != "undef":
-            # 1- search for other experiments where this CAS id has been found:
-            query_link = metexp_dblink + "/find_entries/query?cas_nr="+ cas_id + "&method=" + separation_method
-            data_found = _fire_query_and_return_dict(query_link + "&_format_result=tsv")
-            data_type_found = "CAS"
-        if data_found == None:
-            # 2- search for other experiments where this FORMULA has been found:
-            query_link = metexp_dblink + "/find_entries/query?molecule_formula="+ formula + "&method=" + separation_method
-            data_found = _fire_query_and_return_dict(query_link + "&_format_result=tsv")
-            data_type_found = "FORMULA"
-        if data_found == None:
-            # 3- search for other experiments where this MM has been found:
-            query_link = metexp_dblink + "/find_entries/query?molecule_mass="+ molecular_mass + "&method=" + separation_method
-            data_found = _fire_query_and_return_dict(query_link + "&_format_result=tsv")
-            data_type_found = "MM"
-
-        if data_found == None:
-            # If still nothing found, just add empty columns
-            extra_cols = ['', '','','','','','','']
-        else:
-            # Add info found:
-            extra_cols = _get_extra_info_and_link_cols(data_found, data_type_found, query_link)
-
-        # Take all data and merge it into a "flat"/simple array of values:
-        field_values_list = _merge_data(input_data_record, extra_cols)
-
-        merged.append(field_values_list)
-
-    # return the merged/enriched records:
-    return merged
-
-
-def _get_extra_info_and_link_cols(data_found, data_type_found, query_link):
-    '''
-    This method will go over the data found and will return a
-    list with the following items:
-    - Experiment details where hits have been found :
-        'organism', 'tissue','experiment_name','user_name','column_type'
-    - Link that executes same query
-
-    '''
-    # set() makes a unique list:
-    organism_set = []
-    tissue_set = []
-    experiment_name_set = []
-    user_name_set = []
-    column_type_set = []
-    cas_nr_set = []
-
-    if 'organism' in data_found:
-        organism_set = set(data_found['organism'])
-    if 'tissue' in data_found:
-        tissue_set = set(data_found['tissue'])
-    if 'experiment_name' in data_found:
-        experiment_name_set = set(data_found['experiment_name'])
-    if 'user_name' in data_found:
-        user_name_set = set(data_found['user_name'])
-    if 'column_type' in data_found:
-        column_type_set = set(data_found['column_type'])
-    if 'CAS' in data_found:
-        cas_nr_set = set(data_found['CAS'])
-
-
-    result = [data_type_found,
-              _to_xsv(organism_set),
-              _to_xsv(tissue_set),
-              _to_xsv(experiment_name_set),
-              _to_xsv(user_name_set),
-              _to_xsv(column_type_set),
-              _to_xsv(cas_nr_set),
-              #To let Excel interpret as link, use e.g. =HYPERLINK("http://stackoverflow.com", "friendly name"):
-              "=HYPERLINK(\""+ query_link + "\", \"Link to entries found in DB \")"]
-    return result
-
-
-def _to_xsv(data_set):
-    result = ""
-    for item in data_set:
-        result = result + str(item) + "|"
-    return result
-
-
-def _fire_query_and_return_dict(url):
-    '''
-    This method will fire the query as a web-service call and
-    return the results as a list of dictionary objects
-    '''
-
-    try:
-        data = urllib2.urlopen(url).read()
-
-        # transform to dictionary:
-        result = []
-        data_rows = data.split("\n")
-
-        # check if there is any data in the response:
-        if len(data_rows) <= 1 or data_rows[1].strip() == '':
-            # means there is only the header row...so no hits:
-            return None
-
-        for data_row in data_rows:
-            if not data_row.strip() == '':
-                row_as_list = _str_to_list(data_row, delimiter='\t')
-                result.append(row_as_list)
-
-        # return result processed into a dict:
-        return _process_data(result)
-
-    except urllib2.HTTPError, e:
-        raise Exception( "HTTP error for URL: " + url + " : %s - " % e.code + e.reason)
-    except urllib2.URLError, e:
-        raise Exception( "Network error: %s" % e.reason.args[1] + ". Administrator: please check if MetExp service [" + url + "] is accessible from your Galaxy server. ")
-
-def _str_to_list(data_row, delimiter='\t'):
-    result = []
-    for column in data_row.split(delimiter):
-        result.append(column)
-    return result
-
-
-# alternative: ?
-#     s = requests.Session()
-#     s.verify = False
-#     #s.auth = (token01, token02)
-#     resp = s.get(url, params={'name': 'anonymous'}, stream=True)
-#     content = resp.content
-#     # transform to dictionary:
-
-
-
-
-def _merge_data(input_data_record, extra_cols):
-    '''
-    Adds the extra information to the existing data record and returns
-    the combined new record.
-    '''
-    record = []
-    for column in input_data_record:
-        record.append(input_data_record[column])
-
-
-    # add extra columns
-    for column in extra_cols:
-        record.append(column)
-
-    return record
-
-
-def _save_data(data_rows, headers, out_csv):
-    '''
-    Writes tab-separated data to file
-    @param data_rows: dictionary containing merged/enriched dataset
-    @param out_csv: output csv file
-    '''
-
-    # Open output file for writing
-    outfile_single_handle = open(out_csv, 'wb')
-    output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
-
-    # Write headers
-    output_single_handle.writerow(headers)
-
-    # Write one line for each row
-    for data_row in data_rows:
-        output_single_handle.writerow(data_row)
-
-def _get_metexp_URL(metexp_dblink_file):
-    '''
-    Read out and return the URL stored in the given file.
-    '''
-    file_input = fileinput.input(metexp_dblink_file)
-    try:
-        for line in file_input:
-            if line[0] != '#':
-                # just return the first line that is not a comment line:
-                return line
-    finally:
-        file_input.close()
-
-
-def main():
-    '''
-    MetExp Query main function
-
-    The input file can be any tabular file, as long as it contains a column for the molecular mass
-    and one for the formula of the respective identification. These two columns are then
-    used to query against MetExp Database.
-    '''
-    seconds_start = int(round(time.time()))
-
-    input_file = sys.argv[1]
-    casid_col = sys.argv[2]
-    formula_col = sys.argv[3]
-    molecular_mass_col = sys.argv[4]
-    metexp_dblink_file = sys.argv[5]
-    separation_method = sys.argv[6]
-    output_result = sys.argv[7]
-
-    # Parse metexp_dblink_file to find the URL to the MetExp service:
-    metexp_dblink = _get_metexp_URL(metexp_dblink_file)
-
-    # Parse tabular input file into dictionary/array:
-    input_data = _process_file(input_file)
-
-    # Query data against MetExp DB :
-    enriched_data = _query_and_add_data(input_data, casid_col, formula_col, molecular_mass_col, metexp_dblink, separation_method)
-    headers = input_data.keys() + ['METEXP hits for ','METEXP hits: organisms', 'METEXP hits: tissues',
-                                   'METEXP hits: experiments','METEXP hits: user names','METEXP hits: column types', 'METEXP hits: CAS nrs', 'Link to METEXP hits']
-
-    _save_data(enriched_data, headers, output_result)
-
-    seconds_end = int(round(time.time()))
-    print "Took " + str(seconds_end - seconds_start) + " seconds"
-
-
-
-if __name__ == '__main__':
-    main()
--- a/query_metexp.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-<tool id="query_metexp"
-    name="METEXP - Query Database "
-    version="0.1.0">
-  <description>Query a set of identifications against the METabolomics EXPeriments database</description>
-  <command interpreter="python">
-    query_metexp.py
-    $input_file
-    "$casid_col"
-    "$formula_col"
-    "$molecular_mass_col"
-    "$metexp_dblink_file"
-    $separation_method
-    $output_result
-  </command>
-  <inputs>
-
-   <param name="input_file" format="tabular" type="data"
-        label="Input file"
-    	help="Select a tabular file containing the entries to be queried/verified in the MetExp DB"/>
-
-  <param name="separation_method" type="select" label="Data type to query">
-  	<option value="GC" selected="True">GC</option>
-    <option value="LC">LC</option>
-  </param>
-
-
-   <param name="casid_col" type="text" size="50"
-           label="CAS ID column name"
-           value="CAS"
-           help="Name of the column containing the CAS code information (in the given input file)" />
-   <param name="formula_col" type="text" size="50"
-           label="Formula ID column name"
-           value="FORMULA"
-           help="Name of the column containing the formula information (in the given input file)" />
-   <param name="molecular_mass_col" type="text" size="50"
-           label="Molecular mass column name"
-           value="MM"
-           help="Name of the column containing the molecular mass information (in the given input file)" />
-
-   <param name="metexp_dblink_file" type="select" label="MetExp DB to query"
-      		 help="Select the MetExp Database/backend which should be queried"
-      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/MetExp_Databases")'/>
-
-
-  </inputs>
-  <outputs>
-    <data name="output_result" format="tabular" label="${tool.name} on ${on_string}" />
-  </outputs>
-  <code file="match_library.py" /> <!-- file containing get_directory_files function used above-->
-  <help>
-.. class:: infomark
-
-This tool will Query a set of identifications against the METabolomics EXPlorer database.
-
-It will take the input file and for each record it will query the
-molecular mass in the selected MetExp DB. If one or more compounds are found in the
-MetExp DB then extra information regarding these compounds is added to the output file.
-
-The output file is thus the input file enriched with information about
-related items found in the selected MetExp DB.
-
-**Notes**
-
-The input file can be any tabular file, as long as it contains a column for the molecular mass
-and one for the formula of the respective identification.
-
-
-  </help>
-</tool>
--- a/rankfilterGCMS_tabular.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-<tool id="rankfilterGCMS_tabular" name="RIQC-RankFilter GC-MS from tabular file" version="1.0.2">
-  <description>Convert Retention Time to Retention Index</description>
-  <command interpreter="python">rankfilter_GCMS/rankfilter.py $input_file</command>
-  <inputs>
-    <param format="tabular" name="sample" type="data" label="Sample File"
-	       help="Select a tab delimited NIST metabolite identifications file (converted from PDF)" />
-	<!-- question: is this calibration file not column specific as it includes RT info?? -->
-    <!-- this one should be input file for now:<param name="calibration"  type="select" label="Calibration File"
-           help="Calibration file with reference masses (e.g. alkanes) with their RT and RI values"
-    		dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RankFilter_Calibration_Files")'/>
-    		-->
-    <param name="calibration" format="any" type="data" label="Calibration File"
-           help="Calibration file containing reference masses (e.g. alkanes) with their respective RT and RI values"/>
-
-    <param name="analysis_type" type="select" format="text" label="Analysis Type"
-    	   help="Select the type of analysis that has been used to generate the sample file">
-      <option value="NIST">NIST</option>
-      <option value="AMDIS">AMDIS</option>
-    </param>
-    <param name="model" type="select" format="text" label="Select a model to be used "
-    	   help="Both linear and (3rd degree) polynomial models are available ">
-      <option value="linear">Linear</option>
-      <option value="poly">Polynomial</option>
-    </param>
-    <param name="lib_data" type="select" label="Library"
-	       help="Reference global lookup library file with CAS numbers and respective (previously calculated) RIsvr values"
-           dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RankFilter_lookup_libraries")'/>
-
-    <param name="window" type="float" label="Window" value="10.56" />
-  </inputs>
-  <outputs>
-    <data format="tabular" label="${tool.name}" name="onefile" />
-  </outputs>
-  <!-- file with implementation of the function get_directory_files() used above  -->
-  <code file="match_library.py" />
-  <configfiles>
-    <configfile name="input_file">
-      sample = ${sample}
-      calibration = ${calibration}
-      lib_data = ${lib_data}
-      window = ${window}
-      analysis_type = ${analysis_type}
-      tabular = True
-      onefile = ${onefile}
-      model = ${model}
-    </configfile>
-  </configfiles>
-  <help>
-Basically estimates the experimental RI (RIexp) by building a RI(RT) function based on the
-given calibration file.
-
-It also determines the estimated RI (RIsvr) by looking up for each entry of the given input file (Sample File),
-based on its CAS number, its respective RIsvr value in the given global lookup library
-(this step is also called the "RankFilter analysis" -see reference below; Sample File may be either from NIST or AMDIS).
-This generates an prediction of the RI for
-a compound according to the "RankFilter procedure" (RIsvr).
-
-Output is a tab separated file in which four columns are added:
-
-	- **Rank** Calculated rank
-	- **RIexp** Experimental Retention Index (RI)
-	- **RIsvr** Calculated RI based on support vector regression (SVR)
-	- **%rel.err** Relative RI error (%rel.error = 100 * (RISVR − RIexp) / RIexp)
-
-.. class:: infomark
-
-**Notes**
-
-	- The layout of the Calibration file should include the following columns: 'MW', 'R.T.' and 'RI'.
-	- Selecting 'Polynomial' in the model parameter will calculate a 3rd degree polynomial model that will
-	  be used to convert from XXXX to YYYY.
-
------
-
-**References**
-
-    - **RankFilter**: Mihaleva et. al. (2009) *Automated procedure for candidate compound selection in GC-MS
-      metabolomics based on prediction of Kovats retention index*. Bioinformatics, 25 (2009), pp. 787–794
-  </help>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rankfilter_GCMS/nistpdf_to_tabular.py	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,45 @@
+"""
+Copyright (C) 2013, Pieter Lukasse, Plant Research International, Wageningen
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this software except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+import pdfread
+from subprocess import call
+
+# NB: THIS TOOL SHOULD BE DEPRECATED ONCE NIST-Wrapper tool is stable and released. NIST-Wrapper tool already produces the necessary tabular file.
+
+def convert_pdftotext(filename, output_file):
+    '''
+    Converts PDF file to text
+    @param filename: PDF file to parse
+    @param output_file: output text file for the hits
+    '''
+
+    # "-layout" option in pdftotext call below: Maintain (as best as possible) the original physical layout of the text. The
+    #                                           default is to 'undo' physical layout (columns, hyphenation, etc.) and output
+    #                                           the text in reading order.
+    try:
+        call(["pdftotext", "-layout", filename, output_file])
+    except:
+        raise Exception("Error while trying to convert PDF to text")
+
+
+
+
+if __name__ == '__main__':
+    pdf_as_text = sys.argv[1]+".txt"
+    convert_pdftotext(sys.argv[1], pdf_as_text)
+    pdfread.convert_pdftotext2tabular(pdf_as_text, sys.argv[2], sys.argv[3], False)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rankfilter_GCMS/nistpdf_to_tabular.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,14 @@
+<tool id="NDIStext2tabular" name="NIST_UTIL- NIST text to tabular format" version="1.0.2">
+  <description>Convert NIST text to tabular format</description>
+  <command interpreter="python">nistpdf_to_tabular.py $input $output $output_err</command>
+  <inputs>
+    <param format="pdf" name="input" type="data" label="NIST identifications report (PDF)"/>
+  </inputs>
+  <outputs>
+    <data format="tabular" label="${tool.name} output on ${on_string}"  name="output" />
+    <data format="tabular" label="${tool.name} error log"  name="output_err" />
+  </outputs>
+  <help>
+    This tool converts NIST identification report output (PDF) to a tabular format needed for further use with the RIQC tools.
+  </help>
+</tool>
--- a/rankfilter_GCMS/pdftotabular.py	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-"""
-Copyright (C) 2013, Pieter Lukasse, Plant Research International, Wageningen
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this software except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-"""
-
-import sys
-import pdfread
-from subprocess import call
-
-
-def convert_pdftotext(filename, output_file):
-    '''
-    Converts PDF file to text
-    @param filename: PDF file to parse
-    @param output_file: output text file for the hits
-    '''
-
-    # "-layout" option in pdftotext call below: Maintain (as best as possible) the original physical layout of the text. The
-    #                                           default is to 'undo' physical layout (columns, hyphenation, etc.) and output
-    #                                           the text in reading order.
-    try:
-        call(["pdftotext", "-layout", filename, output_file])
-    except:
-        raise Exception("Error while trying to convert PDF to text")
-
-
-
-
-if __name__ == '__main__':
-    pdf_as_text = sys.argv[1]+".txt"
-    convert_pdftotext(sys.argv[1], pdf_as_text)
-    pdfread.convert_pdftotext2tabular(pdf_as_text, sys.argv[2], sys.argv[3], False)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rankfilter_GCMS/rankfilterGCMS_tabular.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,80 @@
+<tool id="rankfilterGCMS_tabular" name="RIQC-RankFilter GC-MS from tabular file" version="1.0.2">
+  <description>Convert Retention Time to Retention Index</description>
+  <command interpreter="python">rankfilter.py $input_file</command>
+  <inputs>
+    <param format="tabular" name="sample" type="data" label="Sample File"
+	       help="Select a tab delimited NIST metabolite identifications file (converted from PDF)" />
+	<!-- question: is this calibration file not column specific as it includes RT info?? -->
+    <!-- this one should be input file for now:<param name="calibration"  type="select" label="Calibration File"
+           help="Calibration file with reference masses (e.g. alkanes) with their RT and RI values"
+    		dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RankFilter_Calibration_Files")'/>
+    		-->
+    <param name="calibration" format="any" type="data" label="Calibration File"
+           help="Calibration file containing reference masses (e.g. alkanes) with their respective RT and RI values"/>
+
+    <param name="analysis_type" type="select" format="text" label="Analysis Type"
+    	   help="Select the type of analysis that has been used to generate the sample file">
+      <option value="NIST">NIST</option>
+      <option value="AMDIS">AMDIS</option>
+    </param>
+    <param name="model" type="select" format="text" label="Select a model to be used "
+    	   help="Both linear and (3rd degree) polynomial models are available ">
+      <option value="linear">Linear</option>
+      <option value="poly">Polynomial</option>
+    </param>
+    <param name="lib_data" type="select" label="Library"
+	       help="Reference global lookup library file with CAS numbers and respective (previously calculated) RIsvr values"
+           dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RankFilter_lookup_libraries")'/>
+
+    <param name="window" type="float" label="Window" value="10.56" />
+  </inputs>
+  <outputs>
+    <data format="tabular" label="${tool.name}" name="onefile" />
+  </outputs>
+  <!-- file with implementation of the function get_directory_files() used above  -->
+  <code file="match_library.py" />
+  <configfiles>
+    <configfile name="input_file">
+      sample = ${sample}
+      calibration = ${calibration}
+      lib_data = ${lib_data}
+      window = ${window}
+      analysis_type = ${analysis_type}
+      tabular = True
+      onefile = ${onefile}
+      model = ${model}
+    </configfile>
+  </configfiles>
+  <help>
+Basically estimates the experimental RI (RIexp) by building a RI(RT) function based on the
+given calibration file.
+
+It also determines the estimated RI (RIsvr) by looking up for each entry of the given input file (Sample File),
+based on its CAS number, its respective RIsvr value in the given global lookup library
+(this step is also called the "RankFilter analysis" -see reference below; Sample File may be either from NIST or AMDIS).
+This generates an prediction of the RI for
+a compound according to the "RankFilter procedure" (RIsvr).
+
+Output is a tab separated file in which four columns are added:
+
+	- **Rank** Calculated rank
+	- **RIexp** Experimental Retention Index (RI)
+	- **RIsvr** Calculated RI based on support vector regression (SVR)
+	- **%rel.err** Relative RI error (%rel.error = 100 * (RISVR − RIexp) / RIexp)
+
+.. class:: infomark
+
+**Notes**
+
+	- The layout of the Calibration file should include the following columns: 'MW', 'R.T.' and 'RI'.
+	- Selecting 'Polynomial' in the model parameter will calculate a 3rd degree polynomial model that will
+	  be used to convert from XXXX to YYYY.
+
+-----
+
+**References**
+
+    - **RankFilter**: Mihaleva et. al. (2009) *Automated procedure for candidate compound selection in GC-MS
+      metabolomics based on prediction of Kovats retention index*. Bioinformatics, 25 (2009), pp. 787–794
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rankfilter_GCMS/select_on_rank.py	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,21 @@
+import csv
+import sys
+
+__author__ = "Marcel Kempenaar"
+__contact__ = "brs@nbic.nl"
+__copyright__ = "Copyright, 2012, Netherlands Bioinformatics Centre"
+__license__ = "MIT"
+
+in_file = sys.argv[1]
+out_file = sys.argv[2]
+to_select_list = [str(select.strip()) for select in sys.argv[3].split(',') if (len(select) > 0)]
+
+data = list(csv.reader(open(in_file, 'rb'), delimiter='\t'))
+header = data.pop(0)
+header_clean = [i.lower().strip().replace(".", "").replace("%", "") for i in header]
+rank = header_clean.index("rank")
+
+writer = csv.writer(open(out_file, 'wb'), delimiter='\t')
+writer.writerow(header)
+for select in to_select_list:
+    writer.writerows([i for i in data if i[rank] == select])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rankfilter_GCMS/select_on_rank.xml	Thu Mar 19 12:22:23 2015 +0100
@@ -0,0 +1,15 @@
+<tool id="filter_on_rank" name="RIQC-Filter on rank" version="1.0.2">
+  <description>Filter on the Rank field in the RankFilter output file</description>
+  <command interpreter="python">select_on_rank.py $input $output "$rank"</command>
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Source file (RankFilter ouptut)"/>
+    <param format="tabular" help="Filter on (keep different values separate with a comma)" value ="1,2"
+	   name="rank" type="text" label="Select Ranks to keep"/>
+  </inputs>
+  <outputs>
+    <data format="tabular" label="${tool.name} on ${on_string} selected ${rank}"  name="output" />
+  </outputs>
+  <help>
+This tool removes all entries with non selected rank values from the input file (supported input file is a RankFilter output file).
+  </help>
+</tool>
--- a/rankfilter_text2tabular.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-<tool id="NDIStext2tabular" name="NIST_UTIL- NIST text to tabular format" version="1.0.2">
-  <description>Convert NIST text to tabular format</description>
-  <command interpreter="python">rankfilter_GCMS/pdftotabular.py $input $output $output_err</command>
-  <inputs>
-    <param format="pdf" name="input" type="data" label="NIST identifications report (PDF)"/>
-  </inputs>
-  <outputs>
-    <data format="tabular" label="${tool.name} output on ${on_string}"  name="output" />
-    <data format="tabular" label="${tool.name} error log"  name="output_err" />
-  </outputs>
-  <help>
-    This tool converts NIST identification report output (PDF) to a tabular format needed for further use with the RIQC tools.
-  </help>
-</tool>
--- a/select_on_rank.py	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-import csv
-import sys
-
-__author__ = "Marcel Kempenaar"
-__contact__ = "brs@nbic.nl"
-__copyright__ = "Copyright, 2012, Netherlands Bioinformatics Centre"
-__license__ = "MIT"
-
-in_file = sys.argv[1]
-out_file = sys.argv[2]
-to_select_list = [str(select.strip()) for select in sys.argv[3].split(',') if (len(select) > 0)]
-
-data = list(csv.reader(open(in_file, 'rb'), delimiter='\t'))
-header = data.pop(0)
-header_clean = [i.lower().strip().replace(".", "").replace("%", "") for i in header]
-rank = header_clean.index("rank")
-
-writer = csv.writer(open(out_file, 'wb'), delimiter='\t')
-writer.writerow(header)
-for select in to_select_list:
-    writer.writerows([i for i in data if i[rank] == select])
--- a/select_on_rank.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-<tool id="filter_on_rank" name="RIQC-Filter on rank" version="1.0.2">
-  <description>Filter on the Rank field in the RankFilter output file</description>
-  <command interpreter="python">select_on_rank.py $input $output "$rank"</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Source file (RankFilter ouptut)"/>
-    <param format="tabular" help="Filter on (keep different values separate with a comma)" value ="1,2"
-	   name="rank" type="text" label="Select Ranks to keep"/>
-  </inputs>
-  <outputs>
-    <data format="tabular" label="${tool.name} on ${on_string} selected ${rank}"  name="output" />
-  </outputs>
-  <help>
-This tool removes all entries with non selected rank values from the input file (supported input file is a RankFilter output file).
-  </help>
-</tool>
--- a/static_resources/README.txt	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-This folder and respective files should be deployed together with the following tools:
-
- - ../export_to_metexp_tabular.py
\ No newline at end of file
--- a/static_resources/elements_and_masses.tab	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-Name	Atomic number	Chemical symbol	Relative atomic mass
-Hydrogen	1	H	1.01
-Helium	2	He	4
-Lithium	3	Li	6.94
-Beryllium	4	Be	9.01
-Boron	5	B	10.81
-Carbon	6	C	12.01
-Nitrogen	7	N	14.01
-Oxygen	8	O	16
-Fluorine	9	F	19
-Neon	10	Ne	20.18
-Sodium	11	Na	22.99
-Magnesium	12	Mg	24.31
-Aluminum	13	Al	26.98
-Silicon	14	Si	28.09
-Phosphorus	15	P	30.98
-Sulfur	16	S	32.06
-Chlorine	17	Cl	35.45
-Argon	18	Ar	39.95
-Potassium	19	K	39.1
-Calcium	20	Ca	40.08
-Scandium	21	Sc	44.96
-Titanium	22	Ti	47.9
-Vanadium	23	V	50.94
-Chromium	24	Cr	52
-Manganese	25	Mn	54.94
-Iron	26	Fe	55.85
-Cobalt	27	Co	58.93
-Nickel	28	Ni	58.71
-Copper	29	Cu	63.54
-Zinc	30	Zn	65.37
-Gallium	31	Ga	69.72
-Germanium	32	Ge	72.59
-Arsenic	33	As	74.99
-Selenium	34	Se	78.96
-Bromine	35	Br	79.91
-Krypton	36	Kr	83.8
-Rubidium	37	Rb	85.47
-Strontium	38	Sr	87.62
-Yttrium	39	Y	88.91
-Zirconium	40	Zr	91.22
-Niobium	41	Nb	92.91
-Molybdenum	42	Mo	95.94
-Technetium	43	Tc	96.91
-Ruthenium	44	Ru	101.07
-Rhodium	45	Rh	102.9
-Palladium	46	Pd	106.4
-Silver	47	Ag	107.87
-Cadmium	48	Cd	112.4
-Indium	49	In	114.82
-Tin	50	Sn	118.69
-Antimony	51	Sb	121.75
-Tellurium	52	Te	127.6
-Iodine	53	I	126.9
-Xenon	54	Xe	131.3
-Cesium	55	Cs	132.9
-Barium	56	Ba	137.34
-Lanthanum	57	La	138.91
-Cerium	58	Ce	140.12
-Praseodymium	59	Pr	140.91
-Neodymium	60	Nd	144.24
-Promethium	61	Pm	144.91
-Samarium	62	Sm	150.35
-Europium	63	Eu	151.96
-Gadolinium	64	Gd	157.25
-Terbium	65	Tb	158.92
-Dysprosium	66	Dy	162.5
-Holmium	67	Ho	164.93
-Erbium	68	Er	167.26
-Thulium	69	Tm	168.93
-Ytterbium	70	Yb	173.04
-Lutetium	71	Lu	174.97
-Hafnium	72	Hf	178.49
-Tantalum	73	Ta	180.95
-Wolfram	74	W	183.85
-Rhenium	75	Re	186.2
-Osmium	76	Os	190.2
-Iridium	77	Ir	192.22
-Platinum	78	Pt	195.09
-Gold	79	Au	196.97
-Mercury	80	Hg	200.59
-Thallium	81	Tl	204.37
-Lead	82	Pb	207.19
-Bismuth	83	Bi	208.98
-Polonium	84	Po	208.98
-Astatine	85	At	209.99
-Radon	86	Rn	222.02
-Francium	87	Fr	223.02
-Radium	88	Ra	226
-Actinium	89	Ac	227.03
-Thorium	90	Th	232.04
-Protactinium	91	Pa	231.04
-Uranium	92	U	238.03
-Neptunium	93	Np	237
-Plutonium	94	Pu	242
-Americium	95	Am	243.06
-Curium	96	Cm	247.07
-Berkelium	97	Bk	247.07
-Californium	98	Cf	251.08
-Einsteinium	99	Es	254.09
-Fermium	100	Fm	257.1
-Mendelevium	101	Md	257.1
-Nobelium	102	No	255.09
-Lawrencium	103	Lr	256.1
--- a/tool_dependencies.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-<!-- see also http://wiki.galaxyproject.org/ToolShedToolFeatures for syntax help
-   -->
-  	<package name="R_bioc_metams" version="3.1.1">
-		<repository changeset_revision="4b30bdaf4dbd" name="prims_metabolomics_r_dependencies" owner="pieterlukasse" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" />
-	</package>
-	<readme>
-				This dependency:
-				Ensures R 3.1.1 installation is triggered (via dependency).
-				Ensures Bioconductor 3.0 and package metaMS, multtest and snow are installed.
-      </readme>
-</tool_dependency>
\ No newline at end of file
--- a/xcms_differential_analysis.r	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-## read args:
-args <- commandArgs(TRUE)
-#cat("args <- \"\"\n")
-## a xcms xset saved as .RData
-args.xsetData <- args[1]
-#cat(paste("args.xsetData <- \"", args[1], "\"\n", sep=""))
-
-args.class1 <- args[2]
-args.class2 <- args[3]
-#cat(paste("args.class1 <- \"", args[2], "\"\n", sep=""))
-#cat(paste("args.class2 <- \"", args[3], "\"\n", sep=""))
-
-args.topcount <- strtoi(args[4])
-#cat(paste("args.topcount <- ", args[4], "\n", sep=""))
-
-args.outTable <- args[5]
-
-## report files
-args.htmlReportFile <- args[6]
-args.htmlReportFile.files_path <- args[7]
-#cat(paste("args.htmlReportFile <- \"", args[6], "\"\n", sep=""))
-#cat(paste("args.htmlReportFile.files_path <- \"", args[7], "\"\n", sep=""))
-
-
-if (length(args) == 8)
-{
-	args.outLogFile <- args[8]
-	# suppress messages:
-	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
-	msg <- file(args.outLogFile, open="wt")
-	sink(msg, type="message")
-	sink(msg, type="output")
-}
-
-tryCatch(
-        {
-        	library(metaMS)
-        	library(xcms)
-	        #library("R2HTML")
-
-			# load the xset data :
-			xsetData <- readRDS(args.xsetData)
-			# if here to support both scenarios:
-			if ("xcmsSet" %in% slotNames(xsetData) )
-			{
-				xsetData <- xsetData@xcmsSet
-			}
-
-
-			# info: levels(xcmsSet@phenoData$class) also gives access to the class names
-			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
-			# set cairo as default for png plots:
-			png = function (...) grDevices::png(...,type='cairo')
-			# run diffreport
-			reporttab <- diffreport(xsetData, args.class1, args.class2, paste(args.htmlReportFile.files_path,"/fig", sep=""), args.topcount, metlin = 0.15, h=480, w=640)
-
-			# write out tsv table:
-			write.table(reporttab, args.outTable, sep="\t", row.names=FALSE)
-
-			message("\nGenerating report.........")
-
-			cat("<html><body><h1>Differential analysis report</h1>", file= args.htmlReportFile)
-			#HTML(reporttab[1:args.topcount,], file= args.htmlReportFile)
-			figuresPath <- paste(args.htmlReportFile.files_path, "/fig_eic", sep="")
-			message(figuresPath)
-			listOfFiles <- list.files(path = figuresPath)
-			for (i in 1:length(listOfFiles))
-			{
-				figureName <- listOfFiles[i]
-				# maybe we still need to copy the figures to the args.htmlReportFile.files_path
-				cat(paste("<img src='fig_eic/", figureName,"' />", sep=""), file= args.htmlReportFile, append=TRUE)
-				cat(paste("<img src='fig_box/", figureName,"' />", sep=""), file= args.htmlReportFile, append=TRUE)
-			}
-
-			message("finished generating report")
-			cat("\nWarnings================:\n")
-			str( warnings() )
-		},
-        error=function(cond) {
-            sink(NULL, type="message") # default setting
-			sink(stderr(), type="output")
-            message("\nERROR: ===========\n")
-            print(cond)
-        }
-    )
\ No newline at end of file
--- a/xcms_differential_analysis.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-<tool id="xcms_differential_analysis" name="XCMS Differential Analsysis"  version="0.0.4">
-	<description> Runs xcms diffreport function for differential Analsysis</description>
-	<requirements>
-		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
-	</requirements>
-	<command interpreter="Rscript">
-		xcms_differential_analysis.r
-	    $xsetData
-		"$class1"
-		"$class2"
-		$topcount
-		$outTable
-		$htmlReportFile
-		$htmlReportFile.files_path
-		$outLogFile
-	</command>
-<inputs>
-
-	<param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/>
-
-
-	<param name="class1" type="text" size="30" label="Class1 name" value="" help="Name of first class for the comparison"/>
-	<param name="class2" type="text" size="30" label="Class2 name" value="" help="Name of second class for the comparison"/>
-
-	<param name="topcount" type="integer" size="10" value="10" label="Number of items to return" help="Top X differential items. E.g. if 10, it will return top 10 differential items." />
-
-</inputs>
-<outputs>
-	<data name="outTable" format="tabular" label="${tool.name} on ${on_string} - Top differential items (TSV)"/>
-	<data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - log (LOG)" hidden="True"/>
-	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - differential report (HTML)"/>
-</outputs>
-<tests>
-	<test>
-	</test>
-</tests>
-<help>
-
-.. class:: infomark
-
-Runs xcms diffreport for showing the most significant differences between two sets/classes of samples. This tool also creates extracted ion chromatograms (EICs) for
-the most significant differences. The figure below shows an example of such an EIC.
-
-.. image:: $PATH_TO_IMAGES/diffreport.png
-
-
-
-
-  </help>
-  <citations>
-        <citation type="doi">10.1021/ac051437y</citation> <!-- example
-        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
-        -->
-   </citations>
-</tool>
\ No newline at end of file
--- a/xcms_get_alignment_eic.r	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,153 +0,0 @@
-# shows all alignment results in a rt region
-## read args:
-args <- commandArgs(TRUE)
-# xset data:
-args.xsetData <- args[1]
-
-args.rtStart  <- strtoi(args[2])
-args.rtEnd <- strtoi(args[3])
-
-# limit max diff to 600 and minNrSamples to at least 2 :
-if (args.rtEnd - args.rtStart > 600)
-	stop("The RT window should be <= 600")
-
-args.minNrSamples <- strtoi(args[4]) #min nr samples
-if (args.minNrSamples < 2)
-	stop("Set 'Minimum number of samples' to 2 or higher")
-
-
-args.sampleNames <- strsplit(args[5], ",")[[1]]
-# trim leading and trailing spaces:
-args.sampleNames <- gsub("^\\s+|\\s+$", "", args.sampleNames)
-
-## report files
-args.htmlReportFile <- args[6]
-args.htmlReportFile.files_path <- args[7]
-
-
-if (length(args) == 8)
-{
-	args.outLogFile <- args[8]
-	# suppress messages:
-	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
-	msg <- file(args.outLogFile, open="wt")
-	sink(msg, type="message")
-	sink(msg, type="output")
-}
-
-
-
-tryCatch(
-        {
-	        library(metaMS)
-
-			# load the xset data :
-			xsetData <- readRDS(args.xsetData)
-			# if here to support both scenarios:
-			if ("xcmsSet" %in% slotNames(xsetData) )
-			{
-				xsetData <- xsetData@xcmsSet
-			}
-
-			# report
-			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
-			message(paste("\nGenerating report.........in ", args.htmlReportFile.files_path))
-
-			write(paste("<html><body><h1>Extracted Ion Chromatograms (EIC) of alignments with peaks in ",args.minNrSamples, " or more samples</h1>"),
-			      file=args.htmlReportFile)
-
-			gt <- groups(xsetData)
-			message("\nParsed groups... ")
-			groupidx1 <- which(gt[,"rtmed"] > args.rtStart & gt[,"rtmed"] < args.rtEnd & gt[,"npeaks"] >= args.minNrSamples)  # this should be only on samples selected
-
-			if (length(groupidx1) > 0)
-			{
-				message("\nGetting EIC... ")
-				eiccor <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames)
-				#eicraw <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames, rt = "raw")
-
-				#sampleNamesIdx <- which(sampnames(LC$xset@xcmsSet) %in% args.sampleNames, arr.ind = TRUE)
-				#or (from bioconductor code for getEIC):  NB: this is assuming sample indexes used in data are ordered after the order of sample names!!
-				sampNames <- sampnames(xsetData)
-				sampleNamesIdx <- match( args.sampleNames, sampNames)
-				message(paste("Samples: ", sampleNamesIdx))
-
-				#TODO html <- paste(html, "<table><tbody>")
-				message(paste("\nPlotting figures... "))
-
-				#get the mz list (interestingly, this [,"mz"] is relatively slow):
-				peakMzList <- xsetData@peaks[,"mz"]
-				peakSampleList <- xsetData@peaks[,"sample"]
-				#signal to noise list:
-				peakSnList <- xsetData@peaks[,"sn"]
-
-				message(paste("Total nr of peaks: ", length(peakMzList)))
-
-				for (i in 1:length(groupidx1))
-				{
-					groupMembers <- xsetData@groupidx[[groupidx1[i]]]
-
-					groupMzList <- ""
-					groupSampleList <- ""
-					finalGroupSize <- 0
-
-					for (j in 1:length(groupMembers))
-					{
-						#get only the peaks from the selected samples:
-						memberSample <- peakSampleList[groupMembers[j]]
-						memberSn <- peakSnList[groupMembers[j]]
-						if (!is.na(memberSn) && memberSample %in% sampleNamesIdx)
-						{
-							message(paste("Group: ", groupidx1[i], " / Member sample: ", memberSample))
-							memberMz <- peakMzList[groupMembers[j]]
-
-
-							if (finalGroupSize == 0)
-							{
-								groupMzList <- memberMz
-								groupSampleList <- sampNames[memberSample]
-							} else {
-								groupMzList <- paste(groupMzList,",",memberMz, sep="")
-								groupSampleList <- paste(groupSampleList,",",sampNames[memberSample], sep="")
-							}
-
-							finalGroupSize <- finalGroupSize +1
-						}
-					}
-					# here we do the real check on group size and only the groups that have enough
-					# members with signal to noise > 0 will be plotted here:
-					if (finalGroupSize >= args.minNrSamples)
-					{
-						message(paste("Plotting figure ",i, " of max ", length(groupidx1)," figures... "))
-
-						figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
-						write(paste("<img src='", "figure", i,".png' />", sep="") ,
-			      				file=args.htmlReportFile, append=TRUE)
-
-						png( figureName, type="cairo", width=800 )
-						plot(eiccor, xsetData, groupidx = i)
-						devname = dev.off()
-
-						write(paste("<p>Alignment id: [", groupidx1[i], "]. m/z list of peaks in alignment with signal/noise <> NA:<br/>", groupMzList,"</p>", sep="") ,
-			      				file=args.htmlReportFile, append=TRUE)
-			      		write(paste("<p>m/z values found in the following samples respectively: <br/>", groupSampleList,"</p>", sep="") ,
-			      				file=args.htmlReportFile, append=TRUE)
-					}
-				}
-
-			}
-
-			write("</body><html>",
-			      	file=args.htmlReportFile, append=TRUE)
-			message("finished generating report")
-			# unlink(args.htmlReportFile)
-			cat("\nWarnings================:\n")
-			str( warnings() )
-		},
-        error=function(cond) {
-            sink(NULL, type="message") # default setting
-			sink(stderr(), type="output")
-            message("\nERROR: ===========\n")
-            print(cond)
-        }
-    )
--- a/xcms_get_alignment_eic.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-<tool id="xcms_get_alignment_eic" name="XCMS Get Alignment EICs"  version="0.0.4">
-	<description> Extracts alignment EICs from feature alignment data</description>
-	<requirements>
-		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
-	</requirements>
-	<command interpreter="Rscript">
-		xcms_get_alignment_eic.r
-	    $xsetData
-		$rtStart
-		$rtEnd
-		$minNrSamples
-		"$sampleNames"
-		$htmlReportFile
-		$htmlReportFile.files_path
-		$outLogFile
-	</command>
-<inputs>
-
-	<param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/>
-
-
-	<param name="rtStart" type="integer" value="" size="10" label="RT start" help="Start of Retention Time region to plot" />
-	<param name="rtEnd" type="integer" value="" size="10"  label="RT end" help="End of Retention Time region to plot" />
-
-	<param name="minNrSamples" type="integer" size="10" value="10" label="Minimum number of samples in which aligned feature should be found" help="Can also read this as: Minimum
-	number of features in alignment. E.g. if set to 10, it means the alignment should consist of at least 10 peaks from 10 different samples aligned together." />
-
-	<param name="sampleNames" type="text" area="true" size="10x70" label="List of sample names"
-	value="sampleName1,sampleName2,etc"
-	help="Comma or line-separated list of sample names. Optional field where you can specify the subset of samples
-	to use for the EIC plots. NB: if your dataset has many samples, specifying a subset here can significantly speed up the processing time." >
-		<sanitizer>
-			<!-- this translates from line-separated to comma separated list, removes quotes  -->
-			<valid/>
-			<mapping initial="none">
-		    	<add source="&#10;" target=","/>
-		    	<add source="&#13;" target=""/>
-		    	<add source="&quot;" target=""/>
- 			</mapping>
-		</sanitizer>
-	</param>
-
-
-</inputs>
-<outputs>
-	<data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - log (LOG)" hidden="True"/>
-	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - EIC(s) report (HTML)"/>
-</outputs>
-<tests>
-	<test>
-	</test>
-</tests>
-<help>
-
-.. class:: infomark
-
-This tool finds the alignments in the specified RT window and extracts alignment EICs from feature alignment data using XCMS's getEIC method.
-It produces a HTML report showing the EIC plots and the mass list of each alignment. The figure below shows an example of such an EIC plot, showing also the difference between
-two classes, with extra alignment information beneath it.
-
-.. image:: $PATH_TO_IMAGES/diffreport.png
-
-Alignment id: 1709. m/z list of peaks in alignment:
-614.002922098482,613.998019830021,614.000382307519,613.998229980469,613.998229980469
-
-
-  </help>
-  <citations>
-        <citation type="doi">10.1021/ac051437y</citation> <!-- example
-        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
-        -->
-   </citations>
-</tool>
\ No newline at end of file
--- a/xcms_get_mass_eic.r	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-## read args:
-args <- commandArgs(TRUE)
-# xset data:
-args.xsetData <- args[1]
-
-args.rtStart  <- strtoi(args[2])
-args.rtEnd <- strtoi(args[3])
-
-args.mzStart <- as.double(args[4])
-args.mzEnd <- as.double(args[5])
-# there are 2 options: specify a mz range or a mz list:
-if (args.mzStart < 0)
-{
-	args.mzList <- as.double(strsplit(args[6], ",")[[1]])
-	cat(typeof(as.double(strsplit(args[6], ",")[[1]])))
-	args.mzTolPpm <- as.double(args[7])
-	# calculate mzends based on ppm tol:
-	mzListEnd <- c()
-	mzListStart <- c()
-	for (i in 1:length(args.mzList))
-	{
-		mzEnd <- args.mzList[i] + args.mzList[i]*args.mzTolPpm/1000000.0
-		mzStart <- args.mzList[i] - args.mzList[i]*args.mzTolPpm/1000000.0
-		mzListEnd <- c(mzListEnd, mzEnd)
-		mzListStart <- c(mzListStart, mzStart)
-	}
-	str(mzListStart)
-	str(mzListEnd)
-} else {
-	mzListEnd <- c(args.mzEnd)
-	mzListStart <- c(args.mzStart)
-}
-
-args.sampleNames <- strsplit(args[8], ",")[[1]]
-# trim leading and trailing spaces:
-args.sampleNames <- gsub("^\\s+|\\s+$", "", args.sampleNames)
-
-args.combineSamples <- args[9]
-args.rtPlotMode <- args[10]
-
-## report files
-args.htmlReportFile <- args[11]
-args.htmlReportFile.files_path <- args[12]
-
-
-if (length(args) == 13)
-{
-	args.outLogFile <- args[13]
-	# suppress messages:
-	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
-	msg <- file(args.outLogFile, open="wt")
-	sink(msg, type="message")
-	sink(msg, type="output")
-}
-
-# TODO - add option to do masses in same plot (if given in same line oid) or in separate plots
-# TODO2 - let it run in parallel
-
-tryCatch(
-        {
-	        library(metaMS)
-
-			# load the xset data :
-			xsetData <- readRDS(args.xsetData)
-			# if here to support both scenarios:
-			if ("xcmsSet" %in% slotNames(xsetData) )
-			{
-				xsetData <- xsetData@xcmsSet
-			}
-
-			# report
-			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
-			message(paste("\nGenerating report.........in ", args.htmlReportFile.files_path))
-
-			html <- "<html><body><h1>Extracted Ion Chromatograms (EIC) matching criteria</h1>"
-
-			if (args.combineSamples == "No")
-			{
-				if (length(args.sampleNames) > 1 && length(mzListStart) > 1 && length(args.sampleNames) != length(mzListStart))
-					stop(paste("The number of sample names should match the number of m/z values in the list. Found ", length(mzListStart),
-					          " masses while ",  length(args.sampleNames), " sample names were given."))
-
-		  		iterSize <- length(args.sampleNames)
-				# these can be set to 1 or 0 just as a trick to iterate OR not over the items. If the respective list is of length 1, only the first item should be used
-				fixSampleIdx <- 1
-				fixMzListIdx <- 1
-				if (length(args.sampleNames) == 1)
-				{
-					fixSampleIdx <- 0
-					iterSize <- length(mzListStart)
-				}
-				if (length(mzListStart) == 1)
-				{
-					fixMzListIdx <- 0
-				}
-				lineColors <- rainbow(iterSize)
-				for (i in 0:(iterSize-1))
-				{
-					message("\nGetting EIC... ")
-					eiccor <- getEIC(xsetData,
-										mzrange=matrix(c(mzListStart[i*fixMzListIdx+1],mzListEnd[i*fixMzListIdx+1]),nrow=1,ncol=2,byrow=TRUE),
-										rtrange=matrix(c(args.rtStart,args.rtEnd),nrow=1,ncol=2,byrow=TRUE),
-										sampleidx=c(args.sampleNames[i*fixSampleIdx+1]), rt=args.rtPlotMode)
-
-					message("\nPlotting figures... ")
-					figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
-					html <- paste(html,"<img src='", "figure", i,".png' /><br/>", sep="")
-					png( figureName, type="cairo", width=1100,height=250 )
-					#plot(eiccor, col=lineColors[i+1])
-					# black is better in this case:
-					plot(eiccor)
-					legend('topright', # places a legend at the appropriate place
-							legend=c(args.sampleNames[i*fixSampleIdx+1]), # puts text in the legend
-							lty=c(1,1), # gives the legend appropriate symbols (lines)
-							lwd=c(2.5,2.5))
-
-					devname = dev.off()
-				}
-
-			} else {
-				for (i in 1:length(mzListStart))
-				{
-					message("\nGetting EIC... ")
-					eiccor <- getEIC(xsetData,
-										mzrange=matrix(c(mzListStart[i],mzListEnd[i]),nrow=1,ncol=2,byrow=TRUE),
-										rtrange=matrix(c(args.rtStart,args.rtEnd),nrow=1,ncol=2,byrow=TRUE),
-										sampleidx=args.sampleNames, rt = args.rtPlotMode)
-
-										#set size, set option (plot per sample, plot per mass)
-
-					message("\nPlotting figures... ")
-					figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
-					html <- paste(html,"<img src='", "figure", i,".png' />", sep="")
-					png( figureName, type="cairo", width=1100,height=450 )
-					lineColors <- rainbow(length(args.sampleNames))
-					plot(eiccor, col=lineColors)
-					legend('topright', # places a legend at the appropriate place
-						  legend=args.sampleNames, # puts text in the legend
-						  lty=c(1,1), # gives the legend appropriate symbols (lines)
-						  lwd=c(2.5,2.5),
-				          col=lineColors)
-					devname = dev.off()
-				}
-			}
-			if (args.rtPlotMode == "corrected")
-			{
-				html <- paste(html,"<p>*rt values are corrected ones</p></body><html>")
-			}
-			html <- paste(html,"</body><html>")
-			message("finished generating report")
-			write(html,file=args.htmlReportFile)
-			# unlink(args.htmlReportFile)
-			cat("\nWarnings================:\n")
-			str( warnings() )
-		},
-        error=function(cond) {
-            sink(NULL, type="message") # default setting
-			sink(stderr(), type="output")
-            message("\nERROR: ===========\n")
-            print(cond)
-        }
-    )
--- a/xcms_get_mass_eic.xml	Thu Mar 19 12:13:13 2015 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-<tool id="xcms_get_mass_eic" name="XCMS Get EICs"  version="0.0.4">
-	<description> Extracts EICs for a given list of masses</description>
-	<requirements>
-		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
-	</requirements>
-	<command interpreter="Rscript">
-		xcms_get_mass_eic.r
-	    $xsetData
-		$rtStart
-		$rtEnd
-		#if $massParameters.massParametersType == "window"
-			$massParameters.mzStart
-			$massParameters.mzEnd
-			-1
-			"."
-		#else
-			-1
-			-1
-			"$massParameters.mzList"
-			$massParameters.mzTolPpm
-		#end if
-		"$sampleNames"
-		$combineSamples
-		$rtPlotMode
-		$htmlReportFile
-		$htmlReportFile.files_path
-		$outLogFile
-	</command>
-<inputs>
-
-	<param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/>
-
-
-	<param name="rtStart" type="integer" value="" size="10" label="RT start" help="Start of Retention Time region to plot" />
-	<param name="rtEnd" type="integer" value="" size="10"  label="RT end" help="End of Retention Time region to plot" />
-
-	<conditional name="massParameters">
-		<param name="massParametersType" type="select" size="50" label="Give masses as" >
-			<option value="list" selected="true">m/z list</option>
-			<option value="window" >m/z window</option>
-		</param>
-		<when value="list">
-			<param name="mzList" type="text" area="true" size="7x70" label="m/z list"
-				help="Comma or line-separated list of m/z values for which to plot an EIC. One EIC will be plotted for each m/z given here.">
-				<sanitizer>
-					<!-- this translates from line-separated to comma separated list, removes quotes -->
-					<valid/>
-					<mapping initial="none">
-				    	<add source="&#10;" target=","/>
-				    	<add source="&#13;" target=""/>
-				    	<add source="&quot;" target=""/>
-		 			</mapping>
-				</sanitizer>
-			</param>
-			<param name="mzTolPpm" type="integer" size="10" value="5" label="m/z tolerance (ppm)"  />
-		</when>
-		<when value="window">
-			<param name="mzStart" type="float" value="" size="10" label="m/z start" help="Start of m/z window" />
-			<param name="mzEnd" type="float" value="" size="10"  label="m/z end" help="End of m/z window" />
-   		</when>
-	</conditional>
-
-
-	<param name="sampleNames" type="text" area="true" size="10x70" label="List of sample names"
-		value="sampleName1,sampleName2,etc"
-		help="Comma or line-separated list of sample names. Here you can specify the subset of samples
-		to use for the EIC plots. NB: if your dataset has many samples, specifying a subset here can significantly speed up the processing time." >
-		<sanitizer>
-			<!-- this translates from line-separated to comma separated list, removes quotes  -->
-			<valid/>
-			<mapping initial="none">
-		    	<add source="&#10;" target=","/>
-		    	<add source="&#13;" target=""/>
-		    	<add source="&quot;" target=""/>
- 			</mapping>
-		</sanitizer>
-	</param>
-
-	<param name="combineSamples" type="select" size="50" label="Combine samples in EIC"
-	       help="Combining samples means plot contains the combined EIC of a m/z in the different samples. When NOT combining, each plot
-	       only contains the EIC of the m/z in the respectively given sample (the sample name from the sample list in the same position as the
-	       m/z in the m/z list.). Tip: use Yes for visualizing EIC of grouped masses (MsClust or CAMERA groups), use No for visualizing EICs of the same mass in
-	       the different samples.">
-		<option value="No" selected="true">No</option>
-		<option value="Yes" >Yes</option>
-	</param>
-	<param name="rtPlotMode" type="select" size="50" label="RT plot mode"
-	       help="Select whether EIC should be on original or raw Retention Time">
-		<option value="raw" selected="true">Raw</option>
-		<option value="corrected" >Corrected</option>
-	</param>
-</inputs>
-<outputs>
-	<data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - log (LOG)" hidden="True"/>
-	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - EIC(s) report (HTML)"/>
-</outputs>
-<tests>
-	<test>
-	</test>
-</tests>
-<help>
-
-.. class:: infomark
-
-This tool will plot EICs for a given list of masses (or a mass window) using XCMS's getEIC method.
-It produces a HTML report showing the EIC plots, one for each given mass. The figure below shows an example of such an EIC plot.
-
-.. image:: $PATH_TO_IMAGES/massEIC.png
-
-
-  </help>
-  <citations>
-        <citation type="doi">10.1021/ac051437y</citation> <!-- example
-        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
-        -->
-   </citations>
-</tool>
\ No newline at end of file