# HG changeset patch # User lain # Date 1643725170 0 # Node ID 15c9fbefeaf12c126982496f7345c49eb398cd00 " master branch Updating" diff -r 000000000000 -r 15c9fbefeaf1 Dockerfile.planemo --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Dockerfile.planemo Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,55 @@ + +FROM python:3.8-buster + +# set author (LABEL replaces the deprecated MAINTAINER instruction) +LABEL maintainer="Lain Pavot" + +# set encoding +ENV LC_ALL en_US.UTF-8 +ENV LANG en_US.UTF-8 + +ENV PLANEMO_VENV_LOCATION /planemo-venv +ENV CONDA /tmp/conda + +RUN \ + apt-get update \ + && apt-get install -y --no-install-recommends \ + ed \ + less \ + locales \ + vim-tiny \ + wget \ + ca-certificates \ + fonts-texgyre \ + && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \ + && locale-gen en_US.utf8 \ + && /usr/sbin/update-locale LANG=en_US.UTF-8 \ + && echo "deb http://http.debian.net/debian buster main" > /etc/apt/sources.list.d/debian-unstable.list \ + && echo 'APT::Default-Release "buster";' > /etc/apt/apt.conf.d/default \ + && echo 'APT::Install-Recommends "false";' > /etc/apt/apt.conf.d/90local-no-recommends \ + && apt-get update \ + && apt-get upgrade -y \ + && apt-get install -y --no-install-recommends \ + git \ + littler \ + libhdf5-dev \ + r-cran-littler \ + r-base \ + r-base-dev \ + r-recommended \ + python-virtualenv \ + && R -e 'install.packages("batch", repos="http://cran.us.r-project.org")' \ + && pip install virtualenv \ + && python -m virtualenv "$PLANEMO_VENV_LOCATION" \ + && . 
"$PLANEMO_VENV_LOCATION"/bin/activate \ + && pip install --upgrade pip setuptools \ + && pip install planemo numpy \ + && planemo conda_init --conda_prefix "$CONDA" \ + && apt-get clean autoclean \ + && apt-get autoremove --yes \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /usr/bin/X11 \ + && rm -rf /tmp/* ; + +CMD [] + diff -r 000000000000 -r 15c9fbefeaf1 Dockerfile.xseeker --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Dockerfile.xseeker Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,110 @@ +# call parent container +#FROM r4.0.0 +FROM debian:testing + +# set author (LABEL replaces the deprecated MAINTAINER instruction) +LABEL maintainer="Lain Pavot" + +# set encoding +ENV LANG en_US.UTF-8 + +## we copy the installer and run it before copying the entier project to prevent +## reinstalling everything each time the project has changed + +COPY ./XSeeker/install.R /tmp/ + +RUN mkdir -p /XSeeker/input/ /var/log/ +VOLUME ["/XSeeker/input/"] + +EXPOSE 8765 +ENV LC_ALL en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV R_BASE_VERSION 4.0.3 + +RUN \ + apt-get update \ + && apt-get install -y --no-install-recommends \ + ed \ + less \ + locales \ + vim-tiny \ + wget \ + ca-certificates \ + fonts-texgyre \ + && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \ + && locale-gen en_US.utf8 \ + && /usr/sbin/update-locale LANG=en_US.UTF-8 \ + && echo "deb http://http.debian.net/debian sid main" > \ + /etc/apt/sources.list.d/debian-unstable.list \ + && echo 'APT::Default-Release "testing";' > \ + /etc/apt/apt.conf.d/default \ + && echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/90local-no-recommends \ + && chmod o+r /etc/resolv.conf \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + file \ + gcc-9-base \ + libopenblas0-pthread \ + littler \ + libcurl4-openssl-dev \ + libxml2-dev \ + libssl-dev \ + libpwiz-dev \ + libpwiz3 \ + r-cran-littler \ + r-base \ + r-base-dev \ + r-recommended \ + r-cran-rlang \ + r-cran-dt \ + r-cran-magrittr \ + r-cran-ncdf4 \ + r-cran-devtools \ + r-cran-plotly \ + r-cran-httpuv 
\ + r-cran-xtable \ + r-cran-sourcetools \ + r-cran-fastmap \ + r-cran-rcurl \ + r-cran-proxy \ + r-cran-htmlwidgets \ + r-cran-rsqlite \ + r-cran-openxlsx \ + r-cran-stringr \ + r-cran-dplyr \ + r-cran-purrr \ + r-cran-blob \ + r-cran-dbscan \ + r-cran-biocmanager \ + r-bioc-biocgenerics \ + r-bioc-biobase \ + r-bioc-protgenerics \ + r-bioc-zlibbioc \ + && Rscript /tmp/install.R \ + && apt-get clean autoclean \ + && apt-get autoremove --yes \ + && rm -rf /var/lib/{apt,dpkg,cache,log}/ \ + && rm -rf /tmp/* ; + +COPY ./XSeeker /XSeeker +COPY ./XSeekerBaseModule /tmp/XSeekerBaseModule + +COPY ./resources/XSeekerBaseModule/ /tmp/XSeekerBaseModule/ +COPY ./resources/XSeeker/config/ /XSeeker/config/ + +RUN \ + cd /tmp/XSeekerBaseModule \ + && ./configure \ + && make install \ + && cd /XSeeker \ + && ./configure \ + && make all_modules install \ + && rm -rf /tmp/XSeekerBaseModule/ ; + + +RUN chmod +x /XSeeker/run.sh + +COPY ./resources/entrypoint.sh / +RUN chmod +x /entrypoint.sh +CMD /entrypoint.sh diff -r 000000000000 -r 15c9fbefeaf1 XSeeker.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/XSeeker.xml Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,45 @@ + + Webbased Interactive bidules visualization + + xseeker + + + + / + 8765 + + + + TRUE + http://172.17.0.1:8080/ + $__history_id__ + + + + + + + + + + + + + + + + + i love to talk to me myself + + diff -r 000000000000 -r 15c9fbefeaf1 resources/XSeeker/config/XSeeker.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/XSeeker/config/XSeeker.config Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,10 @@ + + + + +R_DIRECTORY="/R" +R_BINARY="R" +R_SCRIPT="Rscript" + +PROJECT_ROOT=`dirname $(readlink -f $0)` +MODULE_BINARY_DIRECTORY="${PROJECT_ROOT}/bin" diff -r 000000000000 -r 15c9fbefeaf1 resources/XSeeker/config/config.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/XSeeker/config/config.yml Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,100 @@ + + +default: + + + XSeeker_path: "/XSeeker" + 
input_directory: "{{XSeeker_path}}/input/" + + + galaxy: + active: TRUE + export_directory: "{{XSeeker_path}}/output/" + export_method: "directory" + + + database: + base_data: "{{XSeeker_path}}/data/base.sql" + recreate: FALSE + models: + adduct + cluster + compound + feature + instrument + instrument_config + project + sample + smol_xcms_set + software + camera_parameters + pairing_parameters + peak_picking_parameters + alignmenmt_parameters + models_definition_path: "{{XSeeker_path}}/config/models.R" + connection: + connector: "SQLite" + dbname: "{{XSeeker_path}}/test.sqlite" + + + shiny: + host: "0.0.0.0" + port: 8765 + show_errors: TRUE + appname: "XSeeker" + base_ui_path: "{{XSeeker_path}}/R/base_ui.R" + added_resources: + www: "{{XSeeker_path}}/www/" + sbs: "{{XSeeker_path}}/dist/shinyBS/www/" + + + modules: + checksums_file: "{{XSeeker_path}}/.checksums" + re_compile_modified: FALSE + sources_location: "{{XSeeker_path}}/modules" + binaries_location: "{{XSeeker_path}}/bin" + binaries: + XSeekerRdataManager + XSeekerFilePreProcessing + XSeekerPreProcessPeakPicking + XSeekerPreProcessAlignment + XSeekerPreProcessCamera + XSeekerKMDPlot + XSeekerComputeFormula + XSeekerChromatoAndMassSpectrum + XSeekerClusterTable + XSeekerDatabaseManager + XSeekerDatabaseSummary + XSeekerHairyPlotter + XSeekerGalaxyInterfacR + + sources: + + + XSeekerRdataManager: + autoload: TRUE + download_rdata_mzml_from_galaxy: FALSE + + +production: + + database: + recreate: FALSE + connection: + connector: "SQLite" + dbname: "{{XSeeker_path}}/production.sqlite" + + modules: + re_compile_modified: FALSE + + +docker: + + database: + recreate: TRUE + connection: + connector: "SQLite" + dbname: "file::memory:" + + modules: + re_compile_modified: FALSE diff -r 000000000000 -r 15c9fbefeaf1 resources/XSeeker/config/models.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/XSeeker/config/models.R Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,185 @@ + +tryCatch({ + 
DBModelR::ModelDefinition(table="yui", fields=list(yui="INTEGER")) +}, error=function(e) { + stop("Please, install DBModelR before you source this file.") +}) + +list( + adduct=DBModelR::ModelDefinition( + table="adduct", + fields=list( + name="TEXT", + mass="FLOAT", + charge="INTEGER", + multi="INTEGER", + formula_add="TEXT", + formula_ded="TEXT", + sign="TEXT", + oidscore="INTEGER", + quasi="INTEGER", + ips="FLOAT" + ) + ), + cluster=DBModelR::ModelDefinition( + table="cluster", + fields=list( + clusterID="INTEGER", + formula="TEXT", + annotation="TEXT", + coeff="FLOAT", + r_squared="FLOAT", + charge="INTEGER", + mean_rt="FLOAT", + score="FLOAT", + deviation="FLOAT", + status="TEXT", + # adduct="TEXT", + curent_group="INTEGER", + pc_group="INTEGER", + align_group="INTEGER", + xcms_group="INTEGER" + ), + one=list("compound", "adduct"), + many=list("sample") + ), + compound=DBModelR::ModelDefinition( + table="compound", + fields=list( + name="TEXT", + common_name="TEXT", + formula="TEXT", + charge="INTEGER", + date="TEXT", + mz="FLOAT" + ) + ), + feature=DBModelR::ModelDefinition( + table="feature", + fields=list( + featureID="INTEGER", + mz="FLOAT", + mz_min="FLOAT", + mz_max="FLOAT", + rt="FLOAT", + rt_min="FLOAT", + rt_max="FLOAT", + int_o="FLOAT", + int_b="FLOAT", + max_o="FLOAT", + iso="TEXT", + abundance="FLOAT" + ), + one=list("cluster"), + many=list("sample") + ), + instrument=DBModelR::ModelDefinition( + table="instrument", + fields=list( + model="TEXT", + manufacturer="TEXT", + analyzer="TEXT", + detector_type="TEXT", + ion_source="TEXT" + ) + ), + instrument_config=DBModelR::ModelDefinition( + table="instrument_config", + fields=list( + resolution="TEXT", + agc_target="TEXT", + maximum_IT="TEXT", + number_of_scan_range="TEXT", + scan_range="TEXT", + version="TEXT" + ) + ), + project=DBModelR::ModelDefinition( + table="project", + fields=list( + name="TEXT", + comment="TEXT" + ), + one=list("sample") + ), + sample=DBModelR::ModelDefinition( + 
table="sample", + fields=list( + name="TEXT", + path="TEXT", + polarity="TEXT", + kind="TEXT", ## rdata or mxml or enriched_rdata + raw="BLOB" + ), + one=list( + "peak_picking_parameters", + "pairing_parameters", + "alignmenmt_parameters", + "camera_parameters", + "instrument", + "instrument_config", + "software", + "smol_xcms_set" + ) + ), + smol_xcms_set=DBModelR::ModelDefinition( + table="smol_xcms_set", + fields=list( + raw="BLOB" + ) + ), + software=DBModelR::ModelDefinition( + table="software", + fields=list( + name="TEXT", + version="TEXT" + ) + ), + # camera_parameters=DBModelR::ModelDefinition( + # table="camera_parameters", + # fields=list() + # ), + # pairing_parameters=DBModelR::ModelDefinition( + # table="pairing_parameters", + # fields=list() + # ), + peak_picking_parameters=DBModelR::ModelDefinition( + table="peak_picking_parameters", + fields=list( + ppm="FLOAT", + peakwidth="TEXT", + snthresh="TEXT", + prefilterStep="TEXT", + prefilterLevel="TEXT", + mzdiff="TEXT", + fitgauss="TEXT", + noise="TEXT", + mzCenterFun="TEXT", + integrate="INTEGER", + firstBaselineCheck="TEXT", + snthreshIsoROIs="TEXT", + maxCharge="INTEGER", + maxIso="INTEGER", + mzIntervalExtension="TEXT" + ) + ), + alignmenmt_parameters=DBModelR::ModelDefinition( + table="alignmenmt_parameters", + fields=list( + binSize="TEXT", + centerSample="TEXT", + response="TEXT", + distFun="TEXT", + gapInit="TEXT", + gapExtend="TEXT", + factorDiag="TEXT", + factorGap="TEXT", + localAlignment="INTEGER", + initPenalty="TEXT", + bw="TEXT", + minFraction="TEXT", + minSamples="TEXT", + maxFeatures="TEXT" + ) + ) +) diff -r 000000000000 -r 15c9fbefeaf1 resources/XSeekerBaseModule/config.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/XSeekerBaseModule/config.sh Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,10 @@ + + + + +R_DIRECTORY="/R" +R_BINARY="R" +R_SCRIPT="Rscript" + +PROJECT_ROOT=`dirname $(readlink -f $0)` +MODULE_BINARY_DIRECTORY="${PROJECT_ROOT}/bin" diff -r 000000000000 -r 
15c9fbefeaf1 resources/entrypoint.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/entrypoint.sh Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,14 @@ +#!/bin/bash + +apt-get install iproute2 original-awk iputils-ping -y + +HOST_DOMAIN="host.docker.internal" +ping -q -c1 $HOST_DOMAIN > /dev/null 2>&1 +if [ $? -ne 0 ]; then + HOST_IP=$(ip route | awk 'NR==1 {print $3}') + echo -e "$HOST_IP\t$HOST_DOMAIN" >> /etc/hosts +fi + + +cd /XSeeker +stdbuf -i0 -o0 -e0 ./run.sh > /var/log/xseeker.log 2>&1 \ No newline at end of file diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIE/config/datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIE/config/datatypes_conf.xml Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,23 @@ +diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample +index 8b887b8568..d1d9fb7ab5 100644 +--- a/lib/galaxy/config/sample/datatypes_conf.xml.sample ++++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample +@@ -809,6 +809,9 @@ + + + ++ ++ ++ + + + + + + + HistoryDatasetAssociation + + xseeker.sqlite.Binary + binary.Binary + dataset_id + + + + dataset_id + + + xseeker.mako + diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIE/config/plugins/interactive_environments/xseeker/templates/xseeker.mako --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIE/config/plugins/interactive_environments/xseeker/templates/xseeker.mako Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,65 @@ +<%namespace name="ie" file="ie.mako"/> + +<% + + # Sets ID and sets up a lot of other variables + ie_request.load_deploy_config() + ie_request.attr.docker_port = 8765 + + docker_input_filename = "/XSeeker/input/input.{}".format(hda.ext) + + user_file = ie_request.volume( + docker_input_filename, + hda.file_name, + mode='ro' + ) + + # Launch the IE. This builds and runs the docker command in the background. 
+ ie_request.launch( + volumes=[user_file], + ) + + + ## General IE specific + # Access URLs for the notebook from within galaxy. + notebook_access_url = ie_request.url_template('${PROXY_URL}//') + +%> + + + ${ ie.load_default_js() } + ${ ie.load_default_app() } + + + + +
+
+ + diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIE/config/tool_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIE/config/tool_conf.xml Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,13 @@ +diff --git a/lib/galaxy/config/sample/tool_conf.xml.sample b/lib/galaxy/config/sample/tool_conf.xml.sample +index 286a0c9f1c..6472c6bf20 100644 +--- a/lib/galaxy/config/sample/tool_conf.xml.sample ++++ b/lib/galaxy/config/sample/tool_conf.xml.sample +@@ -1,5 +1,8 @@ + + ++
++ ++
+
+ + diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIE/lib/galaxy/datatypes/binary.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIE/lib/galaxy/datatypes/binary.py Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,63 @@ +diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py +index 1dc84499f8..50ec1acd16 100644 +--- a/lib/galaxy/datatypes/binary.py ++++ b/lib/galaxy/datatypes/binary.py +@@ -2868,6 +2868,58 @@ class WiffTar(BafTar): + return "Sciex WIFF/SCAN archive" + + ++class XSeekerDatabase(SQlite): ++ """Class describing an XSeeker Sqlite database """ ++ MetadataElement( ++ name="xseeker_version", ++ default="1.0.0", ++ param=MetadataParameter, ++ desc="XSeeker Version", ++ readonly=True, ++ visible=True, ++ no_value="1.0.0" ++ ) ++ file_ext = "xseeker.sqlite" ++ edam_format = "format_3622" ++ edam_data = "data_3498" ++ ++ def set_meta(self, dataset, overwrite=True, **kwd): ++ super(XSeekerDatabase, self).set_meta(dataset, overwrite=overwrite, **kwd) ++ try: ++ conn = sqlite.connect(dataset.file_name) ++ c = conn.cursor() ++ tables_query = "SELECT database_version FROM XSeeker_tagging_table" ++ result = c.execute(tables_query).fetchall() ++ for version, in result: ++ dataset.metadata.xseeker_vesrion = version ++ # TODO: Can/should we detect even more attributes, such as use of PED file, what was input annotation type, etc. 
++ except Exception as e: ++ log.warning('%s, set_meta Exception: %s', self, e) ++ ++ def sniff(self, filename): ++ if super(XSeekerDatabase, self).sniff(filename): ++ table_names = [ ++ "XSeeker_tagging_table" ++ ] ++ return self.sniff_table_names(filename, table_names) ++ return False ++ ++ def set_peek(self, dataset, is_multi_byte=False): ++ if not dataset.dataset.purged: ++ dataset.peek = "XSeeker SQLite Database, version %s" % (dataset.metadata.xseeker_version or 'unknown') ++ dataset.blurb = nice_size(dataset.get_size()) ++ else: ++ dataset.peek = 'file does not exist' ++ dataset.blurb = 'file purged from disk' ++ ++ def display_peek(self, dataset): ++ try: ++ return dataset.peek ++ except Exception: ++ return "XSeeker SQLite Database, version %s" % (dataset.metadata.xseeker_version or 'unknown') ++ ++ ++ + if __name__ == '__main__': + import doctest + doctest.testmod(sys.modules[__name__]) diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIE/lib/galaxy/datatypes/text.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIE/lib/galaxy/datatypes/text.py Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,43 @@ +diff --git a/lib/galaxy/datatypes/text.py b/lib/galaxy/datatypes/text.py +index b1884da52b..4ad1006c56 100644 +--- a/lib/galaxy/datatypes/text.py ++++ b/lib/galaxy/datatypes/text.py +@@ -804,3 +804,38 @@ class Gfa1(Text): + return False + found_valid_lines = True + return found_valid_lines ++ ++ ++@build_sniff_from_prefix ++class SQL(Text): ++ """Class describing an SQL file""" ++ file_ext = "sql" ++ ++ def set_peek(self, dataset, is_multi_byte=False): ++ if not dataset.dataset.purged: ++ dataset.peek = "SQL file" ++ dataset.blurb = nice_size(dataset.get_size()) ++ else: ++ dataset.peek = "file does not exist" ++ dataset.blurb = "file purged from disk" ++ ++ def get_mime(self): ++ """Returns the mime type of the datatype""" ++ return "application/sql" ++ ++ def sniff_prefix(self, file_prefix): ++ """ ++ Uses some patterns usually 
encountered in sql files to guess ++ it's type ++ """ ++ start = file_prefix.string_io().read(42).strip() ++ return any( ++ header in start ++ for header in ( ++ "CREATE DATABASE", ++ "INSERT INTO", ++ "CREATE TABLE", ++ "BEGIN TRANSACTION" ++ ) ++ ) ++ diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIE/lib/galaxy/visualization/plugins/interactive_environments.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIE/lib/galaxy/visualization/plugins/interactive_environments.py Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,21 @@ +diff --git a/lib/galaxy/visualization/plugins/interactive_environments.py b/lib/galaxy/visualization/plugins/interactive_environments.py +index 6f5374af37..2fc9e72210 100644 +--- a/lib/galaxy/visualization/plugins/interactive_environments.py ++++ b/lib/galaxy/visualization/plugins/interactive_environments.py +@@ -137,13 +137,13 @@ class InteractiveEnvironmentRequest: + # .get() that will ignore missing sections, so we must make use of + # their defaults dictionary instead. 
+ default_dict = { +- 'container_interface': None, ++ 'container_interface': "", + 'command': 'docker', + 'command_inject': '-e DEBUG=false -e DEFAULT_CONTAINER_RUNTIME=120', + 'docker_hostname': 'localhost', + 'wx_tempdir': 'False', +- 'docker_galaxy_temp_dir': None, +- 'docker_connect_port': None, ++ 'docker_galaxy_temp_dir': "", ++ 'docker_connect_port': "", + } + viz_config = configparser.SafeConfigParser(default_dict) + conf_path = os.path.join(self.attr.our_config_dir, self.attr.viz_id + ".ini") diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIE/tools/LC-MSMS/XSeekerPreparator.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIE/tools/LC-MSMS/XSeekerPreparator.R Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,921 @@ + + +TOOL_NAME <- "XSeekerPreparator" +VERSION <- "1.1.0" + +OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" + +ENRICHED_RDATA_VERSION <- paste("1.1.0", OUTPUT_SPECIFIC_TOOL, sep="-") +ENRICHED_RDATA_DOC <- sprintf(" +Welcome to the enriched of the output of CAMERA/xcms. +This doc was generated by the tool: %s - Version %s +To show the different variables contained in this rdata, type: + - `load('this_rdata.rdata', rdata_env <- new.env())` + - `names(rdata_env)` + +Sections +###### + + +This tools helpers +------ + The version number is somewhat special because the evolution of the + rdata's format is non-linear. + There may be different branches, each evolving separatly. + To reflect these branches's diversions, there may be a prepended + branch name following this format: + major.minor.patch-branch_name + Like this, we can process rdata with the same tool, and output + rdata formated differently, for each tool. + + + - enriched_rdata: + - Description: flag created by that tool to tell it was enriched. + - Retrieval method: enriched_rdata <- TRUE + + - enriched_rdata_version: + - Description: A flag created by that tool to tell which version of + this tool has enriched the rdata. 
+ - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION) + + - enriched_rdata_doc: + - Description: Contains the documentation string. + +Data from original mzxml file +------ + - tic: + - Description: Those are the tic values from the original mzxml + file, extracted using xcms 2. + - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@tic + - xcms version: 2.0 + + - mz: + - Description: Those are the m/z values from the original mzxml + file, extracted using xcms 2. + - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$mz + - xcms version: 2.0 + + - scanindex: + - Description: Those are the scanindex values from the original mzxml + file, extracted using xcms 2. + - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scanindex + - xcms version: 2.0 + + - scantime: + - Description: Those are the scantime values from the original mzxml + file, extracted using xcms 2. + - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scantime + - xcms version: 2.0 + + - intensity: + - Description: Those are the intensity values from the original mzxml + file, extracted using xcms 2. + - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$intensity + - xcms version: 2.0 + + - polarity: + - Description: Those are the polarity values from the original mzxml + file, extracted using xcms 2. + - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]]) + - xcms version: 2.0 + +Data taken from incoming rdata +------ + - variableMetadata: + - Description: Unmodified copy of variableMetadata from incoming rdata. + - Retrieval method: rdata_file$variableMetadata + + - process_params: + - Description: Those are the processing parameters values from the + curent rdata. 
They have been simplified to allow easy access like: + for (params in process_params) { + if (params[[\"xfunction\"]] == \"annotatediff\") { + process_peak_picking_params(params) + } + } + - Retrieval method: + ## just he same list, but simplified + process_params <- list() + for (list_name in names(rdata_file$listOFlistArguments)) { + param_list <- list() + for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) { + param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]] + } + process_params[[length(process_params)+1]] <- param_list + } +", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION) + + + +get_models <- function(path) { + if (is.null(path)) { + stop("No models to define the database schema") + } else { + message(sprintf("Loading models from %s", path)) + } + ## galaxy mangles the "@" to a "__at__" + if (substr(path, 1, 9) == "git__at__") { + path <- sub("^git__at__", "git@", path, perl=TRUE) + } + if ( + substr(path, 1, 4) == "git@" + || substr(path, nchar(path)-3, nchar(path)) == ".git" ## nchar() is the string length (length() is 1 here); substr() takes start/stop + ) { + return (get_models_from_git(path)) + } + if (substr(path, 1, 4) == "http") { + return (get_models_from_url(path)) + } + return (source(path)$value) +} + +get_models_from_git <- function (url, target_file="models.R", rm=TRUE) { + tmp <- tempfile("xseeker_models_") ## private clone target: the unlink() below must never remove the session tempdir() + message(sprintf("Cloning %s", url)) + system2("git", c("clone", url, tmp)) + result <- search_tree(tmp, target_file) ## search from the root of the clone + if (!is.null(result)) { + models <- source(result)$value + if (rm) { + unlink(tmp, recursive=TRUE) + } + return (models) + } + if (rm) { + unlink(tmp, recursive=TRUE) + } + stop(sprintf( + "Could not find any file named \"%s\" in this repo", + target_file + )) +} + +get_models_from_url <- function (url, target_file="models.R", rm=TRUE) { + dir.create(tmp <- tempfile("xseeker_models_")) ## private download dir, safe to unlink() afterwards + message(sprintf("Downloading %s", url)) + result <- file.path(tmp, target_file) + if (download.file(url, destfile=result) == 0) { + models <- source(result)$value + if (rm) { + 
unlink(tmp, recursive=TRUE) + } + return (models) + } + if (rm) { + unlink(tmp, recursive=TRUE) + } + stop("Could not download any file at this adress.") +} + +search_tree <- function(path, target) { + target <- tolower(target) + for (file in list.files(path)) { + if (dir.exists(file.path(path, file))) { ## list.files() yields bare names, so test the full path + result <- search_tree(file.path(path, file), target) + if (!is.null(result)) { + return (result) + } + } else if (tolower(file) == target) { + return (file.path(path, file)) + } + } + return (NULL) +} + +create_database <- function(orm) { + orm$recreate_database(no_exists=FALSE) + set_database_version(orm, "created") +} + +insert_adducts <- function(orm) { + message("Creating adducts...") + adducts <- list( + list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"), + list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"), + list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"), + list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"), + list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), + list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), + list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"), + list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), + list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), + list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"), + list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), + list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), + list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"), + list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"), + list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"), + list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"), + list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"), + list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"), + list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"), + 
list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"), + list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"), + list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"), + list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"), + list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), + list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"), + list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), + list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"), + list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"), + list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"), + list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), + list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), + list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"), + list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"), + list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), + list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), + list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"), + list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"), + list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"), + list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), + list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), + list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"), + list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"), + list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"), + list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"), + list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), + list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), + list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"), + 
list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), + list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), + list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"), + list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"), + list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"), + list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"), + list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"), + list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"), + list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), + list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), + list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"), + list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), + list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), + list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"), + list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"), + list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"), + list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"), + list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"), + list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"), + list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"), + list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"), + list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"), + list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"), + list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), + list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), + list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"), + list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), + list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), + 
list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"), + list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"), + list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), + list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), + list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"), + list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"), + list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"), + list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"), + list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"), + list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), + list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), + list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"), + list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"), + list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"), + list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"), + list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"), + list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"), + list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"), + list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"), + list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"), + list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"), + list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"), + list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"), + list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"), + list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"), + list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"), + list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"), + 
list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"), + list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"), + list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"), + list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0") + ) + dummy_adduct <- orm$adduct() + for (adduct in adducts) { + i <- 0 + dummy_adduct$set_name(adduct[[i <- i+1]]) + dummy_adduct$set_multi(adduct[[i <- i+1]]) + dummy_adduct$set_charge(adduct[[i <- i+1]]) + dummy_adduct$set_mass(adduct[[i <- i+1]]) + dummy_adduct$set_oidscore(adduct[[i <- i+1]]) + dummy_adduct$set_quasi(adduct[[i <- i+1]]) + dummy_adduct$set_ips(adduct[[i <- i+1]]) + dummy_adduct$set_formula_add(adduct[[i <- i+1]]) + dummy_adduct$set_formula_ded(adduct[[i <- i+1]]) + dummy_adduct$save() + dummy_adduct$clear(unset_id=TRUE) + } + message("Adducts created") +} + +insert_base_data <- function(orm, path, archetype=FALSE) { + if (archetype) { + ## not implemented yet + return () + } + base_data <- readLines(path) + for (sql in strsplit(paste(base_data, collapse=" "), ";")[[1]]) { + orm$execute(sql) + } + set_database_version(orm, "enriched") +} + +insert_compounds <- function(orm, compounds_path) { + compounds <- read.csv(file=compounds_path, sep="\t") + if (is.null(compounds <- translate_compounds(compounds))) { + stop("Could not find asked compound's attributes in csv file.") + } + dummy_compound <- orm$compound() + compound_list <- list() + for (i in seq_len(nrow(compounds))) { + dummy_compound$set_mz(compounds[i, "mz"]) + dummy_compound$set_name(compounds[i, "name"]) + dummy_compound$set_common_name(compounds[i, "common_name"]) + dummy_compound$set_formula(compounds[i, "formula"]) + # dummy_compound$set_mz(compounds[i, "mz"]) + # dummy_compound$set_mz(compounds[i, "mz"]) + compound_list[[length(compound_list)+1]] <- as.list( + dummy_compound, + c("mz", "name", "common_name", "formula") + ) + dummy_compound$clear(unset_id=TRUE) + } + 
dummy_compound$save(bulk=compound_list) +} + +translate_compounds <- function(compounds) { + recognized_headers <- list( + c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey") + ) + header_translators <- list( + hmdb_header_translator + ) + for (index in seq_along(recognized_headers)) { + headers <- recognized_headers[[index]] + if (identical(colnames(compounds), headers)) { + return (header_translators[[index]](compounds)) + } + } + if (is.null(translator <- guess_translator(colnames(compounds)))) { + return (NULL) + } + return (csv_header_translator(translator, compounds)) +} + +guess_translator <- function(header) { + result <- list( + # HMDB_ID=NULL,< + mz=NULL, + name=NULL, + common_name=NULL, + formula=NULL, + # inchi_key=NULL + ) + asked_cols <- names(result) + for (asked_col in asked_cols) { + for (col in header) { + if ((twisted <- tolower(col)) == asked_col + || gsub("-", "_", twisted) == asked_col + || gsub(" ", "_", twisted) == asked_col + || tolower(gsub("(.)([A-Z])", "\\1_\\2", col)) == asked_col + ) { + result[[asked_col]] <- col + next + } + } + } + if (any(mapply(is.null, result))) { + return (NULL) + } + return (result) +} + +hmdb_header_translator <- function(compounds) { + return (csv_header_translator( + list( + HMDB_ID="HMDB_ID", + mz="MzBank", + name="MetName", + common_name="MetName", + formula="ChemFormula", + inchi_key="INChIkey" + ), compounds + )) +} + +csv_header_translator <- function(translation_table, csv) { + header_names <- names(translation_table) + result <- data.frame(1:nrow(csv)) + # colnames(result) <- header_names + for (i in seq_along(header_names)) { + result[, header_names[[i]]] <- csv[, translation_table[[i]]] + } + print(result[, "mz"]) + result[, "mz"] <- as.numeric(result[, "mz"]) + print(result[, "mz"]) + return (result) +} + +set_database_version <- function(orm, version) { + orm$set_tag( + version, + tag_name="database_version", + tag_table_name="XSeeker_tagging_table" + ) +} + 
+## Imports the samples of an enriched RData environment into the database.
+##
+## orm:     ORM handle, forwarded untouched to process_sample_list().
+## rdata:   environment holding the objects loaded from the RData file. It
+##          must be an environment: gather_mzml_files() stores the extracted
+##          file list in it by reference.
+## options: parsed command line options; `samples` and `not-show-percent`
+##          are the only entries read here.
+##
+## Any error raised while processing is propagated to the caller once the
+## temporary extraction directory (if any) has been removed.
+process_rdata <- function(orm, rdata, options) {
+  mzml_tmp_dir <- gather_mzml_files(rdata)
+  if (!is.null(mzml_tmp_dir)) {
+    ## Registered before any processing so the extracted files are removed
+    ## on success and on error alike, without re-throwing the error from
+    ## here (which would hide the original call stack).
+    on.exit(unlink(mzml_tmp_dir, recursive=TRUE), add=TRUE)
+  }
+  samples <- names(rdata$singlefile)
+  if (!is.null(options$samples)) {
+    ## Keep only the known samples that were asked for. The operands were
+    ## previously inverted, which selected every sample as soon as one
+    ## requested name matched.
+    ## NOTE(review): options$samples is compared as-is; if the command line
+    ## passes several names in one delimited string it has to be split
+    ## first -- confirm against the tool wrapper.
+    samples <- samples[samples %in% options$samples]
+  }
+  ## Percentages are shown unless the flag is explicitly TRUE.
+  show_percent <- !isTRUE(options$`not-show-percent`)
+  process_sample_list(
+    orm, rdata, samples,
+    show_percent=show_percent
+  )
+  invisible(NULL)
+}
+
+## Makes sure rdata$singlefile is populated.
+##
+## When rdata$singlefile is missing, rdata$zipfile is extracted into a
+## dedicated temporary directory and rdata$singlefile is set to the extracted
+## paths, named after the files' base names without extension.
+##
+## Returns the path of the directory created (the caller is responsible for
+## deleting it), or NULL when nothing had to be extracted.
+gather_mzml_files <- function(rdata) {
+  if (!is.null(rdata$singlefile)) {
+    return (NULL)
+  }
+  message("Extracting mxml files")
+  ## A private sub-directory is used rather than tempdir() itself: the caller
+  ## deletes the returned directory recursively, and deleting tempdir() would
+  ## wipe the temporary directory of the whole R session.
+  tmp <- tempfile(pattern="mzml_")
+  dir.create(tmp)
+  rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp)
+  names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile))
+  message("Extracted")
+  return (tmp)
+}
+
+## Stores the xcms set summary, then every selected sample, in the database.
+##
+## orm:          ORM handle used to build and save the models.
+## radta:        environment holding the loaded RData objects (the parameter
+##               name is a historical typo, kept so existing calls still work).
+## sample_names: names of the samples to import; the others are skipped.
+## show_percent: whether progress percentages are displayed while the
+##               features are created.
+##
+## Returns NULL. Stops when no grouping variable can be found in the
+## variable metadata.
+process_sample_list <- function(orm, radta, sample_names, show_percent) {
+  ## The body used to read `rdata`, which silently resolved to the global
+  ## variable instead of the argument.
+  rdata <- radta
+
+  file_grouping_var <- find_grouping_var(rdata$variableMetadata)
+  message("Processing samples.")
+  if (is.null(file_grouping_var)) {
+    stop("Malformed variableMetada.")
+  }
+  message(sprintf("File grouping variable: %s", file_grouping_var))
+
+  ## Copy the argument lists of the previous processing steps into plain,
+  ## unnamed-at-top-level lists.
+  process_arg_list <- rdata$listOFlistArguments
+  process_params <- list()
+  for (list_name in names(process_arg_list)) {
+    param_list <- list()
+    for (param_name in names(process_arg_list[[list_name]])) {
+      param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]]
+    }
+    process_params[[length(process_params)+1]] <- param_list
+  }
+  message("Parameters from previous processes extracted.")
+
+  var_meta <- rdata$variableMetadata
+  align_group <- rep(0, nrow(var_meta))
+  var_meta <- cbind(var_meta, align_group)
+
+  xcms_set <- rdata$xa@xcmsSet
+
+  ## Shared state handed to every add_sample_to_database() call.
+  context <- new.env()
+  context$clusters <- list()
+  context$groupidx <- xcms_set@groupidx
+  context$peaks <- xcms_set@peaks
+  context$show_percent <- show_percent
+
+  indices <- as.numeric(unique(var_meta[, file_grouping_var]))
+
+  ## Summary of the xcms set, saved once as a compressed blob.
+  smol_xcms_set <- orm$smol_xcms_set()
+  mz_tab_info <- new.env()
+  g <- xcms::groups(xcms_set)
+  mz_tab_info$sampnames <- xcms::sampnames(xcms_set)
+  mz_tab_info$sampclass <- xcms::sampclass(xcms_set)
+  mz_tab_info$rtmed <- g[,"rtmed"]
+  mz_tab_info$mzmed <- g[,"mzmed"]
+  mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into")
+  str(as.list(mz_tab_info))
+  serialized <- serialize(mz_tab_info, NULL)
+  compressed <- fst::compress_fst(serialized, compression=100)
+  blobified <- blob::blob(compressed)
+  print(length(blobified))
+  smol_xcms_set$set_raw(blobified)$save()
+  # smol_xcms_set$set_raw(blobified)$save()
+  # smol_xcms_set$save()
+
+  sample_files <- rdata$singlefile
+  for (no in indices) {
+    ## The index has to be validated before it is used: `[[` raises on an
+    ## out-of-range subscript, so the NULL test below alone cannot skip it.
+    if (is.na(no) || no < 1 || no > length(sample_files)) {
+      next
+    }
+    sample_name <- names(sample_files)[[no]]
+    sample_path <- sample_files[[no]]
+    if (is.null(sample_path) || !(sample_name %in% sample_names)) {
+      next
+    }
+    ms_file <- xcms::xcmsRaw(sample_path)
+    ## Everything put in `env` is serialized with the sample by
+    ## add_sample_to_database().
+    env <- new.env()
+    env$variableMetadata <- var_meta[var_meta[, file_grouping_var]==no,]
+    env$tic <- ms_file@tic
+    env$mz <- ms_file@env$mz
+    env$scanindex <- ms_file@scanindex
+    env$scantime <- ms_file@scantime
+    env$intensity <- ms_file@env$intensity
+    env$polarity <- as.character(ms_file@polarity[[1]])
+    env$sample_name <- sample_name
+    env$dataset_path <- sample_path
+    env$process_params <- process_params
+    env$enriched_rdata <- TRUE
+    env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
+    env$tool_name <- TOOL_NAME
+    env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
+    context$sample_no <- no
+    add_sample_to_database(orm, env, context)#, smol_xcms_set)
+  }
+  message("Features enrichment")
+  complete_features(orm, context)
+  message("Features enrichment done.")
+  return (NULL)
+}
+
+## Returns the name of the first column among "." and "Bio" that exists in
+## var_meta, or NULL when neither is present.
+find_grouping_var <- function(var_meta) {
+  for (grouping_var in c(".", "Bio")) {
+    ## Look in the argument; the global `rdata` used to be read here.
+    if (!is.null(var_meta[[grouping_var]])) {
+      return (grouping_var)
+    }
+  }
+  return (NULL)
+}
+
+add_sample_to_database <- function(orm, env, context){#, smol_xcms_set) { + message(sprintf("Processing sample %s", env$sample_name)) + sample <- ( + orm$sample() + $set_name(env$sample_name) + $set_path(env$dataset_path) + $set_kind("enriched_rdata") + $set_polarity( + if (is.null(env$polarity) || identical(env$polarity, character(0))) "" + else env$polarity + ) + # $set_smol_xcms_set(smol_xcms_set) + $set_raw(blob::blob(fst::compress_fst( + serialize(env, NULL), + compression=100 + ))) + $save() + ) + load_variable_metadata(orm, sample, env$variableMetadata, context) + load_process_params(orm, sample, env$process_params) + message(sprintf("Sample %s inserted.", env$sample_name)) + return (sample) +} + + +load_variable_metadata <- function(orm, sample, var_meta, context) { + all_clusters <- orm$cluster()$all() + + next_feature_id <- get_next_id(orm$feature()$all(), "featureID") + next_cluster_id <- get_next_id(all_clusters, "clusterID") + next_pc_group <- get_next_id(all_clusters, "pc_group") + next_align_group <- get_next_id(all_clusters, "align_group") + message("Extracting features") + invisible(create_features( + orm, sample, var_meta, context, + next_feature_id, next_cluster_id, + next_pc_group, next_align_group + )) + message("Extracting features done.") + return (NULL) +} + +get_next_id <- function(models, attribute) { + if ((id <- models$max(attribute)) == Inf || id == -Inf) { + return (1) + } + return (id + 1) +} + +create_features <- function( + orm, sample, var_meta, context, + next_feature_id, next_cluster_id, + next_pc_group, next_align_group +) { + field_names <- as.list(names(orm$feature()$fields__)) + field_names[field_names=="id"] <- NULL + + features <- list() + dummy_feature <- orm$feature() + + if (show_percent <- context$show_percent) { + percent <- -1 + total <- nrow(var_meta) + } + for (row in seq_len(nrow(var_meta))) { + if (show_percent && (row / total) * 100 > percent) { + percent <- percent + 1 + message("\r", sprintf("\r%d %%", 
percent), appendLF=FALSE) + } + + curent_var_meta <- var_meta[row, ] + + peak_list <- context$peaks[context$groupidx[[row]], ] + sample_peak_list <- peak_list[peak_list[, "sample"] == context$sample_no, , drop=FALSE] + if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { + if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { + dummy_feature$set_int_o(int_o) + } + if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { + dummy_feature$set_int_b(int_b) + } + if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { + dummy_feature$set_max_o(max_o) + } + } + + set_feature_fields_from_var_meta(dummy_feature, curent_var_meta) + + dummy_feature$set_featureID(next_feature_id) + next_feature_id <- next_feature_id + 1 + fake_iso <- dummy_feature$get_iso() + iso <- extract_iso(fake_iso) + clusterID <- extract_clusterID(fake_iso, next_cluster_id) + context$clusterID <- clusterID + dummy_feature$set_iso(iso) + create_associated_cluster( + sample, dummy_feature, clusterID, + context, curent_var_meta, next_pc_group, + next_align_group + ) + next_align_group <- next_align_group + 1 + features[[length(features)+1]] <- as.list(dummy_feature, field_names) + dummy_feature$clear() + } + message("")## +\n for previous message + message("Saving features") + dummy_feature$save(bulk=features) + message("Saved.") + return (context$clusters) +} + +extract_peak_var <- function(peak_list, var_name, selector=max) { + value <- peak_list[, var_name] + names(value) <- NULL + return (selector(value)) +} + +set_feature_fields_from_var_meta <- function(feature, var_meta) { + if (!is.null(mz <- var_meta[["mz"]]) && !is.na(mz)) { + feature$set_mz(mz) + } + if (!is.null(mzmin <- var_meta[["mzmin"]]) && !is.na(mzmin)) { + feature$set_mz_min(mzmin) + } + if (!is.null(mzmax <- var_meta[["mzmax"]]) && !is.na(mzmax)) { + feature$set_mz_max(mzmax) + } + if (!is.null(rt <- var_meta[["rt"]]) && !is.na(rt)) { + 
feature$set_rt(rt) + } + if (!is.null(rtmin <- var_meta[["rtmin"]]) && !is.na(rtmin)) { + feature$set_rt_min(rtmin) + } + if (!is.null(rtmax <- var_meta[["rtmax"]]) && !is.na(rtmax)) { + feature$set_rt_max(rtmax) + } + if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) { + feature$set_iso(isotopes) + } + return (feature) +} + +extract_iso <- function(weird_data) { + if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { + return (sub("^\\[\\d+\\]", "", weird_data, perl=TRUE)) + } + return (weird_data) +} + +extract_clusterID <- function(weird_data, next_cluster_id){ + if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { + clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]") + clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+")) + } else { + clusterID <- 0 + } + return (clusterID + next_cluster_id) +} + +create_associated_cluster <- function( + sample, feature, grouping_variable, + context, curent_var_meta, next_pc_group, next_align_group +) { + pcgroup <- as.numeric(curent_var_meta[["pcgroup"]]) + adduct <- as.character(curent_var_meta[["adduct"]]) + annotation <- curent_var_meta[["isotopes"]] + grouping_variable <- as.character(grouping_variable) + if (is.null(cluster <- context$clusters[[grouping_variable]])) { + cluster <- context$clusters[[grouping_variable]] <- orm$cluster( + pc_group=pcgroup + next_pc_group, + adduct=adduct, + align_group=next_align_group, + # curent_group=curent_group, + clusterID=context$clusterID, + annotation=annotation + )$set_sample(sample) + } else { + if (context$clusterID != 0 && cluster$get_clusterID() == 0) { + cluster$set_clusterID(context$clusterID) + } + } + cluster$save() + feature$set_cluster(cluster) + return (feature) +} + +complete_features <- function(orm, context) { + for (cluster in context$clusters) { + features <- orm$feature()$load_by(cluster_id=cluster$get_id()) + if (features$any()) { + if (!is.null(rt <- features$mean("rt"))) { + cluster$set_mean_rt(rt)$save() + } + features_df <- 
as.data.frame(features) + central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ] + central_feature_into <- central_feature[["int_o"]] + if (!identical(central_feature_into, numeric(0)) && central_feature_into != 0) { + for (feature in as.vector(features)) { + feature$set_abundance( + feature$get_int_o() / central_feature_into * 100 + )$save() + } + } + } + } + return (NULL) +} + +load_process_params <- function(orm, sample, params) { + for (param_list in params) { + if (is.null(param_list[["xfunction"]])) { + next + } + if (param_list[["xfunction"]] == "annotatediff") { + load_process_params_peak_picking(orm, sample, param_list) + } + } + return (sample) +} + +load_process_params_peak_picking <- function(orm, sample, peak_picking_params) { + return (add_sample_process_parameters( + params=peak_picking_params, + params_translation=list( + ppm="ppm", + maxcharge="maxCharge", + maxiso="maxIso" + ), + param_model_generator=orm$peak_picking_parameters, + sample_param_setter=sample$set_peak_picking_parameters + )) +} + +add_sample_process_parameters <- function( + params, + params_translation, + param_model_generator, + sample_param_setter +) { + model_params <- list() + for (rdata_param_name in names(params_translation)) { + database_param_name <- params_translation[[rdata_param_name]] + if (is.null(rdata_param <- params[[rdata_param_name]])) { + next + } + model_params[[database_param_name]] <- rdata_param + } + params_models <- do.call(param_model_generator()$load_by, model_params) + if (params_models$any()) { + params_model <- params_models$first() + } else { + params_model <- do.call(param_model_generator, model_params) + params_model$save() + } + return (sample_param_setter(params_model)$save()) +} + + +library(optparse) + +option_list <- list( + optparse::make_option( + c("-v", "--version"), + action="store_true", + help="Display this tool's version and exits" + ), + optparse::make_option( + c("-i", "--input"), + type="character", + help="The 
rdata path to import in XSeeker" + ), + optparse::make_option( + c("-s", "--samples"), + type="character", + help="Samples to visualise in XSeeker" + ), + optparse::make_option( + c("-B", "--archetype"), + type="character", + help="The name of the base database" + ), + optparse::make_option( + c("-b", "--database"), + type="character", + help="The base database's path" + ), + optparse::make_option( + c("-c", "--compounds-csv"), + type="character", + help="The csv containing compounds" + ), + optparse::make_option( + c("-m", "--models"), + type="character", + help="The path or url (must begin with http[s]:// or git@) to the database's models" + ), + optparse::make_option( + c("-o", "--output"), + type="character", + help="The path where to output sqlite" + ), + optparse::make_option( + c("-P", "--not-show-percent"), + action="store_true", + help="Flag not to show the percents", + default=FALSE + ) +) + +options(error=function(){traceback(3)}) + +parser <- OptionParser(usage="%prog [options] file", option_list=option_list) +args <- parse_args(parser, positional_arguments=0) + +err_code <- 0 + +if (!is.null(args$options$version)) { + message(sprintf("%s %s", TOOL_NAME, VERSION)) + quit() +} + +models <- get_models(args$options$models) +orm <- DBModelR::ORM( + connection_params=list(dbname=args$options$output), + dbms="SQLite" +) + +invisible(orm$models(models)) +invisible(create_database(orm)) + +message("Database model created") + +insert_adducts(orm) + +if (!is.null(args$options$database)) { + insert_base_data(orm, args$options$database) +} +message(sprintf("Base data inserted using %s.", args$options$database)) + +if (!is.null(args$options$archetype)) { + insert_base_data(orm, args$options$archetype, archetype=TRUE) +} +if (!is.null(args$options$`compounds-csv`)) { + insert_compounds(orm, args$options$`compounds-csv`) +} + +# if (!is.null(args$options$rdata)) { +# load_rdata_in_base(args$options$rdata, args$options$samples, args$options$`not-show-percent`) +# } + + 
+load(args$options$input, rdata <- new.env()) + +process_rdata(orm, rdata, args$options) + +quit(status=err_code) + + diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIE/tools/LC-MSMS/XSeekerPreparator.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIE/tools/LC-MSMS/XSeekerPreparator.xml Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,335 @@ + + Prepare RData file from CAMERA to be visualized in XSeeker + + + operation_1812 + operation_0335 + + + + /home/lpavot/R/bin/Rscript + + + + + + + + + + + + + /home/lpavot/R/bin/Rscript '$__tool_directory__/XSeekerPreparator.R' -v + + + + + + + + + + +
+ + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + + + + + +tryCatch({ + DBModelR::ModelDefinition(table="yui", fields=list(yui="INTEGER")) +}, error=function(e) { + stop("Please, install DBModelR before you source this file.") +}) + +list( + adduct=DBModelR::ModelDefinition( + table="adduct", + fields=list( + name="TEXT", + mass="FLOAT", + charge="INTEGER", + multi="INTEGER", + formula_add="TEXT", + formula_ded="TEXT", + sign="TEXT", + oidscore="INTEGER", + quasi="INTEGER", + ips="FLOAT" + ) + ), + cluster=DBModelR::ModelDefinition( + table="cluster", + fields=list( + clusterID="INTEGER", + formula="TEXT", + annotation="TEXT", + coeff="FLOAT", + r_squared="FLOAT", + charge="INTEGER", + mean_rt="FLOAT", + score="FLOAT", + deviation="FLOAT", + status="TEXT", + adduct="TEXT", + curent_group="INTEGER", + pc_group="INTEGER", + align_group="INTEGER", + xcms_group="INTEGER" + ), + one=list("sample", "compound") + ), + compound=DBModelR::ModelDefinition( + table="compound", + fields=list( + name="TEXT", + common_name="TEXT", + formula="TEXT", + charge="INTEGER", + date="TEXT", + mz="FLOAT" + ) + ), + feature=DBModelR::ModelDefinition( + table="feature", + fields=list( + featureID="INTEGER", + mz="FLOAT", + mz_min="FLOAT", + mz_max="FLOAT", + rt="FLOAT", + rt_min="FLOAT", + rt_max="FLOAT", + int_o="FLOAT", + int_b="FLOAT", + max_o="FLOAT", + iso="TEXT", + abundance="FLOAT" + ), + one=list("cluster") + ), + instrument=DBModelR::ModelDefinition( + table="instrument", + fields=list( + model="TEXT", + manufacturer="TEXT", + analyzer="TEXT", + detector_type="TEXT", + ion_source="TEXT" + ) + ), + instrument_config=DBModelR::ModelDefinition( + table="instrument_config", + fields=list( + resolution="TEXT", + agc_target="TEXT", + maximum_IT="TEXT", + number_of_scan_range="TEXT", + scan_range="TEXT", + version="TEXT" + ) + ), + project=DBModelR::ModelDefinition( + table="project", + fields=list( + name="TEXT", + comment="TEXT" + ), + one=list("sample") + ), + sample=DBModelR::ModelDefinition( + table="sample", + fields=list( 
+ name="TEXT", + path="TEXT", + polarity="TEXT", + kind="TEXT", ## rdata or mxml or enriched_rdata + raw="BLOB" + ), + one=list( + "peak_picking_parameters", + "pairing_parameters", + "alignmenmt_parameters", + "camera_parameters", + "instrument", + "instrument_config", + "software", + "smol_xcms_set" + ) + ), + smol_xcms_set=DBModelR::ModelDefinition( + table="smol_xcms_set", + fields=list( + raw="BLOB" + ) + ), + software=DBModelR::ModelDefinition( + table="software", + fields=list( + name="TEXT", + version="TEXT" + ) + ), + peak_picking_parameters=DBModelR::ModelDefinition( + table="peak_picking_parameters", + fields=list( + ppm="FLOAT", + peakwidth="TEXT", + snthresh="TEXT", + prefilterStep="TEXT", + prefilterLevel="TEXT", + mzdiff="TEXT", + fitgauss="TEXT", + noise="TEXT", + mzCenterFun="TEXT", + integrate="INTEGER", + firstBaselineCheck="TEXT", + snthreshIsoROIs="TEXT", + maxCharge="INTEGER", + maxIso="INTEGER", + mzIntervalExtension="TEXT" + ) + ), + alignmenmt_parameters=DBModelR::ModelDefinition( + table="alignmenmt_parameters", + fields=list( + binSize="TEXT", + centerSample="TEXT", + response="TEXT", + distFun="TEXT", + gapInit="TEXT", + gapExtend="TEXT", + factorDiag="TEXT", + factorGap="TEXT", + localAlignment="INTEGER", + initPenalty="TEXT", + bw="TEXT", + minFraction="TEXT", + minSamples="TEXT", + maxFeatures="TEXT" + ) + ) +) + + +
diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIT/config/galaxy.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIT/config/galaxy.yml Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,54 @@ +diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample +index b0fe163e6d..975acbf855 100644 +--- a/lib/galaxy/config/sample/galaxy.yml.sample ++++ b/lib/galaxy/config/sample/galaxy.yml.sample +@@ -105,6 +105,30 @@ uwsgi: + # to be in the same group as the Galaxy system user + umask: 027 + ++ http: localhost:8080 ++ ++ threads: 8 ++ ++ http-raw-body: True ++ ++ offload-threads: 8 ++ ++ master: true ++ ++ module: galaxy.webapps.galaxy.buildapp:uwsgi_app() ++ ++ interactivetools_enable: true ++ interactivetools_map: database/interactivetools_map.sqlite ++ python-raw: scripts/interactivetools/key_type_token_mapping.py ++ route-host: ^([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)-([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)\.([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)\.(interactivetool\.localhost:8080)$ goto:interactivetool ++ route-run: goto:endendend ++ route-label: interactivetool ++ route-host: ^([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)-([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)\.([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)\.(interactivetool\.localhost:8080)$ rpcvar:TARGET_HOST rtt_key_type_token_mapper_cached $1 $3 $2 $4 $0 5 ++ route-if-not: empty:${TARGET_HOST} httpdumb:${TARGET_HOST} ++ route: .* break:404 Not Found ++ route-label: endendend ++ ++ + galaxy: + + # The directory that will be prepended to relative paths in options +@@ -2091,3 +2115,18 @@ galaxy: + # Path to dynamic tool destinations configuration file. + #tool_destinations_config_file: tool_destinations.yml + ++ ++ interactivetools_enable: true ++ # outputs_to_working_directory will provide you with a better level of isolation. It is highly recommended to set ++ # this parameter with InteractiveTools. 
++ outputs_to_working_directory: true ++ interactivetools_prefix: interactivetool ++ interactivetools_map: database/interactivetools_map.sqlite ++ # If you develop InteractiveTools locally and do not have a full FQDN you can ++ # use an arbritrary one, e.g. 'my-hostname' here, if you set this hostname in your ++ # job_conf.xml as well (see the corresponding comment). If running mac OS X, do not match the ++ # "http://host.docker.internal:8080" used at galaxy_infrastructure_url in the galaxy.yml file, ++ # and use an arbitrary name here instead. ++ # Please make sure that in the local development case you use https://localhost:8080 to access ++ # your Galaxy. http://my-hostname:8080 will not work. ++ # galaxy_infrastructure_url: http://my-hostname:8080 diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIT/config/galaxy.yml.interactivetools --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIT/config/galaxy.yml.interactivetools Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,20 @@ +diff --git a/config/galaxy.yml.interactivetools b/config/galaxy.yml.interactivetools +index fe2d589208..4610c0b68f 100644 +--- a/config/galaxy.yml.interactivetools ++++ b/config/galaxy.yml.interactivetools +@@ -12,6 +12,7 @@ uwsgi: + + module: galaxy.webapps.galaxy.buildapp:uwsgi_app() + ++ interactivetools_enable: true + interactivetools_map: database/interactivetools_map.sqlite + python-raw: scripts/interactivetools/key_type_token_mapping.py + route-host: ^([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)-([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)\.([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)\.(interactivetool\.localhost:8080)$ goto:interactivetool +@@ -23,6 +24,7 @@ uwsgi: + route-label: endendend + + ++ + galaxy: + interactivetools_enable: true + # outputs_to_working_directory will provide you with a better level of isolation. 
It is highly recommended to set diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIT/config/tool_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIT/config/tool_conf.xml Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,24 @@ +diff --git a/lib/galaxy/config/sample/tool_conf.xml.sample b/lib/galaxy/config/sample/tool_conf.xml.sample +index 286a0c9f1c..570a7a6193 100644 +--- a/lib/galaxy/config/sample/tool_conf.xml.sample ++++ b/lib/galaxy/config/sample/tool_conf.xml.sample +@@ -124,15 +124,16 @@ + + +
+- ++ ++ ++ +
diff -r 000000000000 -r 15c9fbefeaf1 resources/galaxy/GIT/tools/interactive/xseeker.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resources/galaxy/GIT/tools/interactive/xseeker.xml Tue Feb 01 14:19:30 2022 +0000 @@ -0,0 +1,45 @@ + + Webbased Interactive bidules visualization + + xseeker + + + + / + 8765 + + + + TRUE + http://172.17.0.1:8080/ + $__history_id__ + + + + + + + + + + + + + + + + + i love to talk to me myself + +