view matrix-service.xml @ 2:c3c85b67d118 draft default tip

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 81ed2d7aaad0f5536d835e75e483c39c3984d565"
author ebi-gxa
date Sun, 27 Dec 2020 00:00:19 +0000
parents 2e81fbe036b2
children
line wrap: on
line source

<?xml version="1.0" encoding="utf-8"?>
<tool id="hca_matrix_downloader" name="Human Cell Atlas Matrix Downloader" version="v0.0.4+galaxy0">
  <!-- Short tagline for the tool. -->
  <description>retrieves expression matrices and metadata from the Human Cell Atlas.</description>
  <!-- Package dependency: hca-matrix-downloader, pinned to version 0.0.4. -->
  <requirements>
    <requirement type="package" version="0.0.4">hca-matrix-downloader</requirement>
  </requirements>
  <command detect_errors="exit_code"><![CDATA[
## Download the matrix for the requested project using the output prefix
## "out", in the format selected by the user.
hca-matrix-downloader -p '${project}' -o out -f '${matrix_format}'

## Optional species selector, only emitted when a value was supplied.
## The value is single-quoted, like the other user-supplied values, so that
## text containing whitespace is passed to -s as exactly one argument.
#if $species:
  -s '${species}'
#end if
## Matrix Market only: run hca-mtx-to-10x on out.mtx, writing into the working
## directory, then decompress the cell metadata into exp_design.tsv so it can
## be collected as an output.
#if $matrix_format == "mtx":
  && hca-mtx-to-10x out.mtx .
  && gunzip -c out.mtx/cells.tsv.gz > exp_design.tsv
#end if

]]></command>

  <inputs>
    <!-- Project to fetch; handed to the downloader through its -p option. -->
    <param name="project"
           type="text"
           value="Single cell transcriptome analysis of human pancreas"
           label="Human Cell Atlas project name/label/UUID"
           help="HCA project identifier, can be project title, project label or project UUID."/>

    <!-- Download format; also decides which outputs are produced. -->
    <param name="matrix_format"
           type="select"
           label="Choose the format of matrix to download"
           help="Matrix Market or Loom">
      <option value="mtx" selected="true">Matrix Market</option>
      <option value="loom">Loom</option>
    </param>

    <!-- Optional species selector; forwarded with -s only when non-empty. -->
    <param name="species"
           argument="--species"
           optional="true"
           type="text"
           label="Species to use"
           help="Certain studies will contain more than one species, in that case, one should be given as input. If you try to download data from such a study, the tool will fail and give you a list in the Std. out of what are the potential species to paste here. This is not needed for most studies that have a single species."/>
  </inputs>

  <outputs>
    <!-- Matrix Market mode: four files picked up from the job working directory. -->
    <data name="matrix_mtx"
          format="txt"
          from_work_dir="matrix.mtx"
          label="${tool.name} on ${on_string} matrix.mtx">
      <filter>matrix_format=="mtx"</filter>
    </data>
    <data name="genes_tsv"
          format="tsv"
          from_work_dir="genes.tsv"
          label="${tool.name} on ${on_string} genes.tsv">
      <filter>matrix_format=="mtx"</filter>
    </data>
    <data name="barcode_tsv"
          format="tsv"
          from_work_dir="barcodes.tsv"
          label="${tool.name} on ${on_string} barcodes.tsv">
      <filter>matrix_format=="mtx"</filter>
    </data>
    <!-- Cell metadata, written to exp_design.tsv by the gunzip step of the command. -->
    <data name="cells_meta_tsv"
          format="tsv"
          from_work_dir="exp_design.tsv"
          label="${tool.name} on ${on_string} exp_design.tsv">
      <filter>matrix_format=="mtx"</filter>
    </data>

    <!-- Loom mode: a single file, collected from out.loom. -->
    <data name="matrix_loom"
          format="h5"
          from_work_dir="out.loom"
          label="${tool.name} on ${on_string} matrix.loom">
      <filter>matrix_format=="loom"</filter>
    </data>
  </outputs>

  <tests>
    <!-- Matrix Market download: all four outputs are compared by md5 checksum. -->
    <!-- NOTE(review): presumably needs network access to fetch the project; confirm. -->
    <test>
      <param name="project" value="cddab57b-6868-4be4-806f-395ed9dd635a"/>
      <param name="matrix_format" value="mtx"/>
      <output name="matrix_mtx"
              md5="0549fc0260eb6974affb115ac80b6e85"
              ftype="txt"/>
      <output name="genes_tsv"
              md5="7e131ba105f713d2f9766d61c246d0da"
              ftype="tsv"/>
      <output name="barcode_tsv"
              md5="327926a684ad8eaee5de8243c32b415c"
              ftype="tsv"/>
      <output name="cells_meta_tsv"
              md5="defa674847e3e1877ab1957a3291b28d"
              ftype="tsv"/>
    </test>

    <!-- Loom download with an explicit species; the single output is compared by md5. -->
    <test>
      <param name="project" value="f8aa201c-4ff1-45a4-890e-840d63459ca2"/>
      <param name="matrix_format" value="loom"/>
      <param name="species" value="homo_sapiens"/>
      <output name="matrix_loom"
              md5="9ba6bd109ec271c338251c5519ea9f5d"
              ftype="h5"/>
    </test>
  </tests>

  <help><![CDATA[
=================================================================================
Download expression matrices from HCA projects using HCA DCP's matrix service API
=================================================================================

The data retrieval tool presented here allows the user to retrieve expression matrices
and metadata for any public experiment available at the Human Cell Atlas data portal.

To use it, simply set the name, or label, or ID for the desired project, which can be
found at the HCA data browser (https://data.humancellatlas.org/explore/projects),
and select the desired matrix format (Matrix Market or Loom).

For projects that have more than one organism, one needs to be specified. If none
is specified, then the job will fail and the available options to be specified
will be listed in the stdout of the job.

Outputs will be:

- *When "Matrix Market" is selected, outputs are in 10X-compatible Matrix Market format:*

  1. **Matrix (txt):**

     Contains the expression values for genes (rows) and cells (columns) in raw counts. This
     text file is formatted as a Matrix Market file, and as such it is accompanied by separate
     files for the gene identifiers and the cells identifiers.

  2. **Genes (tsv):**

     Identifiers (column repeated) for the genes present in the matrix of expression,
     in the same order as the matrix rows.

  3. **Barcodes (tsv):**

     Identifiers for the cells of the data matrix. The file is ordered to match the columns
     of the matrix.

  4. **Experiment Design file (tsv):**

     Contains metadata for the different cells of the experiment.

- *When "Loom" is selected, output is a single Loom HDF5 file:*

  1. **Loom (h5):**

     Contains expression values for genes (rows) and cells (columns) in raw counts, cell
     metadata table and gene metadata table, in a single HDF5 file with specification defined
     in http://linnarssonlab.org/loompy/format/index.html.

**Version history**

     0.0.4+galaxy0: Retrieves data from EBI FTP until an equivalent Matrix service for DCP 2.0 is established. Deals with multi-organism studies.

     0.0.2+galaxy0: Initial contribution. Ni Huang and Pablo Moreno, Teichmann Lab at Wellcome Sanger Institute and
     Expression Atlas team https://www.ebi.ac.uk/gxa/home  at EMBL-EBI https://www.ebi.ac.uk/.

]]></help>
  <!-- Publications to cite when using this tool, referenced by DOI. -->
  <citations>
    <citation type="doi">10.7554/eLife.27041</citation>
    <citation type="doi">10.1101/2020.04.08.032698</citation>
  </citations>
</tool>