changeset 0:cd6b80f62fcc draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 9bf9a6e46a330890be932f60d1d996dd166426c4
author ebi-gxa
date Wed, 03 Apr 2019 12:01:53 -0400
parents
children cc21614b6693
files retrieve-scxa.xml
diffstat 1 files changed, 117 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/retrieve-scxa.xml	Wed Apr 03 12:01:53 2019 -0400
@@ -0,0 +1,117 @@
+<?xml version="1.0" encoding="utf-8"?>
+<tool id="retrieve_scxa" name="EBI SCXA Data Retrieval" version="v0.0.1+galaxy1">
+  <description>Retrieves expression matrixes and metadata from EBI Single Cell Expression Atlas (SCXA)</description>
+  <requirements> <!-- NOTE(review): only wget is declared; the command also calls unzip, awk, cut and mv - confirm the job environment provides them. -->
+    <requirement type="package" version="1.18">gnu-wget</requirement>
+  </requirements>
+  <command detect_errors="exit_code"><![CDATA[
+## Every step is chained with && so that a failed download, unzip or rename gives a non-zero exit code.
+#if str($matrix_type) == "tpm":
+## TPM: fetch the quantification zip, unpack it, then build genes/barcodes from the .mtx_rows/.mtx_cols files.
+wget -O exp_quant.zip
+    'https://www.ebi.ac.uk/gxa/sc/experiment/${accession}/download/zip?fileType=quantification-filtered&accessKey=' &&
+unzip exp_quant.zip &&
+mv '${accession}'.expression_tpm.mtx '${matrix_mtx}' &&
+awk '{OFS="\t"; print \$2,\$2}' '${accession}'.expression_tpm.mtx_rows > '${genes_tsv}' &&
+cut -f2 '${accession}'.expression_tpm.mtx_cols > '${barcode_tsv}' &&
+
+#else if str($matrix_type) == "raw":
+## Raw counts: save each FTP file to disk before cut/awk, so a wget failure is not hidden behind a pipe.
+wget -O '${matrix_mtx}' 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/sc_experiments/${accession}/${accession}.aggregated_filtered_counts.mtx' &&
+wget -qO mtx_cols.tsv 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/sc_experiments/${accession}/${accession}.aggregated_filtered_counts.mtx_cols' && cut -f2 mtx_cols.tsv > '${barcode_tsv}' &&
+wget -qO mtx_rows.tsv 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/sc_experiments/${accession}/${accession}.aggregated_filtered_counts.mtx_rows'
+ && awk '{OFS="\t"; print \$2,\$2}' mtx_rows.tsv > '${genes_tsv}' &&
+
+#end if
+## The experiment design is fetched for both matrix types; the design_tsv output picks it up via from_work_dir.
+wget -O exp_design.tsv
+    'https://www.ebi.ac.uk/gxa/sc/experiment/${accession}/download?fileType=experiment-design&accessKey=';
+
+]]></command>
+  <!-- accession is interpolated into the download URLs; matrix_type selects the "tpm" or "raw" branch of the command. -->
+  <inputs>
+    <param name="accession" type="text" value="E-GEOD-100058" label="SC-Atlas experiment accession" help="EBI Single Cell Atlas accession for the experiment that you want to retrieve."/>
+    <param name="matrix_type" type="select" label="Choose the type of matrix to download" help="Raw filtered counts or (non-filtered) TPMs">
+      <option value="raw" selected="true">Raw filtered counts</option>
+      <option value="tpm">TPMs</option>
+    </param>
+  </inputs>
+  <!-- Labels omit "on ${on_string}": this tool has no dataset inputs, so there is nothing for on_string to name. -->
+  <outputs>
+    <data name="matrix_mtx" format="txt" label="${tool.name} ${accession} matrix.mtx (${matrix_type.value_label})"/>
+    <data name="genes_tsv" format="tsv" label="${tool.name} ${accession} genes.tsv (${matrix_type.value_label})"/>
+    <data name="barcode_tsv" format="tsv" label="${tool.name} ${accession} barcodes.tsv (${matrix_type.value_label})"/>
+    <data name="design_tsv" format="tsv" from_work_dir="exp_design.tsv" label="${tool.name} ${accession} exp_design.tsv"/>
+  </outputs>
+  <!-- Only the "tpm" branch is exercised here; the command downloads from www.ebi.ac.uk, so this test needs network access. -->
+  <tests>
+    <test>
+      <param name="accession" value="E-GEOD-100058"/>
+      <param name="matrix_type" value="tpm"/>
+      <output name="matrix_mtx" file="E-GEOD-100058.expression_tpm.mtx" ftype="txt"/>
+      <output name="genes_tsv" file="E-GEOD-100058.genes.tsv" ftype="tsv"/>
+      <output name="barcode_tsv" file="E-GEOD-100058.barcodes.tsv" ftype="tsv"/>
+      <output name="design_tsv" file="E-GEOD-100058.exp_design.tsv" ftype="tsv"/>
+    </test>
+  </tests>
+
+  <help><![CDATA[
+=================================================================================
+Gene expression analysis in single cells across species and biological conditions
+=================================================================================
+
+Single Cell Expression Atlas supports research in single cell transcriptomics.
+The Atlas annotates publicly available single cell RNA-Seq experiments with
+ontology identifiers and re-analyses them using standardised pipelines available
+through iRAP, our RNA-Seq analysis toolkit. The browser enables visualisation of
+clusters of cells, their annotations and supports searches for gene expression
+within and across studies.
+
+For more information check https://www.ebi.ac.uk/gxa/sc/home
+
+EBI SCXA Data Retrieval
+-----------------------
+
+The data retrieval tool presented here allows the user to retrieve expression matrices
+and metadata for any public experiment available at EBI Single Cell Expression Atlas.
+
+To use it, simply set the accession for the desired experiment and choose the type of
+matrix that you want to download:
+
+:Raw filtered counts:
+  This should be the default choice for running clustering and other analysis
+  methods where you will introduce scaling and normalization of the data. The filtering
+  is based on the quality control applied by iRAP prior to pseudo-alignment and quantification.
+
+:TPMs:
+  TPM stands for Transcripts Per Million, and as the name implies, these values have
+  already been normalized/scaled. You should keep this in mind when using this data
+  on methods that will try to normalise data as part of their procedure. Due to technical
+  particularities in the current Atlas SC pipeline, TPMs available here are not filtered.
+
+Outputs will be:
+
+:Matrix (txt):
+  Contains the expression values for genes (rows) and samples/runs/cells (columns),
+  in either raw filtered counts or non-filtered TPMs depending on the choice made. This
+  text file is formatted as a Matrix Market file, and as such it is accompanied by
+  separate files for the gene identifiers and the samples/runs/cells identifiers.
+
+:Genes (tsv):
+  Identifiers (column repeated) for the genes present in the matrix of expression,
+  in the same order as the matrix rows.
+
+:Barcodes (tsv):
+  Identifiers for the cells, samples or runs of the data matrix. The file is ordered
+  to match the columns of the matrix.
+
+:Experiment Design file (tsv):
+  Contains metadata for the different cells/samples/runs of the experiment.
+  Please note that this file is generated before the filtering step, so, although this is
+  uncommon, it may contain more cells/samples/runs than the matrix.
+
+]]></help>
+  <citations> <!-- Publication to cite for this tool, given as a DOI. -->
+    <citation type="doi">10.1093/nar/gkv1045</citation>
+  </citations>
+</tool>