comparison retrieve-scxa.xml @ 0:cd6b80f62fcc draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 9bf9a6e46a330890be932f60d1d996dd166426c4
author ebi-gxa
date Wed, 03 Apr 2019 12:01:53 -0400
parents
children cc21614b6693
comparison
equal deleted inserted replaced
-1:000000000000 0:cd6b80f62fcc
1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id="retrieve_scxa" name="EBI SCXA Data Retrieval" version="v0.0.1+galaxy1">
3 <description>Retrieves expression matrices and metadata from EBI Single Cell Expression Atlas (SCXA)</description>
4 <requirements> <!-- NOTE(review): only wget is declared here, but the command also calls unzip, awk, cut and mv; confirm these are available in the job environment -->
5 <requirement type="package" version="1.18">gnu-wget</requirement>
6 </requirements>
7 <command detect_errors="exit_code"><![CDATA[
8 ## Steps are chained with '&&' so that a failed download or conversion gives the job a non-zero exit code (detect_errors is exit_code).
9 #if str($matrix_type) == "tpm":
10 ## TPM: fetch the quantification zip, then build matrix, genes (id column duplicated) and barcodes from its members. NOTE(review): the ';' after unzip is kept from the original, presumably to tolerate unzip warning statuses; a missing matrix is still caught by the mv that follows.
11 wget -O exp_quant.zip
12 'https://www.ebi.ac.uk/gxa/sc/experiment/${accession}/download/zip?fileType=quantification-filtered&accessKey=' &&
13 unzip exp_quant.zip;
14 mv '${accession}'.expression_tpm.mtx '${matrix_mtx}' &&
15 awk '{OFS="\t"; print \$2,\$2}' '${accession}'.expression_tpm.mtx_rows > '${genes_tsv}' &&
16 cut -f2 '${accession}'.expression_tpm.mtx_cols > '${barcode_tsv}' &&
17
18 #else if str($matrix_type) == "raw":
19 ## Raw counts: the row and column listings are saved to the job directory first, so a failed wget is not hidden behind a pipe.
20 wget -O '${matrix_mtx}' 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/sc_experiments/${accession}/${accession}.aggregated_filtered_counts.mtx' &&
21 wget -qO mtx_cols.tsv 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/sc_experiments/${accession}/${accession}.aggregated_filtered_counts.mtx_cols' && cut -f2 mtx_cols.tsv > '${barcode_tsv}' &&
22 wget -qO mtx_rows.tsv 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/sc_experiments/${accession}/${accession}.aggregated_filtered_counts.mtx_rows'
23 && awk '{OFS="\t"; print \$2,\$2}' mtx_rows.tsv > '${genes_tsv}' &&
24
25 #end if
26 ## The experiment design is downloaded for both matrix types; the design_tsv output reads exp_design.tsv from the job directory.
27 wget -O exp_design.tsv
28 'https://www.ebi.ac.uk/gxa/sc/experiment/${accession}/download?fileType=experiment-design&accessKey='
29
30 ]]></command>
31
32 <inputs> <!-- accession is interpolated into the download URLs; matrix_type selects which branch of the command runs -->
33 <param type="text" name="accession" label="SC-Atlas experiment accession" help="EBI Single Cell Atlas accession for the experiment that you want to retrieve." value="E-GEOD-100058"/>
34 <param type="select" name="matrix_type" help="Raw filtered counts or (non-filtered) TPMs" label="Choose the type of matrix to download">
35 <option selected="true" value="raw">Raw filtered counts</option>
36 <option value="tpm">TPMs</option>
37 </param>
38 </inputs>
39
40 <outputs> <!-- matrix_mtx, genes_tsv and barcode_tsv are written directly by the command; design_tsv is read from exp_design.tsv in the job directory -->
41 <data format="txt" name="matrix_mtx" label="${tool.name} on ${on_string} ${accession} matrix.mtx (${matrix_type.value_label})"/>
42 <data format="tsv" name="genes_tsv" label="${tool.name} on ${on_string} ${accession} genes.tsv (${matrix_type.value_label})"/>
43 <data format="tsv" name="barcode_tsv" label="${tool.name} on ${on_string} ${accession} barcodes.tsv (${matrix_type.value_label})"/>
44 <data format="tsv" name="design_tsv" label="${tool.name} on ${on_string} ${accession} exp_design.tsv" from_work_dir="exp_design.tsv"/>
45 </outputs>
46
47 <tests> <!-- Single test case: TPM matrix for E-GEOD-100058, with each of the four outputs compared against its named expected file -->
48 <test>
49 <param value="E-GEOD-100058" name="accession"/>
50 <param value="tpm" name="matrix_type"/>
51 <output ftype="txt" name="matrix_mtx" file="E-GEOD-100058.expression_tpm.mtx"/>
52 <output ftype="tsv" name="genes_tsv" file="E-GEOD-100058.genes.tsv"/>
53 <output ftype="tsv" name="barcode_tsv" file="E-GEOD-100058.barcodes.tsv"/>
54 <output ftype="tsv" name="design_tsv" file="E-GEOD-100058.exp_design.tsv"/>
55 </test>
56 </tests>
57
58 <help><![CDATA[
59 =================================================================================
60 Gene expression analysis in single cells across species and biological conditions
61 =================================================================================
62
63 Single Cell Expression Atlas supports research in single cell transcriptomics.
64 The Atlas annotates publicly available single cell RNA-Seq experiments with
65 ontology identifiers and re-analyses them using standardised pipelines available
66 through iRAP, our RNA-Seq analysis toolkit. The browser enables visualisation of
67 clusters of cells, their annotations and supports searches for gene expression
68 within and across studies.
69
70 For more information check https://www.ebi.ac.uk/gxa/sc/home
71
72 EBI SCXA Data Retrieval
73 -----------------------
74
75 The data retrieval tool presented here allows the user to retrieve expression matrices
76 and metadata for any public experiment available at EBI Single Cell Expression Atlas.
77
78 To use it, simply set the accession for the desired experiment and choose the type of
79 matrix that you want to download:
80
81 :Raw filtered counts:
82 This should be the default choice for running clustering and other analysis
83 methods where you will introduce scaling and normalization of the data. The filtering
84 is based on the quality control applied by iRAP prior to pseudo-alignment and quantification.
85
86 :TPMs:
87 TPM stands for Transcripts Per Million, and as the name implies, this has
88 already been normalized/scaled. You should keep this in mind when using this data
89 on methods that will try to normalise data as part of their procedure. Due to technical
90 particularities in the current Atlas SC pipeline, TPMs available here are not filtered.
91
92 Outputs will be:
93
94 :Matrix (txt):
95 Contains the expression values for genes (rows) and samples/runs/cells (columns),
96 in either raw filtered counts or non-filtered TPMs depending on the choice made. This
97 text file is formatted as a Matrix Market file, and as such it is accompanied by
98 separate files for the gene identifiers and the samples/runs/cells identifiers.
99
100 :Genes (tsv):
101 Identifiers (column repeated) for the genes present in the matrix of expression,
102 in the same order as the matrix rows.
103
104 :Barcodes (tsv):
105 Identifiers for the cells, samples or runs of the data matrix. The file is ordered
106 to match the columns of the matrix.
107
108 :Experiment Design file (tsv):
109 Contains metadata for the different cells/samples/runs of the experiment.
110 Please note that this file is generated before the filtering step, and while not
111 often, it might be the case that it contains more cells/samples/runs than the matrix.
112
113 ]]></help>
114 <citations> <!-- DOI cited for this tool; presumably the Expression Atlas publication (TODO confirm) -->
115 <citation type="doi">10.1093/nar/gkv1045</citation>
116 </citations>
117 </tool>