Mercurial > repos > ebi-gxa > retrieve_scxa
comparison retrieve-scxa.xml @ 0:cd6b80f62fcc draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 9bf9a6e46a330890be932f60d1d996dd166426c4
author | ebi-gxa |
---|---|
date | Wed, 03 Apr 2019 12:01:53 -0400 |
parents | |
children | cc21614b6693 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cd6b80f62fcc |
---|---|
1 <?xml version="1.0" encoding="utf-8"?> | |
2 <tool id="retrieve_scxa" name="EBI SCXA Data Retrieval" version="v0.0.1+galaxy1"> | |
3 <description>Retrieves expression matrices and metadata from EBI Single Cell Expression Atlas (SCXA)</description> | |
4 <requirements> <!-- Only wget is declared here; the command section also invokes unzip, awk, cut and mv, which are not listed as requirements. --> | |
5 <requirement type="package" version="1.18">gnu-wget</requirement> | |
6 </requirements> | |
7 <command detect_errors="exit_code"><![CDATA[ | |
8 ## Steps are chained with && so that a failed download or conversion yields a non-zero exit status (see detect_errors) instead of being masked by the final wget succeeding. | |
9 #if str($matrix_type) == "tpm": | |
10 ## TPM branch: fetch the quantification zip, unpack it, then build the matrix, genes and barcodes outputs from the extracted files. The exit status of unzip itself is left unchecked as before (NOTE(review): possibly intentional); a failed extraction still surfaces because the mv that follows fails. | |
11 wget -O exp_quant.zip | |
12 'https://www.ebi.ac.uk/gxa/sc/experiment/${accession}/download/zip?fileType=quantification-filtered&accessKey=' && | |
13 unzip exp_quant.zip; | |
14 mv '${accession}.expression_tpm.mtx' '${matrix_mtx}' && | |
15 awk '{OFS="\t"; print \$2,\$2}' '${accession}.expression_tpm.mtx_rows' > '${genes_tsv}' && | |
16 cut -f2 '${accession}.expression_tpm.mtx_cols' > '${barcode_tsv}' && | |
17 | |
18 #else if str($matrix_type) == "raw": | |
19 ## Raw branch: the row and column files are downloaded to intermediate files rather than piped, so a wget failure is not hidden behind the exit status of cut or awk. | |
20 wget -O '${matrix_mtx}' 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/sc_experiments/${accession}/${accession}.aggregated_filtered_counts.mtx' && | |
21 wget -qO raw_cols.tsv 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/sc_experiments/${accession}/${accession}.aggregated_filtered_counts.mtx_cols' && cut -f2 raw_cols.tsv > '${barcode_tsv}' && | |
22 wget -qO raw_rows.tsv 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/atlas/sc_experiments/${accession}/${accession}.aggregated_filtered_counts.mtx_rows' | |
23 && awk '{OFS="\t"; print \$2,\$2}' raw_rows.tsv > '${genes_tsv}' && | |
24 | |
25 #end if | |
26 ## The experiment design table is fetched for both matrix types and left in the working directory as exp_design.tsv. | |
27 wget -O exp_design.tsv | |
28 'https://www.ebi.ac.uk/gxa/sc/experiment/${accession}/download?fileType=experiment-design&accessKey='; | |
29 | |
30 ]]></command> | |
31 <!-- Inputs: accession is free text substituted into the download URLs and file names used by the command; matrix_type (raw selected by default) decides which branch of the command runs. --> | |
32 <inputs> | |
33 <param name="accession" type="text" value="E-GEOD-100058" label="SC-Atlas experiment accession" help="EBI Single Cell Atlas accession for the experiment that you want to retrieve."/> | |
34 <param name="matrix_type" type="select" label="Choose the type of matrix to download" help="Raw filtered counts or (non-filtered) TPMs"> | |
35 <option value="raw" selected="true">Raw filtered counts</option> | |
36 <option value="tpm">TPMs</option> | |
37 </param> | |
38 </inputs> | |
39 <!-- Outputs: matrix_mtx, genes_tsv and barcode_tsv are written to their dataset paths by the command itself; design_tsv is picked up from exp_design.tsv in the working directory. --> | |
40 <outputs> | |
41 <data name="matrix_mtx" format="txt" label="${tool.name} on ${on_string} ${accession} matrix.mtx (${matrix_type.value_label})"/> | |
42 <data name="genes_tsv" format="tsv" label="${tool.name} on ${on_string} ${accession} genes.tsv (${matrix_type.value_label})"/> | |
43 <data name="barcode_tsv" format="tsv" label="${tool.name} on ${on_string} ${accession} barcodes.tsv (${matrix_type.value_label})"/> | |
44 <data name="design_tsv" format="tsv" from_work_dir="exp_design.tsv" label="${tool.name} on ${on_string} ${accession} exp_design.tsv"/> | |
45 </outputs> | |
46 <!-- Tests: a single case using the default accession with matrix_type=tpm; the raw branch has no test here. The command downloads from www.ebi.ac.uk, so this test needs network access. --> | |
47 <tests> | |
48 <test> | |
49 <param name="accession" value="E-GEOD-100058"/> | |
50 <param name="matrix_type" value="tpm"/> | |
51 <output name="matrix_mtx" file="E-GEOD-100058.expression_tpm.mtx" ftype="txt"/> | |
52 <output name="genes_tsv" file="E-GEOD-100058.genes.tsv" ftype="tsv"/> | |
53 <output name="barcode_tsv" file="E-GEOD-100058.barcodes.tsv" ftype="tsv"/> | |
54 <output name="design_tsv" file="E-GEOD-100058.exp_design.tsv" ftype="tsv"/> | |
55 </test> | |
56 </tests> | |
57 | |
58 <help><![CDATA[ | |
59 ================================================================================= | |
60 Gene expression analysis in single cells across species and biological conditions | |
61 ================================================================================= | |
62 | |
63 Single Cell Expression Atlas supports research in single cell transcriptomics. | |
64 The Atlas annotates publicly available single cell RNA-Seq experiments with | |
65 ontology identifiers and re-analyses them using standardised pipelines available | |
66 through iRAP, our RNA-Seq analysis toolkit. The browser enables visualisation of | |
67 clusters of cells, their annotations and supports searches for gene expression | |
68 within and across studies. | |
69 | |
70 For more information check https://www.ebi.ac.uk/gxa/sc/home | |
71 | |
72 EBI SCXA Data Retrieval | |
73 ----------------------- | |
74 | |
75 The data retrieval tool presented here allows the user to retrieve expression matrices | |
76 and metadata for any public experiment available at EBI Single Cell Expression Atlas. | |
77 | |
78 To use it, simply set the accession for the desired experiment and choose the type of | |
79 matrix that you want to download: | |
80 | |
81 :Raw filtered counts: | |
82 This should be the default choice for running clustering and other analysis | |
83 methods where you will introduce scaling and normalization of the data. The filtering | |
84 is based on the quality control applied by iRAP prior to pseudo-alignment and quantification. | |
85 | |
86 :TPMs: | |
87 TPM stands for Transcripts Per Million, and as the name implies, this has been | |
88 already normalized/scaled. You should keep this in mind when using this data | |
89 on methods that will try to normalise data as part of their procedure. Due to technical | |
90 particularities in the current Atlas SC pipeline, TPMs available here are not filtered. | |
91 | |
92 Outputs will be: | |
93 | |
94 :Matrix (txt): | |
95 Contains the expression values for genes (rows) and samples/runs/cells (columns), | |
96 in either raw filtered counts or (non-filtered) TPMs depending on the choice made. This | |
97 text file is formatted as a Matrix Market file, and as such it is accompanied by | |
98 separate files for the gene identifiers and the samples/runs/cells identifiers. | |
99 | |
100 :Genes (tsv): | |
101 Identifiers (column repeated) for the genes present in the matrix of expression, | |
102 in the same order as the matrix rows. | |
103 | |
104 :Barcodes (tsv): | |
105 Identifiers for the cells, samples or runs of the data matrix. The file is ordered | |
106 to match the columns of the matrix. | |
107 | |
108 :Experiment Design file (tsv): | |
109 Contains metadata for the different cells/samples/runs of the experiment. | |
110 Please note that this file is generated before the filtering step, so, although | |
111 it is uncommon, it may contain more cells/samples/runs than the matrix. | |
112 | |
113 ]]></help> | |
114 <citations> <!-- Publication to cite for this tool, referenced by DOI. --> | |
115 <citation type="doi">10.1093/nar/gkv1045</citation> | |
116 </citations> | |
117 </tool> |