changeset 0:970696a7ae0b draft

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 355bb52d2e9d170b1db237e649657cc14e0a047a"
author ebi-gxa
date Fri, 04 Mar 2022 07:28:21 +0000
parents
children 834f7d8e5b4c
files get_test_data.sh seurat_convert.xml seurat_macros.xml
diffstat 3 files changed, 298 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_test_data.sh	Fri Mar 04 07:28:21 2022 +0000
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+# Download links and file-name prefix for the test fixtures fetched by this script.
+BASENAME_FILE='E-MTAB-6077-3k_features_90_cells'  # shared file-name prefix of every fixture except the matrix archive
+
+MTX_LINK='https://drive.google.com/uc?export=download&id=1-1ejn7scP80xsbrG0FtWzsozjg0hhc23'  # zip archive, saved as mtx.zip and unpacked
+RDS_LINK='https://drive.google.com/uc?export=download&id=1KW_GX6xznSUpWRWUykpNaSbAhyClf7_n'  # saved as <prefix>.rds
+NORM_LINK='https://drive.google.com/uc?export=download&id=1mvo3ENkBvEAOyWG6ejApzQTPDLX5yBKU'  # saved as <prefix>-normalised.rds
+FVG_LINK='https://drive.google.com/uc?export=download&id=13Fhruuj-vEEo1WM138ahtAYqfHc7LsaZ'  # saved as <prefix>-fvg.rds
+SCALED_LINK='https://drive.google.com/uc?export=download&id=18TK8us235LWNajarWDBAtASUXMYAxvw0'  # saved as <prefix>-scaled.rds
+PCA_LINK='https://drive.google.com/uc?export=download&id=1gf3BTB4dygDsom1TzjsBfgZnZepcoG5c'  # saved as <prefix>-pca.rds
+NEIGHBOURS_LINK='https://drive.google.com/uc?export=download&id=1N2lHoKRBZ7pmAYGfghLWB9KUrLA5WoNX'  # saved as <prefix>-neighbours.rds
+CLUSTERS_LINK='https://drive.google.com/uc?export=download&id=1HWxZWHbNUNo4z__9PhhL_CJOLzec_ETa'  # saved as <prefix>-clusters.rds
+TSNE_LINK='https://drive.google.com/uc?export=download&id=1qsvMr_GkCSp1dyTJt1BZ6cElJwFFX2zO'  # saved as <prefix>-tsne.rds
+MARKERS_LINK='https://drive.google.com/uc?export=download&id=18OmWNc7mF-4pzH6DQkPp1eKunN4BfvxD'  # zip archive, saved as <prefix>-markers.csv.zip and unpacked
+
+LOOM_LINK='https://drive.google.com/uc?export=download&id=1qNk5cg8hJG3Nv1ljTKmUEnxTOf11EEZX'  # saved as <prefix>_loom.h5
+H5AD_LINK='https://drive.google.com/uc?export=download&id=1YpE0H_t_dkh17P-WBhPijKvRiGP0BlBz'  # saved as <prefix>.h5ad
+SCE_LINK='https://drive.google.com/uc?export=download&id=1UKdyf3M01uAt7oBg93JfmRvNVB_jlUKe'  # saved as <prefix>_sce.rds
+
+function get_data {  # get_data LINK FNAME: download LINK to FNAME unless FNAME already exists; returns wget's status on failure
+  local link=$1 fname=$2
+  if [ ! -f "$fname" ]; then
+    echo "$fname not available locally, downloading.."
+    # Expansions are quoted because the links contain '?', a glob character when unquoted.
+    # wget -O creates the file even if the transfer fails; remove it so a rerun retries the download.
+    wget -O "$fname" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link" || { local rc=$?; rm -f "$fname"; return "$rc"; }
+  fi
+}
+
+# get matrix data
+pushd test-data
+get_data $MTX_LINK mtx.zip
+unzip mtx.zip
+rm -f mtx.zip
+
+get_data $RDS_LINK $BASENAME_FILE".rds"
+get_data $NORM_LINK $BASENAME_FILE"-normalised.rds"
+get_data $FVG_LINK $BASENAME_FILE"-fvg.rds"
+get_data $SCALED_LINK $BASENAME_FILE"-scaled.rds"
+get_data $PCA_LINK $BASENAME_FILE"-pca.rds"
+get_data $NEIGHBOURS_LINK $BASENAME_FILE"-neighbours.rds"
+get_data $CLUSTERS_LINK $BASENAME_FILE"-clusters.rds"
+get_data $TSNE_LINK $BASENAME_FILE"-tsne.rds"
+get_data $MARKERS_LINK $BASENAME_FILE"-markers.csv.zip"
+
+unzip $BASENAME_FILE"-markers.csv.zip"
+rm -f $BASENAME_FILE"-markers.csv.zip"
+
+get_data $LOOM_LINK $BASENAME_FILE"_loom.h5"
+get_data $SCE_LINK $BASENAME_FILE"_sce.rds"
+get_data $H5AD_LINK $BASENAME_FILE".h5ad"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seurat_convert.xml	Fri Mar 04 07:28:21 2022 +0000
@@ -0,0 +1,114 @@
+<tool id="seurat_convert" name="Seurat 3 converter" profile="18.01" version="@SEURAT_VERSION@+galaxy0">
+    <description>translates different single cell formats</description>
+    <macros>
+        <import>seurat_macros.xml</import>
+    </macros> <!-- every expand and @TOKEN@ in this wrapper resolves against seurat_macros.xml -->
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+seurat-convert.R
+@INPUT_OBJECT@
+@OUTPUT_OBJECT@
+]]></command> <!-- runs seurat-convert.R with the input and output flags expanded from the @INPUT_OBJECT@ and @OUTPUT_OBJECT@ tokens -->
+    <inputs>
+        <expand macro="input_object_params"/> <!-- conditional "input": source format and its file -->
+        <expand macro="output_object_params"/> <!-- select "format": target format -->
+    </inputs>
+
+    <outputs>
+        <expand macro="output_files"/> <!-- one dataset per target format, chosen by its filter -->
+    </outputs>
+
+    <tests>
+        <test> <!-- Seurat RDS to Loom -->
+            <conditional name="input">
+                <param name="format" value="rds_seurat"/>
+                <param name="rds_seurat_file" value="E-MTAB-6077-3k_features_90_cells.rds"/>
+            </conditional>
+            <param name="format" value="loom"/>
+            <output name="loom_file">
+                <assert_contents>
+                    <has_h5_keys keys="layers,matrix,row_attrs"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- Seurat RDS to Single Cell Experiment RDS -->
+            <conditional name="input">
+                <param name="format" value="rds_seurat"/>
+                <param name="rds_seurat_file" value="E-MTAB-6077-3k_features_90_cells.rds"/>
+            </conditional>
+            <param name="format" value="rds_sce"/>
+            <output name="rds_sce_file">
+                <assert_contents>
+                    <has_size delta="200000" value="2368166"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- Loom to Single Cell Experiment RDS -->
+            <conditional name="input">
+                <param name="format" value="loom"/>
+                <param name="loom_file" ftype="h5" value="E-MTAB-6077-3k_features_90_cells_loom.h5"/>
+            </conditional>
+            <param name="format" value="rds_sce"/>
+            <output name="rds_sce_file">
+                <assert_contents>
+                    <has_size delta="200000" value="2432700"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- Single Cell Experiment RDS to Seurat RDS -->
+            <conditional name="input">
+                <param name="format" value="rds_sce"/>
+                <param name="rds_sce_file" ftype="rdata.sce" value="E-MTAB-6077-3k_features_90_cells_sce.rds"/>
+            </conditional>
+            <param name="format" value="rds_seurat"/>
+            <output name="rds_seurat_file">
+                <assert_contents>
+                    <has_size delta="200000" value="2366603"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- AnnData to Seurat RDS -->
+            <conditional name="input">
+                <param name="format" value="anndata"/>
+                <param name="anndata_file" ftype="h5ad" value="E-MTAB-6077-3k_features_90_cells.h5ad"/>
+            </conditional>
+            <param name="format" value="rds_seurat"/>
+            <output name="rds_seurat_file">
+                <assert_contents>
+                    <has_size delta="200000" value="2965562"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+This tool uses Seurat 3 to convert formats. Possible inputs are:
+
+    * Seurat 3
+    * Loom (probably earlier than Loom 3.0)
+    * AnnData (versions contemporary with Seurat 3, most likely up to AnnData 0.6.22.post1)
+    * Single Cell Experiment
+
+Possible outputs are:
+
+    * Seurat 3
+    * Loom (as produced by loomR package)
+    * Single Cell Experiment
+
+For newer versions of AnnData and Loom, please try the Seurat 4 version of this tool.
+
+@SEURAT_INTRO@
+
+
+.. _Seurat: https://www.nature.com/articles/nbt.4096
+.. _Satija Lab: https://satijalab.org/seurat/
+
+@VERSION_HISTORY@
+
+]]></help>
+    <expand macro="citations"/> <!-- citation list comes from seurat_macros.xml -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seurat_macros.xml	Fri Mar 04 07:28:21 2022 +0000
@@ -0,0 +1,133 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@VERSION@">0.3.0</token> <!-- seurat-scripts package version pinned by the requirement below -->
+    <token name="@SEURAT_VERSION@">3.2.3</token> <!-- Seurat release number -->
+    <xml name="requirements">
+        <requirements>
+            <requirement version="@VERSION@" type="package">seurat-scripts</requirement>
+        </requirements>
+    </xml>
+    <xml name="version">
+        <version_command><![CDATA[
+echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(Seurat); cat(sessionInfo()\$otherPkgs\$Seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+    ]]></version_command>
+    </xml> <!-- Prints the R and Seurat versions. R package names are case-sensitive: the package is "Seurat", so the lowercase name always produced an empty version. -->
+
+   <xml name="input_object_params">
+     <conditional name="input" label="Input format">
+       <param name="format" type="select" label="Choose the format of the input" help="RData, Loom or AnnData">
+         <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
+         <option value="loom">Loom</option>
+         <option value="anndata">AnnData</option>
+         <option value="rds_sce">RDS with a Single Cell Experiment object</option>
+       </param>
+       <when value="rds_seurat">
+         <param name="rds_seurat_file" type="data" format="rdata" label="RDS file" help="Input as RDS file with Seurat 3 object"/>
+       </when>
+       <when value="loom">
+         <param name="loom_file" type="data" format="h5,h5loom" label="Loom file" help="Input as Loom v? file"/>
+       </when>
+       <when value="anndata">
+         <param name="anndata_file" type="data" format="h5,h5ad" label="AnnData file" help="The AnnData format provided by Scanpy"/>
+       </when>
+       <when value="rds_sce">
+         <param name="rds_sce_file" type="data" format="rdata" label="RDS file" help="Input as RDS file with Single Cell Experiment object"/>
+       </when>
+     </conditional>
+   </xml> <!-- Input selector: the chosen format decides which single data parameter is shown; the when branches follow the option order. -->
+
+   <token name="@INPUT_OBJECT@">
+    #if $input.format == "anndata"
+        --input-object-file '$input.anndata_file' --input-format anndata
+    #else if $input.format == "loom"
+        --input-object-file '$input.loom_file' --input-format loom
+    #else if $input.format == "rds_seurat"
+        --input-object-file '$input.rds_seurat_file' --input-format seurat
+    #else if $input.format == "rds_sce"
+        --input-object-file '$input.rds_sce_file' --input-format singlecellexperiment
+    #end if
+   </token> <!-- Emits the input flags: picks the file parameter of the selected branch of the "input" conditional and maps its value to the matching input-format keyword. -->
+
+   <xml name="output_object_params">
+     <param name="format" type="select" label="Choose the format of the output" help="Seurat, Single Cell Experiment or Loom">
+       <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
+       <option value="loom">Loom</option>
+       <option value="rds_sce">RDS with a Single Cell Experiment object</option>
+     </param>
+   </xml> <!-- Top-level "format" select: the target format, read by the output filters and by @OUTPUT_OBJECT@. -->
+
+   <xml name="output_files">
+     <data name="loom_file" format="h5" from_work_dir="seurat_obj.loom" label="${tool.name} on ${on_string}: Seurat Loom">
+       <filter>format == 'loom'</filter>
+     </data>
+     <data name="rds_seurat_file" format="rdata" label="${tool.name} on ${on_string}: Seurat RDS">
+       <filter>format == 'rds_seurat'</filter>
+     </data>
+     <data name="rds_sce_file" format="rdata" label="${tool.name} on ${on_string}: Seurat Single Cell Experiment RDS">
+       <filter>format == 'rds_sce'</filter>
+     </data>
+   </xml> <!-- One output per target format; each filter keeps only the dataset matching the selected "format". The Loom output is collected from seurat_obj.loom in the working directory. -->
+
+   <token name="@OUTPUT_OBJECT@">
+    #if $format == "anndata"
+        --output-object-file '$anndata_file' --output-format anndata
+    #else if $format == "loom"
+        --output-object-file seurat_obj.loom --output-format loom
+    #else if $format == "rds_seurat"
+        --output-object-file '$rds_seurat_file' --output-format seurat
+    #else if $format == "rds_sce"
+        --output-object-file '$rds_sce_file' --output-format singlecellexperiment
+    #end if
+   </token> <!-- Emits the output flags from the top-level "format" select; Loom is written to seurat_obj.loom in the working directory. NOTE(review): the "anndata" branch looks unreachable, since the select in output_object_params has no such option and this file declares no anndata_file output; confirm before relying on it. -->
+
+    <xml name="genes-use-input">
+      <param name="genes_use" argument="--genes-use" type="data" format="tsv,txt,tabular" optional="true" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/>
+    </xml> <!-- Optional dataset parameter for the script's genes-use argument. -->
+    <xml name="dims-use-input">
+      <param name="dims_use" argument="--dims-use" type="integer" min="1" optional="true" label="PCA Dimensions to use" help="Number of PCs (dimensions) to use in construction of the SNN graph."/>
+    </xml> <!-- Optional integer parameter (minimum 1) for the script's dims-use argument. -->
+
+    <token name="@SEURAT_INTRO@"><![CDATA[
+Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
+It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
+interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
+types of single cell data.
+      ]]></token>
+
+    <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+
+3.2.3+galaxy0: Moves to Seurat 3.2.3 and introduces the convert method, improving format interconversion support.
+
+3.1.2_0.0.8: Update metadata parsing
+
+3.1.1_0.0.7: Exposes perplexity and enables tab input.
+
+3.1.1_0.0.6+galaxy0: Moved to Seurat 3.
+
+  Find clusters: removed dims-use, k-param, prune-snn.
+
+2.3.1+galaxy0: Improved documentation and further exposure of all the scripts' options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home at
+EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers by Christophe Antoniewski (github.com/drosofff) and Lea Bellenger (github.com/bellenger-l).
+
+0.0.1: Initial contribution. Maria Doyle (github.com/mblue9).
+      ]]></token>
+
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/nbt.4096</citation>
+            <citation type="bibtex">
+              @misc{r-seurat-scripts.git,
+              author = {Jonathan Manning and Pablo Moreno and {EBI Gene Expression Team}},
+              year = {2018},
+              title = {Seurat-scripts: command line interface for Seurat},
+              publisher = {GitHub},
+              journal = {GitHub repository},
+              url = {https://github.com/ebi-gene-expression-group/r-seurat-scripts.git},
+            }
+            </citation>
+            <citation type="doi">10.1038/s41592-021-01102-w</citation>
+        </citations>
+    </xml> <!-- BibTeX separates authors with "and" (commas alone are read as parts of one name); the braced team name is kept as a single literal author. -->
+</macros>