diff seq2hla.xml @ 0:52bba1cd3823 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seq2hla commit 96f219ed86ce674def72c73393d3c29c87cb56e5"
author iuc
date Thu, 20 Feb 2020 18:27:07 -0500
parents
children 5a7cd6c39085
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seq2hla.xml	Thu Feb 20 18:27:07 2020 -0500
@@ -0,0 +1,106 @@
+<tool id="seq2hla" name="seq2HLA" version="2.2.0">
+    <description>HLA genotype and expression from RNA-seq</description>
+    <requirements>
+        <!-- needed because of some non POSIX compliant wc usage -->
+        <requirement type="package" version="8.31">coreutils</requirement>
+        <requirement type="package" version="2.2">seq2hla</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        seq2HLA --runName='$run_name' -1 '$fastq_input1' -2 '$fastq_input2'
+        #if str($trim) != '':
+            --trim3=$trim
+        #end if
+        -p \${GALAXY_SLOTS:-1} >> '$seq2hla_log'
+        && cp -p '${run_name}-ClassI.HLAgenotype2digits' '${c1_genotype2digits}'
+        && cp -p '${run_name}-ClassI.HLAgenotype4digits' '${c1_genotype4digits}'
+        && echo '#Locus_RPKM' | cat - '${run_name}-ClassI.expression' | sed 's/^\(.*\): \([0-9.]*\).*$/\1_\2/' | tr '_' '\t' > '${c1_expression}'
+        && cp -p '${run_name}-ClassII.HLAgenotype4digits' '${c2_genotype4digits}'
+        && echo '#Locus_RPKM' | cat - '${run_name}-ClassII.expression' | sed 's/^\(.*\): \([0-9.]*\).*$/\1_\2/' | tr '_' '\t' > '${c2_expression}'
+        && cp -p '${run_name}.ambiguity' '${ambiguity}'
+    ]]></command>
+    <inputs>
+        <param name="run_name" type="text" value="sample1" label="Name prefix for this analysis">
+            <validator type="regex" message="Use letters,digits,_.-">^[A-Za-z0-9_\-+.]+$</validator>
+        </param>
+        <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/>
+        <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/>
+        <param name="trim" type="integer" value="" min="0" optional="true" label="Trim bases from 3 prime" 
+               help="trim this many bases from the low-quality end of each read" />
+    </inputs>
+    <outputs>
+        <data format="txt" name="seq2hla_log" label="${tool.name} on ${on_string}: ${run_name} logfile"/>
+        <data format="tabular" name="c1_genotype2digits" label="${tool.name} on ${on_string}: ${run_name}-ClassI.HLAgenotype2digits"/>
+        <data format="tabular" name="c1_genotype4digits" label="${tool.name} on ${on_string}: ${run_name}-ClassI.HLAgenotype4digits"/>
+        <data format="tabular" name="c2_genotype4digits" label="${tool.name} on ${on_string}: ${run_name}-ClassII.HLAgenotype4digits"/>
+        <data format="tabular" name="c1_expression" label="${tool.name} on ${on_string}: ${run_name}-ClassI.expression"/>
+        <data format="tabular" name="c2_expression" label="${tool.name} on ${on_string}: ${run_name}-ClassII.expression"/>
+        <data format="tabular" name="c2_expression" label="${tool.name} on ${on_string}: ${run_name}-ClassII.expression"/>
+        <data format="txt" name="ambiguity" label="${tool.name} on ${on_string}: ${run_name}.ambiguity"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="run_name" value="test"/>
+            <param name="fastq_input1" ftype="fastqsanger" value="reads1.fastq"/>
+            <param name="fastq_input2" ftype="fastqsanger" value="reads2.fastq"/>
+            <output name="c1_genotype4digits">
+                <assert_contents>
+                    <has_text text="A*24:02" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**seq2HLA**  *HLA typing from RNA-Seq sequence reads*
+
+Release: 2.2
+
+seq2HLA_ is an in-silico method, written in python and R, which takes standard RNA-Seq sequence reads in fastq format
+as input, uses a bowtie index comprising all HLA alleles and outputs the most likely HLA class I and class II genotypes (in 4 digit resolution),
+a p-value for each call, and the expression of each class
+
+**Inputs** 
+
+    Paired read fastq files with illumina style IDs.  
+
+
+**Outputs** 
+
+    1. <prefix>-ClassI.HLAgenotype2digits => 2 digit result of Class I
+    2. <prefix>-ClassII.HLAgenotype2digits => 2 digit result of Class II
+    3. <prefix>-ClassI.HLAgenotype4digits => 4 digit result of Class I
+    4. <prefix>-ClassII.HLAgenotype4digits => 4 digit result of Class II
+    5. <prefix>.ambiguity => reports typing ambuigities (more than one solution for an allele possible)
+    6. <prefix>-ClassI.expression => expression of Class I alleles
+    7. <prefix>-ClassII.expression => expression of Class II alleles
+
+
+    ClassI.HLAgenotype4digits
+
+       =======  ========  ===========  ========  ============
+        #Locus  Allele 1  Confidence   Allele 2  Confidence
+       =======  ========  ===========  ========  ============
+        A       A*03:01   0.000510333  A*02:01'  0.0005975604
+        B       B*50:01   0.001271273  B*58:02   3.52561e-05
+        C       C*04:01   0.06362723   C*06:02   0.04725865
+       =======  ========  ===========  ========  ============
+
+
+    ClassI.expression
+
+       =======  ======
+        #Locus   RPKM
+       =======  ======
+        A        89.59
+        B       139.66
+        C       184.42
+       =======  ======
+
+
+.. _seq2HLA: https://bitbucket.org/sebastian_boegel/seq2hla
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1186/gm403</citation>
+    </citations>
+</tool>