vsnp_sample_names: vsnp_sample_names.xml comparison

comparison vsnp_sample_names.xml @ 3:fb3defef50e5 draft

Uploaded

author	greg
date	Sun, 03 Jan 2021 15:54:36 +0000
parents	a56648c94fd3
children	e1cb13d6a82c

comparison

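Legend: equal (line starts with a space), deleted (line starts with `-`), inserted (line starts with `+`), replaced (a deleted line followed by its inserted counterpart)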

-:a56648c94fd3
+:fb3defef50e5
-<tool id="vsnp_sample_names" name="vSNP: sample names" version="1.0.0">
+<tool id="vsnp_sample_names" name="vSNP: sample names" version="@WRAPPER_VERSION@.1" profile="@PROFILE@">
 <description></description>
+<macros>
+<import>macros.xml</import>
+</macros>
 <command detect_errors="exit_code"><![CDATA[
 #import os
 #import re
-#set output_dir = 'output'
-mkdir -p $output_dir
+#set sample_name_read2 = None
-#if str($input_type_cond.input_type) == "single":
-## We may have a single read or a pair, but in
+#if $input_type_cond.input_type in ['single', 'pair']:
-## either case we want the same base file name.
+#set read1 = $input_type_cond.read1
-#set sample_name = $os.path.basename($input_type_cond.read.element_identifier)
+#set sample_name = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
-#if $sample_name.find(".") > 0:
+#else:
-#set sample_name = $sample_name.split(".")[0]
+#set read1_filename = $input_type_cond.reads_collection['forward'].name
+#set sample_name = re.sub('[^\s\w\-]', '_', str($read1_filename))
+#end if
+#if $sample_name.find('_R1') >0:
+## Something like CMC_20E1_R1.fastq.gz
+#set sample_name = $sample_name.split('_R1')[0]
+#else if $sample_name.find(".") > 0:
+#if $read1.is_of_type('fastqsanger.gz'):
+## Something like my_sample.fastq.gz
+#set sample_name = '.'.join($sample_name.split('.')[0:-2])
+#else:
+## Something like my_sample.fastq
+#set sample_name = $os.path.splitext($sample_name)[0]
 #end if
-#if $sample_name.find("_") > 0:
+#else if $sample_name.find("_") > 0:
-#set sample_name = $sample_name.split("_")[0]
+#if $read1.is_of_type('fastqsanger.gz'):
+## Something like my_sample_fastq_gz
+#set sample_name = '_'.join($sample_name.split('_')[0:-2])
+#else:
+## Something like my_sample_fastq
+#set sample_name = "_".join($sample_name.split("_")[0:-1])
 #end if
-&& echo '$sample_name' > '$output'
-#else:
-#for $i in $input_type_cond.reads_collection:
-#set sample_name = $os.path.basename($i.element_identifier)
-#if $sample_name.find(".") > 0:
-#set sample_name = $sample_name.split(".")[0]
-#end if
-#set output_file = $os.path.join($output_dir, $sample_name)
-&& echo '$sample_name' > '$output_file'
-#end for
 #end if
+echo '$sample_name' > '$output'
 ]]></command>
 <inputs>
 <conditional name="input_type_cond">
 <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
-<option value="single" selected="true">Single files</option>
+<option value="single" selected="true">Single dataset</option>
-<option value="collection">Collections of files</option>
+<option value="pair">Dataset pair</option>
+<option value="paired">List of dataset pairs</option>
 </param>
 <when value="single">
-<param name="read" type="data" format="fastqsanger.gz,fastqsanger" label="Sample file"/>
+<param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
 </when>
-<when value="collection">
+<when value="paired">
-<param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of sample files"/>
+<param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
+</when>
+<when value="pair">
+<param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
+<param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
 </when>
 </conditional>
 </inputs>
 <outputs>
-<data name="output" format="txt">
+<data name="output" format="txt"/>
-<filter>input_type_cond['input_type'] == 'single'</filter>
-</data>
-<collection name="output__collection" type="list">
-<discover_datasets pattern="__name__" directory="output" format="txt" />
-<filter>input_type_cond['input_type'] == 'collection'</filter>
-</collection>
 </outputs>
 <tests>
+<!-- Single files -->
 <test>
-<param name="input_type" value="collection"/>
+<param name="input_type" value="single"/>
+<param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
+<output name="output" file="sample_names.txt" ftype="txt"/>
+</test>
+<!-- Paired reads -->
+<test>
+<param name="input_type" value="paired"/>
 <param name="reads_collection">
-<collection type="list">
+<collection type="paired">
-<element name="BCG_Danish_Human_UK_SRR9596061.fastq" value="BCG_Danish_Human_UK_SRR9596061.fastq" dbkey="89"/>
+<element name="forward" value="CMC_20E1_R1.fastq.gz"/>
-<element name="Dassie_Dassie_ZA_SRR3745455.fastq" value="Dassie_Dassie_ZA_SRR3745455.fastq" dbkey="89"/>
+<element name="reverse" value="CMC_20E1_R2.fastq.gz"/>
-<element name="Mbov_Cattle_NI_SRR10993937.fastq" value="Mbov_Cattle_NI_SRR10993937.fastq" dbkey="89"/>
 </collection>
 </param>
-<output_collection name="output__collection" type="list">
+<output name="output" file="sample_names.txt" ftype="txt"/>
-<element name="BCG_Danish_Human_UK_SRR9596061" file="BCG_Danish_Human_UK_SRR9596061" ftype="txt"/>
+</test>
-<element name="Dassie_Dassie_ZA_SRR3745455" file="Dassie_Dassie_ZA_SRR3745455" ftype="txt"/>
+<!-- Paired reads in separate datasets -->
-<element name="Mbov_Cattle_NI_SRR10993937" file="Mbov_Cattle_NI_SRR10993937" ftype="txt"/>
+<test>
-</output_collection>
+<param name="input_type" value="pair"/>
+<param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
+<param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/>
+<output name="output" file="sample_names.txt" ftype="txt"/>
 </test>
 </tests>
 <help>
 **What it does**
-Accepts one or more sample files and extracts a unique portion of the file name as the content of the output file(s).  These
+Accepts fastqsanger sample files, extracts a unique portion of the file name as the sample name, and writes it to
-text files are then used as workflow parameter values for the Read Group Identifier parameter in the bwa-mem tool.
+the output.  The output text file can be consumed by the **Parse parameter value** expression tool to provide workflow
+parameter values to the **Read group identifier (ID)** and the **Sample name identifier (SM)**  parameters in the
-**Required Options**
+**Map with BWA-MEM** tool.
-* **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads) based on the selected option.
 </help>
-<citations>
+<expand macro="citations"/>
-<citation type="bibtex">
-@misc{None,
-journal = {None},
-author = {1. Stuber T},
-title = {Manuscript in preparation},
-year = {None},
-url = {https://github.com/USDA-VS/vSNP},}
-</citation>
-</citations>
 </tool>

Mercurial > repos > greg > vsnp_sample_names

comparison vsnp_sample_names.xml @ 3:fb3defef50e5 draft