comparison vsnp_sample_names.xml @ 3:fb3defef50e5 draft

Uploaded
author greg
date Sun, 03 Jan 2021 15:54:36 +0000
parents a56648c94fd3
children e1cb13d6a82c
comparison
equal deleted inserted replaced
2:a56648c94fd3 3:fb3defef50e5
1 <tool id="vsnp_sample_names" name="vSNP: sample names" version="1.0.0"> 1 <tool id="vsnp_sample_names" name="vSNP: sample names" version="@WRAPPER_VERSION@.1" profile="@PROFILE@">
2 <description></description> 2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
3 <command detect_errors="exit_code"><![CDATA[ 6 <command detect_errors="exit_code"><![CDATA[
4 #import os 7 #import os
5 #import re 8 #import re
6 #set output_dir = 'output' 9
7 mkdir -p $output_dir 10 #set sample_name_read2 = None
8 #if str($input_type_cond.input_type) == "single": 11
9 ## We may have a single read or a pair, but in 12 #if $input_type_cond.input_type in ['single', 'pair']:
10 ## either case we want the same base file name. 13 #set read1 = $input_type_cond.read1
11 #set sample_name = $os.path.basename($input_type_cond.read.element_identifier) 14 #set sample_name = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
12 #if $sample_name.find(".") > 0: 15 #else:
13 #set sample_name = $sample_name.split(".")[0] 16 #set read1_filename = $input_type_cond.reads_collection['forward'].name
17 #set sample_name = re.sub('[^\s\w\-]', '_', str($read1_filename))
18 #end if
19
20 #if $sample_name.find('_R1') >0:
21 ## Something like CMC_20E1_R1.fastq.gz
22 #set sample_name = $sample_name.split('_R1')[0]
23 #else if $sample_name.find(".") > 0:
24 #if $read1.is_of_type('fastqsanger.gz'):
25 ## Something like my_sample.fastq.gz
26 #set sample_name = '.'.join($sample_name.split('.')[0:-2])
27 #else:
28 ## Something like my_sample.fastq
29 #set sample_name = $os.path.splitext($sample_name)[0]
14 #end if 30 #end if
15 #if $sample_name.find("_") > 0: 31 #else if $sample_name.find("_") > 0:
16 #set sample_name = $sample_name.split("_")[0] 32 #if $read1.is_of_type('fastqsanger.gz'):
33 ## Something like my_sample_fastq_gz
34 #set sample_name = '_'.join($sample_name.split('_')[0:-2])
35 #else:
36 ## Something like my_sample_fastq
37 #set sample_name = "_".join($sample_name.split("_")[0:-1])
17 #end if 38 #end if
18 && echo '$sample_name' > '$output'
19 #else:
20 #for $i in $input_type_cond.reads_collection:
21 #set sample_name = $os.path.basename($i.element_identifier)
22 #if $sample_name.find(".") > 0:
23 #set sample_name = $sample_name.split(".")[0]
24 #end if
25 #set output_file = $os.path.join($output_dir, $sample_name)
26 && echo '$sample_name' > '$output_file'
27 #end for
28 #end if 39 #end if
40 echo '$sample_name' > '$output'
29 ]]></command> 41 ]]></command>
30 <inputs> 42 <inputs>
31 <conditional name="input_type_cond"> 43 <conditional name="input_type_cond">
32 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> 44 <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
33 <option value="single" selected="true">Single files</option> 45 <option value="single" selected="true">Single dataset</option>
34 <option value="collection">Collections of files</option> 46 <option value="pair">Dataset pair</option>
47 <option value="paired">List of dataset pairs</option>
35 </param> 48 </param>
36 <when value="single"> 49 <when value="single">
37 <param name="read" type="data" format="fastqsanger.gz,fastqsanger" label="Sample file"/> 50 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
38 </when> 51 </when>
39 <when value="collection"> 52 <when value="paired">
40 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of sample files"/> 53 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
54 </when>
55 <when value="pair">
56 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
57 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
41 </when> 58 </when>
42 </conditional> 59 </conditional>
43 </inputs> 60 </inputs>
44 <outputs> 61 <outputs>
45 <data name="output" format="txt"> 62 <data name="output" format="txt"/>
46 <filter>input_type_cond['input_type'] == 'single'</filter>
47 </data>
48 <collection name="output__collection" type="list">
49 <discover_datasets pattern="__name__" directory="output" format="txt" />
50 <filter>input_type_cond['input_type'] == 'collection'</filter>
51 </collection>
52 </outputs> 63 </outputs>
53 <tests> 64 <tests>
65 <!-- Single files -->
54 <test> 66 <test>
55 <param name="input_type" value="collection"/> 67 <param name="input_type" value="single"/>
68 <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
69 <output name="output" file="sample_names.txt" ftype="txt"/>
70 </test>
71 <!-- Paired reads -->
72 <test>
73 <param name="input_type" value="paired"/>
56 <param name="reads_collection"> 74 <param name="reads_collection">
57 <collection type="list"> 75 <collection type="paired">
58 <element name="BCG_Danish_Human_UK_SRR9596061.fastq" value="BCG_Danish_Human_UK_SRR9596061.fastq" dbkey="89"/> 76 <element name="forward" value="CMC_20E1_R1.fastq.gz"/>
59 <element name="Dassie_Dassie_ZA_SRR3745455.fastq" value="Dassie_Dassie_ZA_SRR3745455.fastq" dbkey="89"/> 77 <element name="reverse" value="CMC_20E1_R2.fastq.gz"/>
60 <element name="Mbov_Cattle_NI_SRR10993937.fastq" value="Mbov_Cattle_NI_SRR10993937.fastq" dbkey="89"/>
61 </collection> 78 </collection>
62 </param> 79 </param>
63 <output_collection name="output__collection" type="list"> 80 <output name="output" file="sample_names.txt" ftype="txt"/>
64 <element name="BCG_Danish_Human_UK_SRR9596061" file="BCG_Danish_Human_UK_SRR9596061" ftype="txt"/> 81 </test>
65 <element name="Dassie_Dassie_ZA_SRR3745455" file="Dassie_Dassie_ZA_SRR3745455" ftype="txt"/> 82 <!-- Paired reads in separate datasets -->
66 <element name="Mbov_Cattle_NI_SRR10993937" file="Mbov_Cattle_NI_SRR10993937" ftype="txt"/> 83 <test>
67 </output_collection> 84 <param name="input_type" value="pair"/>
85 <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
86 <param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/>
87 <output name="output" file="sample_names.txt" ftype="txt"/>
68 </test> 88 </test>
69 </tests> 89 </tests>
70 <help> 90 <help>
71 **What it does** 91 **What it does**
72 92
73 Accepts one or more sample files and extracts a unique portion of the file name as the content of the output file(s). These 93 Accepts fastqsanger sample files, extracts a unique portion of the file name as the sample name, and writes it to
74 text files are then used as workflow parameter values for the Read Group Identifier parameter in the bwa-mem tool. 94 the output. The output text file can be consumed by the **Parse parameter value** expression tool to provide workflow
75 95 parameter values to the **Read group identifier (ID)** and the **Sample name identifier (SM)** parameters in the
76 **Required Options** 96 **Map with BWA-MEM** tool.
77
78 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads) based on the selected option.
79 </help> 97 </help>
80 <citations> 98 <expand macro="citations"/>
81 <citation type="bibtex">
82 @misc{None,
83 journal = {None},
84 author = {1. Stuber T},
85 title = {Manuscript in preparation},
86 year = {None},
87 url = {https://github.com/USDA-VS/vSNP},}
88 </citation>
89 </citations>
90 </tool> 99 </tool>
91 100