Mercurial > repos > greg > vsnp_sample_names
comparison vsnp_sample_names.xml @ 3:fb3defef50e5 draft
Uploaded
author | greg |
---|---|
date | Sun, 03 Jan 2021 15:54:36 +0000 |
parents | a56648c94fd3 |
children | e1cb13d6a82c |
comparison
equal
deleted
inserted
replaced
2:a56648c94fd3 | 3:fb3defef50e5 |
---|---|
1 <tool id="vsnp_sample_names" name="vSNP: sample names" version="1.0.0"> | 1 <tool id="vsnp_sample_names" name="vSNP: sample names" version="@WRAPPER_VERSION@.1" profile="@PROFILE@"> |
2 <description></description> | 2 <description></description> |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
3 <command detect_errors="exit_code"><![CDATA[ | 6 <command detect_errors="exit_code"><![CDATA[ |
4 #import os | 7 #import os |
5 #import re | 8 #import re |
6 #set output_dir = 'output' | 9 |
7 mkdir -p $output_dir | 10 #set sample_name_read2 = None |
8 #if str($input_type_cond.input_type) == "single": | 11 |
9 ## We may have a single read or a pair, but in | 12 #if $input_type_cond.input_type in ['single', 'pair']: |
10 ## either case we want the same base file name. | 13 #set read1 = $input_type_cond.read1 |
11 #set sample_name = $os.path.basename($input_type_cond.read.element_identifier) | 14 #set sample_name = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) |
12 #if $sample_name.find(".") > 0: | 15 #else: |
13 #set sample_name = $sample_name.split(".")[0] | 16 #set read1_filename = $input_type_cond.reads_collection['forward'].name |
17 #set sample_name = re.sub('[^\s\w\-]', '_', str($read1_filename)) | |
18 #end if | |
19 | |
20 #if $sample_name.find('_R1') >0: | |
21 ## Something like CMC_20E1_R1.fastq.gz | |
22 #set sample_name = $sample_name.split('_R1')[0] | |
23 #else if $sample_name.find(".") > 0: | |
24 #if $read1.is_of_type('fastqsanger.gz'): | |
25 ## Something like my_sample.fastq.gz | |
26 #set sample_name = '.'.join($sample_name.split('.')[0:-2]) | |
27 #else: | |
28 ## Something like my_sample.fastq | |
29 #set sample_name = $os.path.splitext($sample_name)[0] | |
14 #end if | 30 #end if |
15 #if $sample_name.find("_") > 0: | 31 #else if $sample_name.find("_") > 0: |
16 #set sample_name = $sample_name.split("_")[0] | 32 #if $read1.is_of_type('fastqsanger.gz'): |
33 ## Something like my_sample_fastq_gz | |
34 #set sample_name = '_'.join($sample_name.split('_')[0:-2]) | |
35 #else: | |
36 ## Something like my_sample_fastq | |
37 #set sample_name = "_".join($sample_name.split("_")[0:-1]) | |
17 #end if | 38 #end if |
18 && echo '$sample_name' > '$output' | |
19 #else: | |
20 #for $i in $input_type_cond.reads_collection: | |
21 #set sample_name = $os.path.basename($i.element_identifier) | |
22 #if $sample_name.find(".") > 0: | |
23 #set sample_name = $sample_name.split(".")[0] | |
24 #end if | |
25 #set output_file = $os.path.join($output_dir, $sample_name) | |
26 && echo '$sample_name' > '$output_file' | |
27 #end for | |
28 #end if | 39 #end if |
40 echo '$sample_name' > '$output' | |
29 ]]></command> | 41 ]]></command> |
30 <inputs> | 42 <inputs> |
31 <conditional name="input_type_cond"> | 43 <conditional name="input_type_cond"> |
32 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> | 44 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> |
33 <option value="single" selected="true">Single files</option> | 45 <option value="single" selected="true">Single dataset</option> |
34 <option value="collection">Collections of files</option> | 46 <option value="pair">Dataset pair</option> |
47 <option value="paired">List of dataset pairs</option> | |
35 </param> | 48 </param> |
36 <when value="single"> | 49 <when value="single"> |
37 <param name="read" type="data" format="fastqsanger.gz,fastqsanger" label="Sample file"/> | 50 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> |
38 </when> | 51 </when> |
39 <when value="collection"> | 52 <when value="paired"> |
40 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of sample files"/> | 53 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> |
54 </when> | |
55 <when value="pair"> | |
56 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> | |
57 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> | |
41 </when> | 58 </when> |
42 </conditional> | 59 </conditional> |
43 </inputs> | 60 </inputs> |
44 <outputs> | 61 <outputs> |
45 <data name="output" format="txt"> | 62 <data name="output" format="txt"/> |
46 <filter>input_type_cond['input_type'] == 'single'</filter> | |
47 </data> | |
48 <collection name="output__collection" type="list"> | |
49 <discover_datasets pattern="__name__" directory="output" format="txt" /> | |
50 <filter>input_type_cond['input_type'] == 'collection'</filter> | |
51 </collection> | |
52 </outputs> | 63 </outputs> |
53 <tests> | 64 <tests> |
65 <!-- Single files --> | |
54 <test> | 66 <test> |
55 <param name="input_type" value="collection"/> | 67 <param name="input_type" value="single"/> |
68 <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/> | |
69 <output name="output" file="sample_names.txt" ftype="txt"/> | |
70 </test> | |
71 <!-- Paired reads --> | |
72 <test> | |
73 <param name="input_type" value="paired"/> | |
56 <param name="reads_collection"> | 74 <param name="reads_collection"> |
57 <collection type="list"> | 75 <collection type="paired"> |
58 <element name="BCG_Danish_Human_UK_SRR9596061.fastq" value="BCG_Danish_Human_UK_SRR9596061.fastq" dbkey="89"/> | 76 <element name="forward" value="CMC_20E1_R1.fastq.gz"/> |
59 <element name="Dassie_Dassie_ZA_SRR3745455.fastq" value="Dassie_Dassie_ZA_SRR3745455.fastq" dbkey="89"/> | 77 <element name="reverse" value="CMC_20E1_R2.fastq.gz"/> |
60 <element name="Mbov_Cattle_NI_SRR10993937.fastq" value="Mbov_Cattle_NI_SRR10993937.fastq" dbkey="89"/> | |
61 </collection> | 78 </collection> |
62 </param> | 79 </param> |
63 <output_collection name="output__collection" type="list"> | 80 <output name="output" file="sample_names.txt" ftype="txt"/> |
64 <element name="BCG_Danish_Human_UK_SRR9596061" file="BCG_Danish_Human_UK_SRR9596061" ftype="txt"/> | 81 </test> |
65 <element name="Dassie_Dassie_ZA_SRR3745455" file="Dassie_Dassie_ZA_SRR3745455" ftype="txt"/> | 82 <!-- Paired reads in separate datasets --> |
66 <element name="Mbov_Cattle_NI_SRR10993937" file="Mbov_Cattle_NI_SRR10993937" ftype="txt"/> | 83 <test> |
67 </output_collection> | 84 <param name="input_type" value="pair"/> |
85 <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/> | |
86 <param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/> | |
87 <output name="output" file="sample_names.txt" ftype="txt"/> | |
68 </test> | 88 </test> |
69 </tests> | 89 </tests> |
70 <help> | 90 <help> |
71 **What it does** | 91 **What it does** |
72 | 92 |
73 Accepts one or more sample files and extracts a unique portion of the file name as the content of the output file(s). These | 93 Accepts fastqsanger sample files, extracts a unique portion of the file name as the sample name, and writes it to |
74 text files are then used as workflow parameter values for the Read Group Identifier parameter in the bwa-mem tool. | 94 the output. The output text file can be consumed by the **Parse parameter value** expression tool to provide workflow |
75 | 95 parameter values to the **Read group identifier (ID)** and the **Sample name identifier (SM)** parameters in the |
76 **Required Options** | 96 **Map with BWA-MEM** tool. |
77 | |
78 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads) based on the selected option. | |
79 </help> | 97 </help> |
80 <citations> | 98 <expand macro="citations"/> |
81 <citation type="bibtex"> | |
82 @misc{None, | |
83 journal = {None}, | |
84 author = {1. Stuber T}, | |
85 title = {Manuscript in preparation}, | |
86 year = {None}, | |
87 url = {https://github.com/USDA-VS/vSNP},} | |
88 </citation> | |
89 </citations> | |
90 </tool> | 99 </tool> |
91 | 100 |