Mercurial > repos > greg > vsnp_sample_names
changeset 0:f75e2ac7b6cd draft
Uploaded
| author | greg | 
|---|---|
| date | Tue, 21 Apr 2020 10:17:20 -0400 | 
| parents | |
| children | 895d18fcfebe | 
| files | .shed.yml test-data/BCG_Danish_Human_UK_SRR9596061 test-data/BCG_Danish_Human_UK_SRR9596061.fastq test-data/Dassie_Dassie_ZA_SRR3745455 test-data/Dassie_Dassie_ZA_SRR3745455.fastq test-data/Mbov_Cattle_NI_SRR10993937 test-data/Mbov_Cattle_NI_SRR10993937.fastq vsnp_sample_names.xml | 
| diffstat | 8 files changed, 133 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Tue Apr 21 10:17:20 2020 -0400 @@ -0,0 +1,11 @@ +name: vsnp_sample_names +owner: greg +description: | + Contains a tool that outputs a unique portion of the input file name. +homepage_url: https://github.com/USDA-VS/vSNP +long_description: | + Contains a tool that outputs a unique portion of the input file name. +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_sample_names +type: unrestricted +categories: + - Sequence Analysis
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BCG_Danish_Human_UK_SRR9596061 Tue Apr 21 10:17:20 2020 -0400 @@ -0,0 +1,1 @@ +BCG_Danish_Human_UK_SRR9596061
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BCG_Danish_Human_UK_SRR9596061.fastq Tue Apr 21 10:17:20 2020 -0400 @@ -0,0 +1,14 @@ +@SRR9596061.1 1 length=100 +GACCCCGGTTCAGGCTTCACCACAGTGTGGAACGCGGTCGTCTCCGAACTTAACGGCGACCCTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCA ++SRR9596061.1 1 length=100 +??<?BA@D8@F<F<;C?CFGAFAGI?@9:D<?B?@:A;';;1(=A=4;<;AC>AB>@B;755?<<>C:>?C@############################ +@SRR9596061.2 2 length=100 +GCTGAGATTAGCATCACTGCTGGGTCCGTCGTCAACCTTAGGGTCGCCGTTAAGTTCGGAGACGACCGCGTTCCACACTGTGGTGAAGCCTGAACCGGGG ++SRR9596061.2 2 length=100 +?<<++A2=?ADDDEEI9A?>AECEA9CFEEEC??:BDIIEB9?9?@ACDA;;CCCDDD/;=<????;=?>;;034+>>>>AA93<>::A8>2<>A?>9;9 +@SRR9596061.3 3 length=100 +GCTCGATTTCGTTTTGGACAAAGCTGCTCGGCACGGATAACAGAGCAAAACCCTCGACGATGGTCAATGGCTTGACTAAATTGAGCCAAGCCATTTCGCG ++SRR9596061.3 3 length=100 +????D?DDB<D<AAE?1+<+<F9FF3E<91?:)?D@((0.BC4)8@@CD).;@C=A############################################ +@SRR9596061.4 4 length=100 +GAACTTAACGGCGACCCTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCAGCGCTCCGCTGACCCCTCAGCAAAGGGCTTGGCTCAATCTCGTCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Dassie_Dassie_ZA_SRR3745455 Tue Apr 21 10:17:20 2020 -0400 @@ -0,0 +1,1 @@ +Dassie_Dassie_ZA_SRR3745455
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Dassie_Dassie_ZA_SRR3745455.fastq Tue Apr 21 10:17:20 2020 -0400 @@ -0,0 +1,8 @@ +@SRR3745455.1 1 length=251 +ATACAAGACCGAGCTGATCAAACCCGGCAAGCCCTGGCGGTCCATCGAGGATGTCGAGTTGGCCACCGCGCGCTGGGTCGACTGGTTCAACCATCGCCGCCTCTACCAGTACTGCGGCGACGTCCCGCCGGTCGAACTCGAGGCTGCCTACTACGCTCAACGCCAGAGACCAGCCGCCGGCTGAGGTCTCAGATCAGAGAGTCTCCGGACTCACCGGGGCGGTTCAGGTTGTGATTTGATCAGGCATGCCG ++SRR3745455.1 1 length=251 +CCCBCFFFFFCCGGGGGGGGGGGFGGGGGGGHGHHAHHGGGGGHHGGHGGGHHHHHHGHFHGHGFGHGGGFFGGGCGHHGGGGGGHGHHHHHGHHGGFFGGGGGHHHHHHHHHHGGF?CCGGCGGGFGGGGGGFGFFFFDFFFFFFEFFFFFFBFFFFDFFBFFFFFFFBEFFFEBF=B;DCFFFFFBFFFFBFFFBFBFFFFFF0.-DDAFFFFF;--@DF99.00:;;/::0BFFFBFE00;/EFFFF- +@SRR3745455.2 2 length=251 +GTCTTCGGTGCGGTAGCGCCGAATGTCCAACGGAGAGTGCAAAATACCATGAAGCTCATTCTCACGGCCGATGTCGATCACCTCGGGTCCATCGGCGACACTGTCGAGGTCAAGGACGGGTATGGCCGTAACTTTCTGCTCCCGCGCGGCCTGGCGATCGTCGCCTCGCGCGGAGCCCAGAAGCAGGCTGACGAGATCCGCCGGGCCCGCGAAACCAAAAGCGTACGCGACCTAGAGCACGCCAACGAGAT ++SRR3745455.2 2 length=251 +AAAAAFAB>A?DGEEGGGGGFGGGHGHHFGFGGFGGGFFHFFHHHHHHFHHHHHHHFHFGHHHHHEGGGGGGGHHGFGHEGFFAF?CGGHFFFGGGGGCGGHFDGGCC/CHGHGFHHGGG@DHHFHHGGGGCHHHHHBGGHHGDGC?BCAGGEGGGGFEC9ADC?.ADFFFFFAFFFE?E./BBFFFDFFFEDFBFFF/;.;@@;ACFFF-=;-BFEEEFFFDF?--.;9-9AFBFB/BFFF.D>ED.9..
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Mbov_Cattle_NI_SRR10993937 Tue Apr 21 10:17:20 2020 -0400 @@ -0,0 +1,1 @@ +Mbov_Cattle_NI_SRR10993937
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Mbov_Cattle_NI_SRR10993937.fastq Tue Apr 21 10:17:20 2020 -0400 @@ -0,0 +1,6 @@ +@M00963:204:000000000-B953L:1:1102:18443:7739 1:N:0:16 +GCACCTGAGATTGACACCGCCCGCGAATACGAAGTAACCGCCGAATACCAGTCCTGGCGGGCCGTCTAGGGAAGCGACGCAGCATTACTTACGGTCGGCGTCGGGATAGGCCCGGCCATCCCCCTCGGGTGGCTCACGTTAGCGAACCGGCACCCGGACAAACCTGGGGCGGCCGCGAACCCAGCCCCTGCGGGGCTAACAACACGGTCCGCGCCCACCGCCGCCCCCCCGTCAACCGAGCCAATACCACA ++ +AAAAAFFFBFFFG11G11E1A0EGCGG?/1A//AFD2AAE/A/A>/FFGBCGHBFGHFGG////>/0/21?BGA///><</ACGDF1?111=<.0.------;---.;////.-:@--/9....-;@A@-@--9//;9--;9;---@-9-9-9--9--9---;9/9-------@-----;@-----;A--9---9-9-/;BB9-;9-9B-9-------------@----;-;A--/---9;@/-//99//; +@M00963:204:000000000-B953L:1:1102:15355:7851 1:N:0:16 +GTTCGGCGCGCCGGTGGGTCCTGATCCGCTGGCGGGTTTCGTCAACGACAGCGGCCTGCGACCCCGATATCCCGCGATTGAGATCTACCGGGTGAGCGCGCCCGCCAACCCCGGGGCGCCCTACTTCGCCGCGACCGACCAGCTCGCCCGCGTCGACGGCGGACCCGAGGTCCTGCTGCGGCTGGACGAACGACGCCGGCTGCAGGGCCAGCCCCCCCTGGGGCCGGTGCTGATGACCGCGGACGCCCGAG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vsnp_sample_names.xml Tue Apr 21 10:17:20 2020 -0400
@@ -0,0 +1,91 @@
+<tool id="vsnp_sample_names" name="vSNP: sample names" version="1.0.0">
+    <description></description>
+    <command detect_errors="exit_code"><![CDATA[
+#import os
+## Every value placed on the shell line is single-quoted: element identifiers may contain spaces or shell metacharacters.
+#set output_dir = 'output'
+mkdir -p '$output_dir'
+#if str($input_type_cond.input_type) == "single":
+    ## The input may be a single read or one of a pair; keeping only the text before the
+    ## first "." and the first "_" gives the same base file name in either case.
+    #set sample_name = $os.path.basename($input_type_cond.read.element_identifier)
+    #if $sample_name.find(".") > 0:
+        #set sample_name = $sample_name.split(".")[0]
+    #end if
+    #if $sample_name.find("_") > 0:
+        #set sample_name = $sample_name.split("_")[0]
+    #end if
+    echo '$sample_name' > '$output'
+#else:
+    #for $i in $input_type_cond.reads_collection:
+        #set sample_name = $os.path.basename($i.element_identifier)
+        #if $sample_name.find(".") > 0:
+            #set sample_name = $sample_name.split(".")[0]
+        #end if
+        #set output_file = $os.path.join($output_dir, $sample_name)
+        && echo '$sample_name' > '$output_file'
+    #end for
+#end if
+]]></command>
+    <inputs>
+        <conditional name="input_type_cond">
+            <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
+                <option value="single" selected="true">Single files</option>
+                <option value="collection">Collections of files</option>
+            </param>
+            <when value="single">
+                <param name="read" type="data" format="fastqsanger.gz,fastqsanger" label="Sample file"/>
+            </when>
+            <when value="collection">
+                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of sample files"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="txt">
+            <filter>input_type_cond['input_type'] == 'single'</filter>
+        </data>
+        <collection name="output__collection" type="list">
+            <discover_datasets pattern="__name__" directory="output" format="txt" />
+            <filter>input_type_cond['input_type'] == 'collection'</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_type" value="collection"/>
+            <param name="reads_collection">
+                <collection type="list">
+                    <element name="BCG_Danish_Human_UK_SRR9596061.fastq" value="BCG_Danish_Human_UK_SRR9596061.fastq" dbkey="89"/>
+                    <element name="Dassie_Dassie_ZA_SRR3745455.fastq" value="Dassie_Dassie_ZA_SRR3745455.fastq" dbkey="89"/>
+                    <element name="Mbov_Cattle_NI_SRR10993937.fastq" value="Mbov_Cattle_NI_SRR10993937.fastq" dbkey="89"/>
+                </collection>
+            </param>
+            <output_collection name="output__collection" type="list">
+                <element name="BCG_Danish_Human_UK_SRR9596061" file="BCG_Danish_Human_UK_SRR9596061" ftype="txt"/>
+                <element name="Dassie_Dassie_ZA_SRR3745455" file="Dassie_Dassie_ZA_SRR3745455" ftype="txt"/>
+                <element name="Mbov_Cattle_NI_SRR10993937" file="Mbov_Cattle_NI_SRR10993937" ftype="txt"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Accepts one or more sample files and extracts a unique portion of the file name as the content of the output file(s). These
+text files are then used as workflow parameter values for the Read Group Identifier parameter in the bwa-mem tool.
+
+**Required Options**
+
+ * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads) based on the selected option.
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {1. Stuber T},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {https://github.com/USDA-VS/vSNP},}
+        </citation>
+    </citations>
+</tool>
+
