view wgmlst_add.xml @ 1:5b2c48fa0175 draft

planemo upload for repository https://github.com/bvalot/pyMLST commit fe04f6232a5f7ea78b666cb8036872902e79998b
author bvalot
date Wed, 07 Dec 2022 15:25:06 +0000
parents a3cc35af3635
children
line wrap: on
line source

<tool id="wgmlst_add_wrapper" name="Add strain to cg/wgMLST database"
	  version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
  <!-- No short description is displayed next to the tool name. -->
  <description />
  <!-- macro.xml supplies the "requirements" macro expanded below; the
       version tokens used on the tool element are presumably defined there
       as well (confirm in macro.xml). -->
  <macros>
    <import>macro.xml</import>
  </macros>
  <expand macro="requirements" />
  <!-- Any exit code of 1 or above is reported as a fatal job error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>
  <version_command>wgMLST -v</version_command>
  <command><![CDATA[
    ## Work on a copy of the input database so the input dataset is never
    ## modified; every strain is then added to that copy in turn.
    ##
    ## Strain names are taken from dataset / collection names, which are
    ## user-controlled, and are placed inside single quotes on the command
    ## line. Every character outside letters, digits, "_" and "." is replaced
    ## by "_": spaces and hyphens are mapped exactly as before, while quotes
    ## and other shell metacharacters can no longer break out of the quoting.
    #import re
    #set $unsafe_chars = '[^A-Za-z0-9_.]'

    ## Search thresholds shared by every wgMLST invocation.
    #set $search_opts = ''
    #if str($coverage)
      #set $search_opts = $search_opts + ' -c ' + str($coverage)
    #end if
    #if str($identity)
      #set $search_opts = $search_opts + ' -i ' + str($identity)
    #end if

    ## When logging is enabled, stderr of each invocation is appended to the
    ## log output; otherwise it is left untouched.
    #if $log
      #set $redirect = "2>> '" + str($logfile) + "'"
    #else
      #set $redirect = ''
    #end if

    cp '${database}' '${databaseout}'
    #if $data.input == "fasta"
      ## Assemblies: one "add" call per FASTA dataset.
      #for $datain in $data.fastain
        #set $strain = re.sub($unsafe_chars, '_', str($datain.name))
        && wgMLST add
          -s '$strain'
          $search_opts
          '${databaseout}' '${datain}'
          $redirect
      #end for
    #else
      ## Raw reads: all three read modes use "add2" and share the optional
      ## minimum read coverage.
      #set $read_opts = ''
      #if str($data.reads)
        #set $read_opts = '-r ' + str($data.reads)
      #end if
      #if $data.input == "fastqsingle"
        ## One call per single-end FASTQ dataset.
        #for $datain in $data.single
          #set $strain = re.sub($unsafe_chars, '_', str($datain.name))
          && wgMLST add2
            -s '$strain'
            $read_opts
            $search_opts
            '${databaseout}' '${datain}'
            $redirect
        #end for
      #else if $data.input == "fastqpaired"
        ## A single forward/reverse pair, named after the collection.
        #set $strain = re.sub($unsafe_chars, '_', str($data.pairedfile.name))
        && wgMLST add2
          -s '$strain'
          $read_opts
          $search_opts
          '${databaseout}' '${data.pairedfile.forward}' '${data.pairedfile.reverse}'
          $redirect
      #else
        ## List of pairs: one call per pair in the collection.
        #for $datain in $data.pairedfilelist
          #set $strain = re.sub($unsafe_chars, '_', str($datain.name))
          && wgMLST add2
            -s '$strain'
            $read_opts
            $search_opts
            '${databaseout}' '${datain.forward}' '${datain.reverse}'
            $redirect
        #end for
      #end if
    #end if
    ]]>
  </command>
  <inputs>
    <!-- Database to extend; the command works on a copy of it. -->
    <param name="database" type="data"
           format="sqlite"
           label="cg/wgMLST database"
           help="Sqlite format from cg/wgMLST database" />
    <!-- Kind of sequence data to add; each choice exposes its own inputs. -->
    <conditional name="data">
      <param name="input" type="select" label="Select type of data">
        <option value="fasta" selected="true">Assembly (fasta)</option>
        <option value="fastqsingle">Raw data (single)</option>
        <option value="fastqpaired">Raw data (paired)</option>
        <option value="fastqpaired2">Raw data (List of paired)</option>
      </param>
      <when value="fasta">
        <param name="fastain" type="data" format="fasta"
               label="Assembly Genome" help="Fasta format"
               multiple="true" />
      </when>
      <when value="fastqsingle">
        <param name="single" type="data" format="fastq,fastq.gz"
               label="Single read file" help="Fastq(gz) format"
               multiple="true" />
        <param name="reads" type="integer" value="10" optional="true"
               label="Minimum reads coverage to search gene" />
      </when>
      <when value="fastqpaired">
        <param name="pairedfile" type="data_collection" format="fastq,fastq.gz"
               label="Paired of read files" help="Fastq(gz) format"
               collection_type="paired" />
        <param name="reads" type="integer" value="10" optional="true"
               label="Minimum reads coverage to search gene" />
      </when>
      <when value="fastqpaired2">
        <param name="pairedfilelist" type="data_collection" format="fastq,fastq.gz"
               label="Paired of read files" help="Fastq(gz) format"
               collection_type="list:paired" />
        <param name="reads" type="integer" value="10" optional="true"
               label="Minimum reads coverage to search gene" />
      </when>
    </conditional>
    <!-- Disabled explicit strain-name parameter, kept for reference; the
         strain name is currently derived from the input dataset name. -->
    <!-- <param name="strain" type="text" value="" size="50" -->
    <!--        optional="false" -->
    <!--        label="Strain" -->
    <!--        help="Name of the strain"> -->
    <!--   <sanitizer invalid_char="_" > -->
    <!--     <valid initial="string.ascii_letters,string.digits"> -->
    <!--       <add value="_" /> -->
    <!--       <add value="." /> -->
    <!--     </valid> -->
    <!-- </sanitizer> -->
    <!-- </param>  -->
    <!-- Search thresholds are fractions, so values outside 0..1 are rejected
         in the form instead of being passed on to the command line. -->
    <param name="identity" type="float" value="0.9" min="0" max="1" optional="false"
           label="Identity"
           help="Minimum identity to search gene" />
    <param name="coverage" type="float" value="0.9" min="0" max="1" optional="false"
           label="Coverage"
           help="Minimum coverage to search gene" />
    <!-- Controls whether the log output dataset is produced. -->
    <param name="log" type="boolean" checked="true"
           label="Write log file" />
  </inputs>
  <outputs>
    <!-- Extended database; it is labelled with the name of the input database. -->
    <data name="databaseout" format="sqlite" label="${database.name}" />
    <!-- Log dataset, only created when the "log" checkbox is ticked. -->
    <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log">
      <filter>log</filter>
    </data>
  </outputs>
  <tests>
    <!-- Assemblies: two FASTA genomes added in one run with identity lowered to 0.8. -->
    <test expect_num_outputs="2">
      <param name="database" value="wgmlst_ecoli.db" />
      <conditional name="data">
        <param name="input" value="fasta" />
        <param name="fastain" ftype="fasta" value="EHSB-021.fasta,EHSB-092.fasta" />
      </conditional>
      <param name="identity" value="0.8" />
      <output name="logfile" ftype="txt">
        <assert_contents>
          <has_text text="Added 2497 new MLST genes to the database" />
          <has_text text="Found 32 partial genes, filled 30" />
          <has_text text="Removed 4 genes" />
          <has_text text="Added 2496 new MLST genes to the database" />
          <has_text text="Found 44 partial genes, filled 40" />
          <has_text text="Removed 5 genes" />
        </assert_contents>
      </output>
    </test>
    <!-- Single-end mode: both read files are supplied as independent single-end datasets. -->
    <test expect_num_outputs="2">
      <param name="database" value="wgmlst_ecoli.db" />
      <conditional name="data">
        <param name="input" value="fastqsingle" />
        <param name="single"
               value="EHSB-021_R1.fastq.gz,EHSB-021_R2.fastq.gz" />
        <param name="reads" value="5" />
      </conditional>
      <output name="logfile" ftype="txt">
        <assert_contents>
          <has_text_matching expression="Add 14\d\d new MLST genes to database" />
          <has_text_matching expression="Remove \d genes with uncertain bases" />
          <has_text_matching expression="Remove \d\d\d genes with bad CDS" />
        </assert_contents>
      </output>
    </test>
    <!-- Paired mode: one forward/reverse pair supplied as a paired collection. -->
    <test expect_num_outputs="2">
      <param name="database" value="wgmlst_ecoli.db" />
      <conditional name="data">
        <param name="input" value="fastqpaired" />
        <param name="pairedfile">
          <collection type="paired">
            <element name="forward" value="EHSB-021_R1.fastq.gz" />
            <element name="reverse" value="EHSB-021_R2.fastq.gz" />
          </collection>
        </param>
        <param name="reads" value="5" />
      </conditional>
      <output name="logfile" ftype="txt">
        <assert_contents>
          <has_text_matching expression="Add 24\d\d new MLST genes to database" />
          <has_text_matching expression="Remove \d genes with uncertain bases" />
          <has_text_matching expression="Remove \d\d genes with bad CDS" />
        </assert_contents>
      </output>
    </test>
    <!-- List-of-pairs mode: the same read pair is registered under two element names. -->
    <test expect_num_outputs="2">
      <param name="database" value="wgmlst_ecoli.db" />
      <conditional name="data">
        <param name="input" value="fastqpaired2" />
        <param name="pairedfilelist">
          <collection type="list:paired">
            <element name="EHSB-021">
              <collection type="paired">
                <element name="forward" value="EHSB-021_R1.fastq.gz" />
                <element name="reverse" value="EHSB-021_R2.fastq.gz" />
              </collection>
            </element>
            <element name="EHSB-021bis">
              <collection type="paired">
                <element name="forward" value="EHSB-021_R1.fastq.gz" />
                <element name="reverse" value="EHSB-021_R2.fastq.gz" />
              </collection>
            </element>
          </collection>
        </param>
        <param name="reads" value="5" />
      </conditional>
      <output name="logfile" ftype="txt">
        <assert_contents>
          <has_text_matching expression="Add 24\d\d new MLST genes to database" />
          <has_text_matching expression="Remove \d genes with uncertain bases" />
          <has_text_matching expression="Remove \d\d genes with bad CDS" />
        </assert_contents>
      </output>
    </test>
  </tests>
  <help>
**What it does**

Add a Strain to the cg/wgMLST database

You can use:

- Genome assembly (blat search)
- Raw data (kma search)

**Options:**
  -s, --strain TEXT      Name of the strain (default: genome name).
  -i, --identity FLOAT   Minimum identity to search gene (default=0.9).
  -c, --coverage FLOAT   Minimum coverage to search gene (default=0.9).
  -r, --reads INTEGER    Minimum reads coverage to search gene (default=10).
  -f, --fasta FILENAME   Write fasta file with gene allele.

**License and citation**

This Galaxy tool is Copyright © 2022 `B. Valot` and is released under the `GPL3 license`.
  </help>
  <!-- No citations are declared for this tool yet. -->
  <citations>
  </citations>
</tool>