Mercurial > repos > estrain > microrunqc
diff microrunqc.xml @ 0:a53acd38d77e draft
Uploaded
author | estrain |
---|---|
date | Tue, 24 Mar 2020 08:54:42 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/microrunqc.xml Tue Mar 24 08:54:42 2020 -0400
@@ -0,0 +1,215 @@
+<tool id="microrunqc" name="microrunqc" version="0.0.1">
+
+    <!-- Packages that provide the executables and libraries the command below relies on. -->
+    <requirements>
+        <requirement type="package" version="2.3.0">skesa</requirement>
+        <requirement type="package" version="2.19.0">mlst</requirement>
+        <requirement type="package" version="0.7.17">bwa</requirement>
+        <requirement type="package" version="1.15.4">numpy</requirement>
+        <requirement type="package" version="0.4.2">fastq-scan</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+
+        ## Defaults: uncompressed reads and one core, unless overridden below.
+        #set fqscan = "text"
+        #set num_cores = 1
+
+        ## Step 1: assemble the reads with SKESA. Work-dir file names are derived from the input
+        ## name; any character outside A-Z, a-z, 0-9, "_", "." and "-" becomes "_" so that the
+        ## name cannot break the shell command line.
+        skesa
+        #if $jobtype.select == "fastq_fr"
+            #set outname = re.sub('[^A-Za-z0-9_.-]', '_', str($jobtype.fastq1.name))
+            #set bwalist = str($jobtype.fastq1) + " " + str($jobtype.fastq2)
+            --fastq $jobtype.fastq1,$jobtype.fastq2
+            #if $jobtype.fastq1.is_of_type("fastq.gz") or $jobtype.fastq1.is_of_type("fastqsanger.gz")
+                #set fqscan = "gz"
+            #end if
+        #else if $jobtype.select == "fastq_pair"
+            #set outname = re.sub('[^A-Za-z0-9_.-]', '_', str($jobtype.coll.name))
+            #set bwalist = str($jobtype.coll.forward) + " " + str($jobtype.coll.reverse)
+            --fastq $jobtype.coll.forward,$jobtype.coll.reverse
+            #if $jobtype.coll.forward.is_of_type("fastq.gz") or $jobtype.coll.forward.is_of_type("fastqsanger.gz")
+                #set fqscan = "gz"
+            #end if
+        #end if
+
+        #if $options.select == "basic"
+            --cores $num_cores
+            --memory 8
+        #else if $options.select == "advanced"
+            #if $options.cores
+                #set num_cores = $options.cores
+                --cores $options.cores
+            #end if
+            #if $options.memory
+                --memory $options.memory
+            #end if
+            #if $options.hash_count
+                --hash_count
+            #end if
+            #if $options.estimated_kmers
+                --estimated_kmers $options.estimated_kmers
+            #end if
+            #if $options.skip
+                --skip_bloom_filter
+            #end if
+            #if $options.kmer
+                --kmer $options.kmer
+            #end if
+            #if $options.min_count
+                --min_count $options.min_count
+            #end if
+            #if $options.max_kmer_count
+                --max_kmer_count $options.max_kmer_count
+            #end if
+            ## vector_percent and fraction accept 0, so test for "not empty" instead of truthiness.
+            #if str($options.vector_percent) != ""
+                --vector_percent $options.vector_percent
+            #end if
+            #if $options.insert_size
+                --insert_size $options.insert_size
+            #end if
+            #if $options.steps
+                --steps $options.steps
+            #end if
+            #if str($options.fraction) != ""
+                --fraction $options.fraction
+            #end if
+            #if $options.max_snp_len
+                --max_snp_len $options.max_snp_len
+            #end if
+            #if $options.min_contig
+                --min_contig $options.min_contig
+            #end if
+            #if $options.allow_snps
+                --allow_snps
+            #end if
+        #end if
+        > '${outname}.fasta' &&
+
+        ## Step 2: map the reads back to the contigs; median_size.py reads the alignments on stdin.
+        bwa index '${outname}.fasta' &&
+        bwa mem -t $num_cores '${outname}.fasta' ${bwalist} | python '$__tool_directory__/median_size.py' > insert.median &&
+
+        ## Step 3: run mlst on the contigs. The thresholds accept 0, hence the "not empty" tests.
+        mlst --nopath --threads $num_cores
+        #if $options.select == "advanced"
+            #if str($options.minid) != ""
+                --minid $options.minid
+            #end if
+            #if str($options.mincov) != ""
+                --mincov $options.mincov
+            #end if
+            #if str($options.minscore) != ""
+                --minscore $options.minscore
+            #end if
+        #end if
+        '${outname}.fasta' > '${outname}.mlst.tsv' &&
+
+        ## Step 4: run run_fastq_scan.py on the reads, then let sum_mlst.py combine the contigs,
+        ## the MLST table, insert.median and fq_out.tab into sum_qc.txt.
+        python '$__tool_directory__/run_fastq_scan.py' --fastq ${bwalist} --out fq_out.tab --type ${fqscan} &&
+        python '$__tool_directory__/sum_mlst.py' --fasta '${outname}.fasta' --mlst '${outname}.mlst.tsv' --med insert.median --fqscan fq_out.tab --out sum_qc.txt
+    ]]></command>
+
+    <inputs>
+        <!-- Reads: either two separate datasets or one paired collection. -->
+        <conditional name="jobtype">
+            <param name="select" type="select" label="Select Input">
+                <option value="fastq_fr">Forward and Reverse FASTQ</option>
+                <option value="fastq_pair">Paired FASTQ Collection</option>
+            </param>
+            <when value="fastq_fr">
+                <param name="fastq1" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Forward FASTQ" />
+                <param name="fastq2" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Reverse FASTQ" />
+            </when>
+            <when value="fastq_pair">
+                <param name="coll" label="Paired FASTQ" type="data_collection" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" collection_type="paired" />
+            </when>
+        </conditional>
+
+        <!-- "Basic" runs SKESA with cores=1 and memory=8; "Advanced" exposes the SKESA and mlst options. -->
+        <conditional name="options">
+            <param name="select" type="select" label="Options Type">
+                <option value="basic">Basic</option>
+                <option value="advanced">Advanced</option>
+            </param>
+            <when value="advanced">
+                <param name="cores" optional="true" type="integer" label="Number of cores to use (Default=16)" value=""/>
+                <param name="memory" optional="true" type="integer" label="Memory available (Default=32GB)" value=""/>
+                <param name="hash_count" optional="true" type="boolean" label="hash counter"/>
+                <param name="estimated_kmers" optional="true" type="integer" label="Estimated number of unique kmers for bloom filter (Default=100)" value=""/>
+                <param name="skip" optional="true" type="boolean" label="skip bloom filter, use estimate kmers as the hash"/>
+                <param name="kmer" optional="true" type="integer" label="Minimal kmer length for assembly (Default=21)" value=""/>
+                <param name="min_count" optional="true" type="integer" label="Minimal count for kmers retained for comparing alternate choices" value=""/>
+                <param name="max_kmer_count" optional="true" type="integer" label="Minimum acceptable average count for estimating the maximal kmer length in reads" value=""/>
+                <param name="vector_percent" optional="true" type="float" label="Count for vectors as a fraction of the read number (0-1,1=disabled)" value="">
+                    <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
+                </param>
+                <param name="insert_size" optional="true" type="integer" label="Expected insert size for paired reads" value=""/>
+                <param name="steps" optional="true" type="integer" label="Number of assembly iterations from minimal to maximal kmer length in reads (Default=11)" value=""/>
+                <param name="fraction" optional="true" type="float" label="Maximum noise to signal ratio acceptable for extension (Default=0.1)" value="">
+                    <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
+                </param>
+                <param name="max_snp_len" optional="true" type="integer" label="Maximal snp length (Default=150)" value=""/>
+                <param name="min_contig" optional="true" type="integer" label="Minimal contig length reported in output (Default=200)" value=""/>
+                <param name="allow_snps" optional="true" type="boolean" label="Turn on SNP discovery (Default=false)"/>
+                <param name="mincov" type="integer" label="Minimum DNA %coverage" value="10" min="0" max="100" help="Minimum DNA %coverage to report partial allele at all (default 10, must be between 0-100)" optional="true" />
+                <param name="minid" type="integer" label="Minimum DNA %identity" value="95" min="0" max="100" help="Minimum DNA %identity of full allele to consider 'similar' (default 95, must be between 0-100)" optional="true" />
+                <param name="minscore" type="integer" label="Minimum score to match scheme" value="50" min="0" max="100" help="Minimum score out of 100 to match a scheme" optional="true" />
+            </when>
+            <when value="basic"/>
+        </conditional>
+    </inputs>
+
+    <!-- The from_work_dir patterns match the files the command writes into the working directory. -->
+    <outputs>
+        <data format="fasta" name="results.skesa.fasta" label="${tool.name} on ${on_string}: Contigs" from_work_dir="*.fasta"/>
+        <data format="tabular" name="results.mlst.tsv" label="${tool.name} on ${on_string}: MLST" from_work_dir="*.mlst.tsv"/>
+        <data format="tabular" name="qc_results.tsv" label="${tool.name} on ${on_string}: QC Summary" from_work_dir="*.txt"/>
+    </outputs>
+
+    <help><![CDATA[
+**What it does**
+
+Runs a quality-control pipeline on one pair of forward and reverse FASTQ files:
+
+1. Assembles the reads with ``skesa``.
+2. Maps the reads back to the assembled contigs with ``bwa mem`` and passes the alignments to ``median_size.py``.
+3. Runs ``mlst`` on the contigs.
+4. Runs ``run_fastq_scan.py`` on the reads.
+5. Combines the results of the previous steps with ``sum_mlst.py``.
+
+**Outputs**
+
+- Contigs (FASTA)
+- MLST (tabular)
+- QC Summary (tabular)
+    ]]></help>
+
+    <citations>
+        <citation type="bibtex">
+        @misc{pope_dashnow_zobel_holt_raven_schultz_inouye_tomita_2014,
+        title={skesa: eSKESA is a de-novo sequence read assembler for cultured single isolate genomes
+        based on DeBruijn graphs. It uses conservative heuristics and is designed to
+        create breaks at repeat regions in the genome. This leads to excellent sequence
+        quality but not necessarily a large N50 statistic. It is a multi-threaded
+        application that scales well with the number of processors. For different runs
+        with the same inputs, including the order of reads, the order and orientation
+        of contigs in the output is deterministic. },
+        url={https://github.com/ncbi/ngs-tools/tree/master/tools/skesa/},
+        author={National Center for Biotechnology Information },
+        }</citation>
+
+        <citation type="bibtex">
+        @UNPUBLISHED{Seemann2016,
+        author = "Seemann T",
+        title = "MLST: Scan contig files against PubMLST typing schemes",
+        year = "2016",
+        url = {https://github.com/tseemann/mlst}
+        }</citation>
+    </citations>
+</tool>