Mercurial > repos > estrain > srst2
changeset 0:efc202baae1f draft
planemo upload
author | estrain |
---|---|
date | Sun, 03 Dec 2017 13:28:11 -0500 |
parents | |
children | 43cc60256036 |
files | scoreProfiles.py srst2v2.xml |
diffstat | 2 files changed, 250 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scoreProfiles.py Sun Dec 03 13:28:11 2017 -0500 @@ -0,0 +1,82 @@ +#!/usr/bin/env + +## Generate basic summary stats for SRST2 (v2) allele score output. Generate summaries for each profile defined in the database +## author: errol strain, estrain@gmail.com + +from argparse import (ArgumentParser, FileType) +import sys +import glob +from decimal import Decimal + +def parse_args(): + "Parse the input arguments, use '-h' for help." + + parser = ArgumentParser(description='Generate Summary Scores for SRST2 Allele Score Output') + + # Read inputs + parser.add_argument('--mlst_definitions', type=str, required=True, nargs=1, help='MLST Definitions') + parser.add_argument('--output', type=str, required=True, nargs=1, help='MLST Definitions') + parser.add_argument('--profile_cov', type=str, required=False, help='Minimum Average Coverage to Report ST Profile',default="98") + parser.add_argument('--profile_max_mismatch', type=str, required=False, help='Maximum Number of Mismatches (SNPs)', default="1") + + return parser.parse_args() + +args =parse_args() + +allHash = {} + +# Read in Profile Database +profiles = open(args.mlst_definitions[0],"r") +output = open(args.output[0],"w") + +# Minimum mean percent coverage for reporting profile +min_per=float(args.profile_cov) +# Maximum mean mismatch for reporting profile +max_mis=float(args.profile_max_mismatch) + +# Read in Allele Scores +# Scores should be in srts2*.scores file +# Column 0:Allele, 1:Score, 2:Avg Depth, 5:% Coverage, 7:Mismatches, 8:Indels +scoreFile = open(glob.glob("srst2*.scores")[0],"r") +scoreFile.readline() + +for line in scoreFile.readlines() : + els = line.split("\t") + vals = els[0].split("_") + allHash.update({els[0]:line}) + + +# Allele names in first row +als = profiles.readline().rstrip() +filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels\n") +output.write(filehead) + +genes = als.split("\t") + +for line in profiles.readlines() : + line = line.rstrip() + els = line.split("\t") + alleles = [] + for i in range(1,len(genes)) : + alleles.append(genes[i] + "_" + els[i]) + mscore=0 + mdepth=0 + mcover=0 + mmisma=0 + mindel=0 + for i in alleles : + if i in allHash : + vals=str(allHash[i]).split("\t") + mscore+=float(vals[1]) + mdepth+=float(vals[2]) + mcover+=float(vals[5]) + mmisma+=int(vals[7]) + mindel+=int(vals[8]) + + mscore=round(Decimal(mscore/(len(genes)-1)),5) + mdepth=round(Decimal(mdepth/(len(genes)-1)),2) + mcover=round(Decimal(mcover/(len(genes)-1)),2) + + if mmisma<=max_mis and mcover>=min_per : + output.write(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel) + "\n") +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/srst2v2.xml Sun Dec 03 13:28:11 2017 -0500 @@ -0,0 +1,168 @@ +<tool id="srst2" name="SRST2 - Short Read Sequence Typer (v2)" version="0.2.0"> + <requirements> + <requirement type="package" version="0.2.0">srst2</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + #if $paired_conditional.sPaired == "paired" + ln -s $paired_conditional.fastq1 sample_1.fastq; + ln -s $paired_conditional.fastq2 sample_2.fastq; + #end if + + srst2 + + #if $paired_conditional.sPaired == "single" + --input_se $paired_conditional.fastq1 + #else if $paired_conditional.sPaired == "paired" + --input_pe sample_1.fastq sample_2.fastq + #end if + + --output srst2out + --save_scores + + #if $job_type.job == "mlst" + --mlst_definitions $job_type.mlst_definitions + --mlst_db $job_type.mlst_db + --mlst_delimiter $job_type.mlstdelim + --mlst_max_mismatch $job_type.mlst_max_mismatch + #else if $job_type.job == "gene" + --gene_db $job_type.genedb + --gene_max_mismatch $job_type.gene_max_mismatch + #end if + + #if $options.select == "advanced" + #if $options.min_coverage + --min_coverage $options.min_coverage + #end if + #if $options.max_divergence + --max_divergence $options.max_divergence + #end if + #if $options.min_depth + --min_depth $options.min_depth + #end if + #if $options.min_edge_depth + --min_edge_depth $options.min_edge_depth + #end if + #if $options.prob_err + --prob_err $options.prob_err + #end if + #if $options.stop_after + --stop_after $options.stop_after + #end if + --other "'-p \${GALAXY_SLOTS:-1} + #if $options.maxins + --maxins $options.maxins + --minins $options.minins + #end if + '" + #if $options.mapq + --mapq $options.mapq + #end if + #if $options.baseq + --baseq $options.baseq + #end if + #else + --other "'-p \${GALAXY_SLOTS:-1}'" + #end if + + #if $job_type.job == "mlst" + ; python $__tool_directory__/scoreProfiles.py --mlst_definitions $job_type.mlst_definitions --profile_cov $job_type.profile_cov --profile_max_mismatch $job_type.profile_max_mismatch --output srst2.pscores + #end if + + ]]></command> + <inputs> + <conditional name="paired_conditional"> + <param name="sPaired" type="select" label="Single-End or Paired-End FASTQ?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param name="fastq1" type="data" format="fastq" label="FASTQ file" help="FASTQ" /> + </when> + <when value="paired"> + <param name="fastq1" type="data" format="fastq" label="Forward FASTQ file" help="FASTQ" /> + <param name="fastq2" type="data" format="fastq" label="Reverse FASTQ file" help="FASTQ" /> + </when> + </conditional> + + <conditional name="job_type"> + <param name="job" type="select" label="MLST or Gene Presence/Absence?"> + <option value="mlst">MLST</option> + <option value="gene">Gene</option> + </param> + <when value="mlst"> + <param type="data" name="mlst_db" label="Fasta file of MLST alleles" format="fasta" /> + <param type="data" name="mlst_definitions" label="ST definitions for MLST scheme" format="tabular" /> + <param type="text" name="mlstdelim" value="_" format="txt" label="Character(s) separating gene name from allele number in MLST database (default '_')" /> + <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read" /> + <param type="float" name="profile_max_mismatch" value="1" format="txt" label="Maximum number of mismatches for reporting ST profile" /> + <param type="float" name="profile_cov" value="98" format="txt" label="Minimum mean % coverage for reporting ST profile" /> + + </when> + <when value="gene"> + <param name="genedb" type="data" format="fasta" label="Fasta file for gene database" /> + <param name="gene_max_mismatch" type="integer" value="10" format="txt" label="Maximum number of mistaches per read (default 10)" /> + </when> + </conditional> + + <conditional name="options"> + <param name="select" type="select" label="Options Type"> + <option value="basic">Basic</option> + <option value="advanced">Advanced</option> + </param> + <when value="advanced"> + <param name="min_coverage" type="integer" label="Minimum %coverage cutoff for gene reporting" value="90"/> + <param name="max_divergence" type="integer" label="Maximum %divergence cutoff for gene reporting" value="10"/> + <param name="min_depth" type="integer" label="Minimum mean depth to flag as dubious allele call" value="5"/> + <param name="min_edge_depth" type="integer" label="Minimum edge depth to flag as dubious allele call" value="2"/> + <param name="prob_err" type="float" label="Probability of sequencing error" value="0.01"/> + <param name="stop_after" type="integer" label="Stop mapping after this number of reads have been mapped (otherwise map all)" optional="true"/> + <param name="mapq" type="integer" label="Samtools -q parameter" value="1"/> + <param name="baseq" type="integer" label="Samtools -Q parameter" value="20"/> + <param name="minins" type="integer" label="Bowtie 2 -I parameter. The minimum fragment length for valid paired-end alignments." value="0" > + <validator type="in_range" message="Must be less than -X parameter." min="0"/> + </param> + <param name="maxins" type="integer" label="Bowtie 2 -X parameter. The maximum fragment length for valid paired-end alignments." value="1000" > + <validator type="in_range" message="Must be greater than -I parameter." min="0"/> + </param> + </when> + <when value="basic"/> + </conditional> + + </inputs> + + <outputs> + <data format="tabular" label="SRST2 Results" name="results" from_work_dir="*.txt"/> + <data format="tabular" label="SRST2 Allele Scores" name="scores" from_work_dir="*.scores"> + <filter>job_type['job'] == "mlst"</filter> + </data> + <data format="tabular" label="SRST2 Profile Scores" name="pscores" from_work_dir="*.pscores"> + <filter> job_type['job'] == "mlst"</filter> + </data> + <data format="tabular" label="SRST2 Predicted Alleles" name="alleles" from_work_dir="*results.txt"> + <filter> job_type['job'] == "mlst"</filter> + </data> + <data format="tabular" label="SRST2 Gene Scores" name="gscores" from_work_dir="*.scores"> + <filter>job_type['job'] == "gene"</filter> + </data> + </outputs> + + <help><![CDATA[ + +SRST2 - Short Read Sequence Typer (v2) + +This program is designed to take Illumina sequence data, a MLST database and/or a database of gene sequences (e.g. resistance genes, virulence genes, etc) and report the presence of STs and/or reference genes. + + ]]></help> + + + <citations> + <citation type="bibtex"> + @misc{pope_dashnow_zobel_holt_raven_schultz_inouye_tomita_2014, + title={SRST2: Rapid genomic surveillance for public health and hospital microbiology labs}, + url={https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-014-0090-6}, + journal={Genome Medicine}, publisher={BioMed Central}, + author={Pope, Bernard J and Dashnow, Harriet and Zobel, Justin and Holt, Kathryn E and Raven, Lesley-Ann and Schultz, Mark B and Inouye, Michael and Tomita, Takehiro}, + year={2014}, month={Nov}} , + }</citation> + </citations> +</tool>