# HG changeset patch
# User estrain
# Date 1706887006 0
# Node ID dc12d6ac296ddea982b32cd6af7f49128e99881e
# Parent 7272a9c3614946fd082db1d5b906254ba14485f6
Fixed bug in MLST output
diff -r 7272a9c36149 -r dc12d6ac296d median_size.py
--- a/median_size.py Fri Jan 19 11:50:31 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-#!/usr/bin/env
-
-## Errol Strain (estrain@gmail.com)
-## calculate median insert size from sam file
-
-import numpy as np
-
-def get_data(infile):
- lengths = []
- for line in infile:
- if line.startswith('@'):
- pass
- else:
- line = line.rsplit()
- length = int(line[8])
- if length > 0:
- lengths.append(length)
- else:
- pass
- return lengths
-
-if __name__ == "__main__":
- import sys
- lengths = get_data(sys.stdin)
- md = int(np.median(lengths))
-print(md)
diff -r 7272a9c36149 -r dc12d6ac296d microrunqc.xml
--- a/microrunqc.xml Fri Jan 19 11:50:31 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,200 +0,0 @@
-
-
-
- skesa
- mlst
- bwa
- fastq-scan
-
-
- ${outname}.fasta;
-
- bwa index ${outname}.fasta;
- bwa mem -t $num_cores ${outname}.fasta ${bwalist} | python $__tool_directory__/median_size.py > insert.median;
-
- mlst --nopath --threads $num_cores --datadir $mlst_databases.fields.path/pubmlst --blastdb $mlst_databases.fields.path/blast/mlst.fa
- #if $options.select=="advanced"
- #if $options.minid
- --minid $options.minid
- #end if
- #if $options.mincov
- --mincov $options.mincov
- #end if
- #if $options.minscore
- --minscore $options.minscore
- #end if
- #end if
- ${outname}.fasta > ${outname}.mlst_raw.tsv;
-
- python $__tool_directory__/mlstAddFields.py ${outname}.mlst_raw.tsv $mlst_databases.fields.path/pubmlst > ${outname}.mlst.tsv;
-
- python $__tool_directory__/run_fastq_scan.py --fastq ${bwalist} --out fq_out.tab --type ${fqscan};
-
- python $__tool_directory__/sum_mlst.py --fasta ${outname}.fasta --mlst ${outname}.mlst.tsv --med insert.median --fqscan fq_out.tab --out sum_qc.txt
-
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- @misc{pope_dashnow_zobel_holt_raven_schultz_inouye_tomita_2014,
- title={skesa: eSKESA is a de-novo sequence read assembler for cultured single isolate genomes
- based on DeBruijn graphs. It uses conservative heuristics and is designed to
- create breaks at repeat regions in the genome. This leads to excellent sequence
- quality but not necessarily a large N50 statistic. It is a multi-threaded
- application that scales well with the number of processors. For different runs
- with the same inputs, including the order of reads, the order and orientation
- of contigs in the output is deterministic. },
- url={https://github.com/ncbi/ngs-tools/tree/master/tools/skesa/},
- author={National Center for Biotechnology Information },
- }
-
-
- @UNPUBLISHED{Seemann2016,
- author = "Seemann T",
- title = "MLST: Scan contig files against PubMLST typing schemes",
- year = "2016",
- url = {https://github.com/tseemann/mlst}
- }
-
-
diff -r 7272a9c36149 -r dc12d6ac296d microrunqc/median_size.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/microrunqc/median_size.py Fri Feb 02 15:16:46 2024 +0000
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+## Errol Strain (estrain@gmail.com)
+## calculate median insert size from sam file
+
+import sys
+
+import numpy as np
+
+
+def get_data(infile):
+    """Collect the positive insert sizes from SAM-formatted lines.
+
+    Lines starting with '@' (SAM headers) and lines with fewer than nine
+    whitespace-separated columns are skipped. The ninth column is parsed
+    as an integer and kept only when it is greater than zero.
+
+    infile -- iterable of text lines, e.g. sys.stdin
+    Returns the list of kept values in input order.
+    Raises ValueError if a ninth column is not an integer.
+    """
+    lengths = []
+    for line in infile:
+        if line.startswith('@'):
+            continue
+        fields = line.split()
+        if len(fields) < 9:
+            # Blank or truncated record: there is no ninth column to read.
+            continue
+        length = int(fields[8])
+        if length > 0:
+            lengths.append(length)
+    return lengths
+
+
+if __name__ == "__main__":
+    lengths = get_data(sys.stdin)
+    if not lengths:
+        # np.median([]) is nan and int(nan) raises ValueError; exit with a
+        # readable message instead of a traceback.
+        sys.exit("median_size.py: no positive insert sizes in SAM input")
+    # Printed inside the guard so importing this module has no side effects.
+    print(int(np.median(lengths)))
diff -r 7272a9c36149 -r dc12d6ac296d microrunqc/microrunqc.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/microrunqc/microrunqc.xml Fri Feb 02 15:16:46 2024 +0000
@@ -0,0 +1,200 @@
+
+
+
+ skesa
+ mlst
+ bwa
+ fastq-scan
+
+
+ ${outname}.fasta;
+
+ bwa index ${outname}.fasta;
+ bwa mem -t $num_cores ${outname}.fasta ${bwalist} | python $__tool_directory__/median_size.py > insert.median;
+
+ mlst --nopath --threads $num_cores --datadir $mlst_databases.fields.path/pubmlst --blastdb $mlst_databases.fields.path/blast/mlst.fa
+ #if $options.select=="advanced"
+ #if $options.minid
+ --minid $options.minid
+ #end if
+ #if $options.mincov
+ --mincov $options.mincov
+ #end if
+ #if $options.minscore
+ --minscore $options.minscore
+ #end if
+ #end if
+ ${outname}.fasta > ${outname}.mlst_raw.tsv;
+
+ python $__tool_directory__/mlstAddFields.py ${outname}.mlst_raw.tsv $mlst_databases.fields.path/pubmlst > ${outname}.mlst.tsv;
+
+ python $__tool_directory__/run_fastq_scan.py --fastq ${bwalist} --out fq_out.tab --type ${fqscan};
+
+ python $__tool_directory__/sum_mlst.py --fasta ${outname}.fasta --mlst ${outname}.mlst.tsv --med insert.median --fqscan fq_out.tab --out sum_qc.txt
+
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @misc{pope_dashnow_zobel_holt_raven_schultz_inouye_tomita_2014,
+ title={skesa: eSKESA is a de-novo sequence read assembler for cultured single isolate genomes
+ based on DeBruijn graphs. It uses conservative heuristics and is designed to
+ create breaks at repeat regions in the genome. This leads to excellent sequence
+ quality but not necessarily a large N50 statistic. It is a multi-threaded
+ application that scales well with the number of processors. For different runs
+ with the same inputs, including the order of reads, the order and orientation
+ of contigs in the output is deterministic. },
+ url={https://github.com/ncbi/ngs-tools/tree/master/tools/skesa/},
+ author={National Center for Biotechnology Information },
+ }
+
+
+ @UNPUBLISHED{Seemann2016,
+ author = "Seemann T",
+ title = "MLST: Scan contig files against PubMLST typing schemes",
+ year = "2016",
+ url = {https://github.com/tseemann/mlst}
+ }
+
+
diff -r 7272a9c36149 -r dc12d6ac296d microrunqc/mlst.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/microrunqc/mlst.loc.sample Fri Feb 02 15:16:46 2024 +0000
@@ -0,0 +1,6 @@
+# this is a tab separated file describing the location of mlst databases
+#
+# the columns are:
+# value name path
+#
+# for example
diff -r 7272a9c36149 -r dc12d6ac296d microrunqc/mlstAddFields.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/microrunqc/mlstAddFields.py Fri Feb 02 15:16:46 2024 +0000
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+import sys
+import csv
+
+# Scheme-table columns copied into the annotation field, in output order.
+ANNOTATION_COLUMNS = ('clonal_complex', 'CC', 'Lineage', 'species')
+
+
+def find_index(headers, term):
+    """Return the position of term in headers, or -1 when it is absent."""
+    try:
+        return headers.index(term)
+    except ValueError:
+        return -1
+
+
+def _find_default_db():
+    """Return the database location parsed from the output of 'mlst -h'.
+
+    This requires the 'mlst' command to be installed and available in the
+    path. Raises Exception when no help line mentions 'db/pubmlst'.
+    """
+    import subprocess
+    mlstdesc = subprocess.check_output(['mlst', '-h']).decode()
+    db_pubmlst = [line for line in mlstdesc.split('\n') if 'db/pubmlst' in line]
+    if not db_pubmlst:
+        raise Exception("Could not find MLST database location.")
+    # The path is the first single-quoted token; remove the 'bin/..' segment.
+    return db_pubmlst[0].split("'")[1].replace("bin/..", "")
+
+
+def _load_annotations(scheme_path):
+    """Map the first column of each scheme-table row to its annotations.
+
+    The value is a comma-joined 'name=value' string built from the
+    ANNOTATION_COLUMNS that exist in the header and are non-empty in the
+    row; it is an empty string when none apply.
+    """
+    schema_dict = {}
+    with open(scheme_path, 'r') as file:
+        reader = csv.reader(file, delimiter='\t')
+        headers = next(reader, [])
+        columns = [(name, find_index(headers, name)) for name in ANNOTATION_COLUMNS]
+        for line in reader:
+            if not line:
+                # csv yields [] for blank lines; there is no ST to key on.
+                continue
+            desc = []
+            for name, idx in columns:
+                # idx < len(line) protects against rows shorter than the header.
+                if -1 < idx < len(line) and line[idx]:
+                    desc.append(f"{name}={line[idx]}")
+            schema_dict[line[0]] = ','.join(desc)
+    return schema_dict
+
+
+def main(mlst_file, db_path=None):
+    """Print the first row of an mlst report with scheme annotations added.
+
+    The annotation string for the row's ST is inserted as a new fourth
+    column when the ST is present in the scheme table; otherwise the row is
+    printed unchanged.
+
+    mlst_file -- path to tab-separated mlst output (column 2 = scheme,
+                 column 3 = ST)
+    db_path -- directory holding SCHEME/SCHEME.txt tables; when None the
+               location is looked up by running 'mlst -h'
+    Raises ValueError if mlst_file has no rows.
+    """
+    with open(mlst_file, 'r') as file:
+        reader = csv.reader(file, delimiter='\t')
+        mlstout = next(reader, None)
+
+    if mlstout is None:
+        raise ValueError(f"MLST output file is empty: {mlst_file}")
+
+    # Return the output without appending if schema equals "-" or the row
+    # is too short to carry a scheme and ST at all.
+    if len(mlstout) < 3 or mlstout[1] == "-":
+        print("\t".join(mlstout))
+        return
+
+    schema = mlstout[1]
+    mlstST = mlstout[2]
+
+    mlstloc = _find_default_db() if db_path is None else db_path
+    schema_dict = _load_annotations(f"{mlstloc}/{schema}/{schema}.txt")
+
+    output = mlstout[:3]
+    if mlstST in schema_dict:
+        output.append(schema_dict[mlstST])
+    output.extend(mlstout[3:])
+
+    print("\t".join(output))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        # stdout is redirected into the result file by the wrapper, so the
+        # usage text goes to stderr.
+        print("Usage: python mlstAddFields.py MLST_FILE [db_path]", file=sys.stderr)
+        sys.exit(1)
+
+    mlst_file = sys.argv[1]
+    db_path = sys.argv[2] if len(sys.argv) > 2 else None
+
+    main(mlst_file, db_path)
+
diff -r 7272a9c36149 -r dc12d6ac296d microrunqc/run_fastq_scan.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/microrunqc/run_fastq_scan.py Fri Feb 02 15:16:46 2024 +0000
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+## Run fastq-scan to get mean read length and mean quality score
+## author: errol strain, estrain@gmail.com
+
+from argparse import (ArgumentParser, FileType)
+import sys
+import glob
+import subprocess
+import json
+
+
+def parse_args():
+    "Parse the input arguments, use '-h' for help."
+
+    parser = ArgumentParser(description='Run fastq-scan on a pair of gzipped FASTQ files')
+
+    # Read inputs
+    parser.add_argument('--fastq', type=str, required=True, nargs=2, help='FASTQ files')
+    # '--out' is spelled out as an alias so callers using the short form do
+    # not depend on argparse prefix matching staying unambiguous.
+    parser.add_argument('--output', '--out', type=str, required=True, nargs=1, help='Output File')
+    parser.add_argument('--type', type=str, required=True, nargs=1, help='File Type (text or gz)')
+
+    return parser.parse_args()
+
+
+def scan_fastq(path, file_type):
+    """Run fastq-scan on one FASTQ file and return its summary means.
+
+    path -- FASTQ file to read
+    file_type -- "gz" streams the file through zcat; anything else uses cat
+    Returns (read_mean, qual_mean) from the "qc_stats" section of the
+    fastq-scan JSON report, each rounded to one decimal place.
+    """
+    reader_cmd = ["zcat", path] if file_type == "gz" else ["cat", path]
+    reader = subprocess.Popen(reader_cmd, stdout=subprocess.PIPE, shell=False)
+    scanner = subprocess.Popen(["fastq-scan"], stdin=reader.stdout,
+                               stdout=subprocess.PIPE, shell=False)
+    # Close our copy of the pipe so the reader sees SIGPIPE if fastq-scan
+    # exits early, then reap the reader once the report has been collected.
+    reader.stdout.close()
+    report = json.loads(scanner.communicate()[0])
+    reader.wait()
+    stats = report["qc_stats"]
+    return round(stats["read_mean"], 1), round(stats["qual_mean"], 1)
+
+
+def main():
+    """Scan both FASTQ files and write one tab-separated summary line."""
+    args = parse_args()
+    file_type = str(args.type[0])
+    r1l, r1q = scan_fastq(args.fastq[0], file_type)
+    r2l, r2q = scan_fastq(args.fastq[1], file_type)
+
+    # Write output to be used by sum_mlst.py: R1 length, R2 length, R1
+    # quality, R2 quality. No trailing newline is written.
+    with open(args.output[0], "w") as output:
+        output.write(str(r1l) + "\t" + str(r2l) + "\t" + str(r1q) + "\t" + str(r2q))
+
+
+if __name__ == "__main__":
+    main()
diff -r 7272a9c36149 -r dc12d6ac296d microrunqc/sum_mlst.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/microrunqc/sum_mlst.py Fri Feb 02 15:16:46 2024 +0000
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+
+## Generate basic summary stats from SKESA, fastq-scan, and MLST output.
+## author: errol strain, estrain@gmail.com
+
+from argparse import (ArgumentParser, FileType)
+import sys
+import glob
+import subprocess
+from decimal import Decimal
+
+
+def parse_args():
+    "Parse the input arguments, use '-h' for help."
+
+    parser = ArgumentParser(description='Generate Basic Summary Statistics from SKESA assemblies, fastq-scan output, and MLST reports')
+
+    # Read inputs
+    parser.add_argument('--fasta', type=str, required=True, nargs=1, help='SKESA FASTA assembly')
+    parser.add_argument('--mlst', type=str, required=True, nargs=1, help='MLST output')
+    parser.add_argument('--fqscan', type=str, required=True, nargs=1, help='fastq-scan output')
+    parser.add_argument('--med', type=str, required=True, nargs=1, help='Median Insert Size')
+    # '--out' is spelled out as an alias so callers using the short form do
+    # not depend on argparse prefix matching staying unambiguous.
+    parser.add_argument('--output', '--out', type=str, required=True, nargs=1, help='Output File')
+
+    return parser.parse_args()
+
+
+def read_contigs(fasta):
+    """Return (lengths, coverages) for the contigs of a FASTA file.
+
+    Coverage is read from the third '_'-separated field of each '>' header
+    line (assumes SKESA-style headers -- confirm for other assemblers).
+    Records without sequence are dropped so the two lists stay paired.
+
+    Raises ValueError when a header has no numeric third field.
+    """
+    records = []
+    with open(fasta, "r") as handle:
+        for line in handle:
+            line = line.rstrip("\r\n")
+            if line.startswith(">"):
+                try:
+                    coverage = float(line.split("_")[2])
+                except (IndexError, ValueError):
+                    raise ValueError("No coverage value in FASTA header: " + line)
+                records.append([0, coverage])
+            elif records:
+                records[-1][0] += len(line)
+    kept = [(length, coverage) for length, coverage in records if length > 0]
+    return [length for length, _ in kept], [coverage for _, coverage in kept]
+
+
+def weighted_coverage(lengths, coverages):
+    """Return the length-weighted mean coverage, rounded to one decimal.
+
+    Returns 0.0 for an empty assembly rather than dividing by zero.
+    """
+    totlen = sum(lengths)
+    if totlen == 0:
+        return 0.0
+    covlist = [a * b for a, b in zip(lengths, coverages)]
+    return round(sum(covlist) / totlen, 1)
+
+
+def compute_n50(lengths):
+    """Return the N50 of the contig lengths, or 0 when there are none.
+
+    Walking the contigs from longest to shortest, the N50 is the length of
+    the contig at which the running total first reaches half the assembly.
+    """
+    totlen = sum(lengths)
+    running = 0
+    for length in sorted(lengths, reverse=True):
+        running += length
+        # Doubling the running total keeps the comparison in integers.
+        if running * 2 >= totlen:
+            return length
+    return 0
+
+
+def read_first_line(path, chars=None):
+    """Return the first line of path with trailing chars stripped.
+
+    chars follows str.rstrip: None strips any trailing whitespace.
+    """
+    with open(path, "r") as handle:
+        return handle.readline().rstrip(chars)
+
+
+def main():
+    """Write the header and single data row of the QC summary table."""
+    args = parse_args()
+
+    # FASTA file
+    fasta = args.fasta[0]
+
+    # Get individual and total length of contigs, plus per-contig coverage
+    intlen, coverages = read_contigs(fasta)
+    totlen = sum(intlen)
+    # Count number of contigs
+    numtigs = len(intlen)
+    covdep = weighted_coverage(intlen, coverages)
+    n50 = compute_n50(intlen)
+
+    # Read in MLST output; only the line ending is stripped so that empty
+    # trailing columns survive the split.
+    els = read_first_line(args.mlst[0], "\r\n").split("\t")
+
+    # Read in median insert size
+    insert = read_first_line(args.med[0])
+
+    # Read in fastq-scan
+    fq = read_first_line(args.fqscan[0])
+
+    filehead = str("File\tContigs\tLength\tEstCov\tN50\tMedianInsert\tMeanLength_R1\tMeanLength_R2\tMeanQ_R1\tMeanQ_R2\tScheme\tST\n")
+    row = [str(fasta), str(numtigs), str(totlen), str(covdep), str(n50), str(insert), str(fq)]
+    # The first MLST column is not repeated in the summary row.
+    row.extend(els[1:])
+
+    with open(args.output[0], "w") as output:
+        output.write(filehead)
+        output.write("\t".join(row) + "\n")
+
+
+if __name__ == "__main__":
+    main()
diff -r 7272a9c36149 -r dc12d6ac296d microrunqc/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/microrunqc/tool_data_table_conf.xml.sample Fri Feb 02 15:16:46 2024 +0000
@@ -0,0 +1,7 @@
+
+
+
+
diff -r 7272a9c36149 -r dc12d6ac296d mlst.loc.sample
--- a/mlst.loc.sample Fri Jan 19 11:50:31 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-# this is a tab separated file describing the location of mlst databases
-#
-# the columns are:
-# value name path
-#
-# for example
diff -r 7272a9c36149 -r dc12d6ac296d mlstAddFields.py
--- a/mlstAddFields.py Fri Jan 19 11:50:31 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-#!/usr/bin/env
-
-import sys
-import csv
-
-def find_index(headers, term):
- try:
- return headers.index(term)
- except ValueError:
- return -1
-
-def main(mlst_file, db_path=None):
- with open(mlst_file, 'r') as file:
- reader = csv.reader(file, delimiter='\t')
- mlstout = next(reader)
-
- schema = mlstout[1]
- mlstST = mlstout[2]
-
- if db_path is None:
- # If no database path is provided, find it using an external command
- # This requires the 'mlst' command to be installed and available in the path
- import subprocess
- mlstdesc = subprocess.check_output(['mlst', '-h']).decode()
- db_pubmlst = [line for line in mlstdesc.split('\n') if 'db/pubmlst' in line]
- if db_pubmlst:
- mlstloc = db_pubmlst[0].split("'")[1].replace("bin/..", "")
- else:
- raise Exception("Could not find MLST database location.")
- else:
- mlstloc = db_path
-
- mlst_file_path = f"{mlstloc}/{schema}/{schema}.txt"
-
- schema_dict = {}
- with open(mlst_file_path, 'r') as file:
- reader = csv.reader(file, delimiter='\t')
- headers = next(reader)
-
- clonal = find_index(headers, 'clonal_complex')
- cc = find_index(headers, 'CC')
- lineage = find_index(headers, 'Lineage')
- species = find_index(headers, 'species')
-
- for line in reader:
- desc = []
- if clonal > -1 and line[clonal]:
- desc.append(f"clonal_complex={line[clonal]}")
- if cc > -1 and line[cc]:
- desc.append(f"CC={line[cc]}")
- if lineage > -1 and line[lineage]:
- desc.append(f"Lineage={line[lineage]}")
- if species > -1 and line[species]:
- desc.append(f"species={line[species]}")
- schema_dict[line[0]] = ','.join(desc)
-
- output = mlstout[:3]
- if mlstST in schema_dict:
- output.append(schema_dict[mlstST])
- output.extend(mlstout[3:])
-
- print("\t".join(output))
-
-if __name__ == "__main__":
- if len(sys.argv) < 2:
- print("Usage: python mlstAddFields.py [db_path]")
- sys.exit(1)
-
- mlst_file = sys.argv[1]
- db_path = sys.argv[2] if len(sys.argv) > 2 else None
-
- main(mlst_file, db_path)
-
diff -r 7272a9c36149 -r dc12d6ac296d run_fastq_scan.py
--- a/run_fastq_scan.py Fri Jan 19 11:50:31 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-#!/usr/bin/env
-
-## Run fastq-scan to get mean read length and mean quality score
-## author: errol strain, estrain@gmail.com
-
-from argparse import (ArgumentParser, FileType)
-import sys
-import glob
-import subprocess
-import json
-
-def parse_args():
- "Parse the input arguments, use '-h' for help."
-
- parser = ArgumentParser(description='Run fastq-scan on a pair of gzipped FASTQ files')
-
- # Read inputs
- parser.add_argument('--fastq', type=str, required=True, nargs=2, help='FASTQ files')
- parser.add_argument('--output', type=str, required=True, nargs=1, help='Output File')
- parser.add_argument('--type', type=str, required=True, nargs=1, help='File Type (text or gz)')
-
- return parser.parse_args()
-
-args =parse_args()
-
-# FASTA file
-r1 = args.fastq[0]
-r2 = args.fastq[1]
-
-# Read 1
-if str(args.type[0]) == "gz" :
- cmd1 = ["zcat", r1]
-else :
- cmd1 = ["cat", r1]
-cmd2 = ["fastq-scan"]
-pcmd1= subprocess.Popen(cmd1,stdout= subprocess.PIPE,shell=False)
-r1json = json.loads(subprocess.Popen(cmd2, stdin=pcmd1.stdout,stdout=subprocess.PIPE,shell=False).communicate()[0])
-r1q = round(r1json["qc_stats"]["qual_mean"],1)
-r1l = round(r1json["qc_stats"]["read_mean"],1)
-
-# Read 2
-if str(args.type[0]) == "gz" :
- cmd1 = ["zcat", r2]
-else :
- cmd1 = ["cat", r2]
-cmd2 = ["fastq-scan"]
-pcmd1= subprocess.Popen(cmd1,stdout= subprocess.PIPE,shell=False)
-r2json = json.loads(subprocess.Popen(cmd2, stdin=pcmd1.stdout,stdout=subprocess.PIPE,shell=False).communicate()[0])
-r2q = round(r2json["qc_stats"]["qual_mean"],1)
-r2l = round(r2json["qc_stats"]["read_mean"],1)
-
-# Write output to be used by sum_mlst.py
-output = open(args.output[0],"w")
-output.write(str(r1l) + "\t" + str(r2l) + "\t" + str(r1q) + "\t" + str(r2q))
diff -r 7272a9c36149 -r dc12d6ac296d sum_mlst.py
--- a/sum_mlst.py Fri Jan 19 11:50:31 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-#!/usr/bin/env
-
-## Generate basic summary stats from SKESA, fastq-scan, and MLST output.
-## author: errol strain, estrain@gmail.com
-
-from argparse import (ArgumentParser, FileType)
-import sys
-import glob
-import subprocess
-from decimal import Decimal
-
-def parse_args():
- "Parse the input arguments, use '-h' for help."
-
- parser = ArgumentParser(description='Generate Basic Summary Statistics from SKESA assemblies, fastq-scan output, and MLST reports')
-
- # Read inputs
- parser.add_argument('--fasta', type=str, required=True, nargs=1, help='SKESA FASTA assembly')
- parser.add_argument('--mlst', type=str, required=True, nargs=1, help='MLST output')
- parser.add_argument('--fqscan', type=str, required=True, nargs=1, help='fastq-scan output')
- parser.add_argument('--med', type=str, required=True, nargs=1, help='Median Insert Size')
- parser.add_argument('--output', type=str, required=True, nargs=1, help='Output File')
-
- return parser.parse_args()
-
-args =parse_args()
-
-# FASTA file
-fasta = args.fasta[0]
-
-# Get individual and total length of contigs
-cmd = ["awk", "/^>/ {if (seqlen){print seqlen}; ;seqlen=0;next; } { seqlen = seqlen +length($0)}END{print seqlen}",fasta]
-seqlen = subprocess.Popen(cmd,stdout= subprocess.PIPE).communicate()[0]
-intlen = list(map(int,seqlen.splitlines()))
-totlen = sum(intlen)
-# Count number of contigs
-numtigs = len(intlen)
-
-# Get coverage information from skesa fasta header
-cmd1 = ["grep",">",fasta]
-cmd2 = ["cut","-f","3","-d","_"]
-p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE)
-p2 = subprocess.Popen(cmd2, stdin=p1.stdout, stdout=subprocess.PIPE).communicate()[0]
-covdep = map(float,p2.splitlines())
-covlist = [a*b for a,b in zip([float(i) for i in intlen],covdep)]
-covdep = round(sum(covlist)/totlen,1)
-
-# Calculate N50
-vals = [int(i) for i in intlen]
-vals.sort(reverse=True)
-n50=0
-for counter in range(0,len(vals)-1):
- if sum(vals[0:counter]) > (totlen/2):
- n50=vals[counter-1]
- break
-
-# Read in MLST output
-mlst = open(args.mlst[0],"r")
-profile = mlst.readline()
-els = profile.split("\t")
-
-# Read in median insert size
-medfile = open(args.med[0],"r")
-insert = medfile.readline()
-insert = insert.rstrip()
-
-# Read in fastq-scan
-fqfile = open(args.fqscan[0],"r")
-fq = fqfile.readline()
-fq = fq.rstrip()
-
-output = open(args.output[0],"w")
-
-filehead = str("File\tContigs\tLength\tEstCov\tN50\tMedianInsert\tMeanLength_R1\tMeanLength_R2\tMeanQ_R1\tMeanQ_R2\tScheme\tST\n")
-output.write(filehead)
-
-output.write(str(fasta) + "\t" + str(numtigs) + "\t" + str(totlen) + "\t" + str(covdep) + "\t" + str(n50) +"\t" + str(insert) + "\t" + str(fq))
-for counter in range(1,len(els)):
- output.write("\t" + str(els[counter]))
diff -r 7272a9c36149 -r dc12d6ac296d tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Fri Jan 19 11:50:31 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-
-
-
-