changeset 0:db4fe7172114 draft

"planemo upload for repository https://github.com/katholt/sonneityping commit 1b4231e8f98a234c29f57fef400f58f14645dc95"
author iuc
date Thu, 16 Sep 2021 06:52:18 +0000
parents
children 0ac12c344bfa
files macros.xml parse_mykrobe_predict.xml test-data/SRR6114360.json test-data/alleles.txt
diffstat 4 files changed, 451 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Sep 16 06:52:18 2021 +0000
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">20210201</token> <!-- used as the version of the package requirement below -->
+    <xml name="requirements"> <!-- pulled into a tool with an expand element naming this macro -->
+        <requirements>
+            <requirement version="@TOOL_VERSION@" type="package">sonneityping</requirement>
+        </requirements>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/parse_mykrobe_predict.xml	Thu Sep 16 06:52:18 2021 +0000
@@ -0,0 +1,71 @@
+<tool id="sonneityping" name="parse mykrobe predict" version="@TOOL_VERSION@" profile="20.05">
+    <description>for Shigella sonnei and tabulate results into a single tab-delimited file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+        parse_mykrobe_predict
+        ## every selected Mykrobe JSON dataset is listed after the --jsons flag
+        --jsons
+        #for $mykrobe_report in $json
+            '$mykrobe_report'
+        #end for
+        ## --alleles is emitted only when a dataset was chosen for the optional parameter
+        #if $optional.alleles
+            --alleles '$optional.alleles'
+        #end if
+        ## fixed prefix; the tool output is collected from results_predictResults.tsv
+        --prefix results
+    ]]></command>
+    <inputs>
+        <param name="json" type="data" format="json" multiple="true" optional="false" help="Mykrobe predict results in JSON format." label="Mykrobe results to parse."/>
+        <!-- the only member of this section is optional, so the whole section may be left unset -->
+        <section name="optional" expanded="false" title="Optional Parameters">
+            <param name="alleles" type="data" format="txt,tabular" optional="true" help="Links each lineage name to its human-readable counterpart." label="Alleles"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="parsed_output" format="tabular" from_work_dir="results_predictResults.tsv" label="parsed output"/> <!-- file name starts with the "results" prefix passed to the command -->
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1"> <!-- run with the optional allele table; section members are addressed as section|param -->
+            <param name="json" ftype="json" value="SRR6114360.json"/>
+            <param name="optional|alleles" ftype="txt" value="alleles.txt" />
+            <output name="parsed_output">
+                <assert_contents>
+                    <has_text text="3.7.30.4" />
+                    <has_text text="S. sonnei" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1"> <!-- run without the optional allele table -->
+            <param name="json" ftype="json" value="SRR6114360.json"/>
+            <output name="parsed_output">
+                <assert_contents>
+                    <has_text text="3.7.30.4" />
+                    <has_text text="S. sonnei" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Parse Mykrobe Predict
+=====================
+
+Parses Mykrobe predict results for Shigella sonnei.
+
+Mykrobe v0.9.0+ can identify input genomes as S. sonnei, assign those identified as S. sonnei to hierarchical genotypes based on detection of single nucleotide variants (SNVs; defined in the file alleles.txt), and report known mutations in the quinolone-resistance determining region (QRDR) of genes gyrA (S83L, D87G, D87Y) and parC (S80I).
+
+Details of the genotyping scheme are available in Hawkey et al., 2021, Nature Communications. Proposals for new genotype definitions to be added to the scheme can be submitted as an issue in the sonneityping GitHub repository (https://github.com/katholt/sonneityping).]]></help>
+    <citations>
+        <citation type="bibtex">
+            @misc{GitHubsonneityping,
+                title = {sonneityping},
+                journal = {GitHub Repository},
+                url = {https://github.com/katholt/sonneityping},
+            }
+        </citation>
+    </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRR6114360.json	Thu Sep 16 06:52:18 2021 +0000
@@ -0,0 +1,223 @@
+{
+    "SRR6114360": {
+        "susceptibility": {
+            "quinolones": {
+                "predict": "S"
+            }
+        },
+        "phylogenetics": {
+            "phylo_group": {
+                "Ecoli_Shigella": {
+                    "percent_coverage": 100.0,
+                    "median_depth": 26
+                }
+            },
+            "sub_complex": {
+                "Unknown": {
+                    "percent_coverage": -1,
+                    "median_depth": -1
+                }
+            },
+            "species": {
+                "Shigella_sonnei": {
+                    "percent_coverage": 95.163,
+                    "median_depth": 22.0
+                }
+            },
+            "lineage": {
+                "lineage": [
+                    "lineage3.7.30.4"
+                ],
+                "calls_summary": {
+                    "lineage3.7.30.4": {
+                        "good_nodes": 4,
+                        "tree_depth": 4,
+                        "genotypes": {
+                            "lineage3": 1,
+                            "lineage3.7": 1,
+                            "lineage3.7.30": 1,
+                            "lineage3.7.30.4": 1
+                        }
+                    }
+                },
+                "calls": {
+                    "lineage3.7.30.4": {
+                        "lineage3": {
+                            "C59602T": {
+                                "variant": "ref-C59602T?var_name=C59602T&num_alts=1&ref=NC_016822&enum=0&gene=NA&mut=C59602T",
+                                "genotype": [
+                                    1,
+                                    1
+                                ],
+                                "genotype_likelihoods": [
+                                    -3282.0148702940733,
+                                    -99999999,
+                                    -19.32539954665346
+                                ],
+                                "info": {
+                                    "coverage": {
+                                        "reference": {
+                                            "percent_coverage": 0.0,
+                                            "median_depth": 0,
+                                            "min_non_zero_depth": 0,
+                                            "kmer_count": 0,
+                                            "klen": 21
+                                        },
+                                        "alternate": {
+                                            "percent_coverage": 100.0,
+                                            "median_depth": 27,
+                                            "min_non_zero_depth": 26,
+                                            "kmer_count": 541,
+                                            "klen": 21
+                                        }
+                                    },
+                                    "expected_depths": [
+                                        26
+                                    ],
+                                    "contamination_depths": [],
+                                    "filter": [],
+                                    "conf": 3263
+                                },
+                                "_cls": "Call.VariantCall"
+                            }
+                        },
+                        "lineage3.7": {
+                            "G4080478A": {
+                                "variant": "ref-G4080478A?var_name=G4080478A&num_alts=1&ref=NC_016822&enum=0&gene=NA&mut=G4080478A",
+                                "genotype": [
+                                    1,
+                                    1
+                                ],
+                                "genotype_likelihoods": [
+                                    -3961.55750528479,
+                                    -99999999,
+                                    -39.85243726626552
+                                ],
+                                "info": {
+                                    "coverage": {
+                                        "reference": {
+                                            "percent_coverage": 0.0,
+                                            "median_depth": 0,
+                                            "min_non_zero_depth": 0,
+                                            "kmer_count": 0,
+                                            "klen": 21
+                                        },
+                                        "alternate": {
+                                            "percent_coverage": 100.0,
+                                            "median_depth": 34,
+                                            "min_non_zero_depth": 34,
+                                            "kmer_count": 702,
+                                            "klen": 21
+                                        }
+                                    },
+                                    "expected_depths": [
+                                        26
+                                    ],
+                                    "contamination_depths": [],
+                                    "filter": [],
+                                    "conf": 3922
+                                },
+                                "_cls": "Call.VariantCall"
+                            }
+                        },
+                        "lineage3.7.30": {
+                            "A1138004G": {
+                                "variant": "ref-A1138004G?var_name=A1138004G&num_alts=2&ref=NC_016822&enum=0&gene=NA&mut=A1138004G",
+                                "genotype": [
+                                    1,
+                                    1
+                                ],
+                                "genotype_likelihoods": [
+                                    -1607.6280372631725,
+                                    -99999999,
+                                    -477.52941234336663
+                                ],
+                                "info": {
+                                    "coverage": {
+                                        "reference": {
+                                            "percent_coverage": 0.0,
+                                            "median_depth": 0,
+                                            "min_non_zero_depth": 0,
+                                            "kmer_count": 0,
+                                            "klen": 21
+                                        },
+                                        "alternate": {
+                                            "percent_coverage": 100.0,
+                                            "median_depth": 1,
+                                            "min_non_zero_depth": 1,
+                                            "kmer_count": 20,
+                                            "klen": 21
+                                        }
+                                    },
+                                    "expected_depths": [
+                                        26
+                                    ],
+                                    "contamination_depths": [],
+                                    "filter": [
+                                        "LOW_TOTAL_DEPTH"
+                                    ],
+                                    "conf": 1130
+                                },
+                                "_cls": "Call.VariantCall"
+                            }
+                        },
+                        "lineage3.7.30.4": {
+                            "G3082193A": {
+                                "variant": "ref-G3082193A?var_name=G3082193A&num_alts=1&ref=NC_016822&enum=0&gene=NA&mut=G3082193A",
+                                "genotype": [
+                                    1,
+                                    1
+                                ],
+                                "genotype_likelihoods": [
+                                    -2726.2314885023525,
+                                    -99999999,
+                                    -40.69232343956539
+                                ],
+                                "info": {
+                                    "coverage": {
+                                        "reference": {
+                                            "percent_coverage": 0.0,
+                                            "median_depth": 0,
+                                            "min_non_zero_depth": 0,
+                                            "kmer_count": 0,
+                                            "klen": 21
+                                        },
+                                        "alternate": {
+                                            "percent_coverage": 100.0,
+                                            "median_depth": 20,
+                                            "min_non_zero_depth": 18,
+                                            "kmer_count": 400,
+                                            "klen": 21
+                                        }
+                                    },
+                                    "expected_depths": [
+                                        26
+                                    ],
+                                    "contamination_depths": [],
+                                    "filter": [],
+                                    "conf": 2686
+                                },
+                                "_cls": "Call.VariantCall"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "kmer": 21,
+        "probe_sets": [
+            "/home/eric/miniconda3/lib/python3.8/site-packages/mykrobe/data/sonnei/sonnei.lineage.20210201.probes.fa.gz",
+            "/home/eric/miniconda3/lib/python3.8/site-packages/mykrobe/data/sonnei/sonnei.mlst.20201010.sonnei.fa.gz",
+            "/home/eric/miniconda3/lib/python3.8/site-packages/mykrobe/data/sonnei/sonnei.probe.uidA.20201010.fa.gz",
+            "/home/eric/miniconda3/lib/python3.8/site-packages/mykrobe/data/sonnei/sonnei.qrdr.20201010.probes.fa.gz"
+        ],
+        "files": [
+            "SRR6114360_1"
+        ],
+        "version": {
+            "mykrobe-predictor": "v0.10.0",
+            "mykrobe-atlas": "v0.10.0"
+        },
+        "genotype_model": "kmer_count"
+    }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alleles.txt	Thu Sep 16 06:52:18 2021 +0000
@@ -0,0 +1,148 @@
+159897	C	1	Lineage I	lineage1
+204118	G	1.1	-	lineage1.1
+1173345	T	1.2	-	lineage1.2
+494060	C	1.3	-	lineage1.3
+434389	T	1.4	-	lineage1.4
+3465611	A	1.5	-	lineage1.5
+2164632	C	1.5.1	-	lineage1.5.1
+2138308	A	1.5.2	-	lineage1.5.2
+220353	T	1.5.3	-	lineage1.5.3
+1411472	A	1.6	-	lineage1.6
+59422	C	1.6.1	-	lineage1.6.1
+452958	G	1.6.2	-	lineage1.6.2
+4108759	A	1.6.3	-	lineage1.6.3
+4180475	A	1.6.4	-	lineage1.6.4
+4967081	G	2	Lineage II	lineage2
+3922942	A	2.1	-	lineage2.1
+81464	T	2.1.1	-	lineage2.1.1
+765305	T	2.1.2	-	lineage2.1.2
+1148833	T	2.1.3	-	lineage2.1.3
+2751379	A	2.1.4	-	lineage2.1.4
+3209745	G	2.1.5	-	lineage2.1.5
+2163231	A	2.1.6	-	lineage2.1.6
+4969251	T	2.1.7	-	lineage2.1.7
+3277620	A	2.1.8	-	lineage2.1.8
+2964241	C	2.2	-	lineage2.2
+3231002	A	2.3	-	lineage2.3
+3209436	A	2.4	-	lineage2.4
+4418301	A	2.4.1	-	lineage2.4.1
+1995501	A	2.4.2	-	lineage2.4.2
+2693836	A	2.4.3	-	lineage2.4.3
+3171681	A	2.5	-	lineage2.5
+2163325	A	2.5.1	-	lineage2.5.1
+2137487	T	2.5.2	-	lineage2.5.2
+3943215	A	2.6	-	lineage2.6
+3759854	C	2.6.1	-	lineage2.6.1
+2575942	T	2.6.2	-	lineage2.6.2
+3709499	T	2.7	-	lineage2.7
+4853458	T	2.7.1	-	lineage2.7.1
+527464	T	2.7.2	-	lineage2.7.2
+398556	A	2.7.3	-	lineage2.7.3
+149526	A	2.7.4	-	lineage2.7.4
+2582118	G	2.8	-	lineage2.8
+1658839	T	2.8.1	Korea II	lineage2.8.1
+4759777	C	2.8.2	-	lineage2.8.2
+3295184	A	2.9	Latin America IIa	lineage2.9
+159867	A	2.9.1	Latin America IIa	lineage2.9.1
+2161563	A	2.9.2	Latin America IIa	lineage2.9.2
+3687240	T	2.10	-	lineage2.10
+693457	T	2.10.1	Latin America IIa	lineage2.10.1
+3815385	T	2.10.2	Latin America IIa	lineage2.10.2
+689910	T	2.10.3	Latin America IIa	lineage2.10.3
+3658948	A	2.10.4	Latin America IIa	lineage2.10.4
+103132	G	2.10.5	Latin America IIa	lineage2.10.5
+12848	G	2.10.6	Latin America IIa	lineage2.10.6
+4568931	T	2.10.7	Latin America IIa	lineage2.10.7
+2019490	C	2.10.8	Latin America IIa	lineage2.10.8
+3780988	T	2.10.9	Latin America IIa	lineage2.10.9
+4655571	T	2.11	Latin America IIb	lineage2.11
+1097146	T	2.11.1	Latin America IIb	lineage2.11.1
+3643604	A	2.11.2	Latin America IIb	lineage2.11.2
+3231524	A	2.11.3	Latin America IIb	lineage2.11.3
+4315218	A	2.11.4	Latin America IIb	lineage2.11.4
+1039929	A	2.11.5	Latin America IIb	lineage2.11.5
+815462	T	2.12	Latin America IIb	lineage2.12
+4021481	A	2.12.1	Latin America IIb	lineage2.12.1
+3835124	A	2.12.2	Latin America IIb	lineage2.12.2
+2142799	A	2.12.3	Latin America IIb	lineage2.12.3
+1071597	G	2.12.4	Latin America IIb	lineage2.12.4
+59602	T	3	Lineage III	lineage3
+2834293	C	3.1	-	lineage3.1
+149058	A	3.2	-	lineage3.2
+1632620	C	3.3	-	lineage3.3
+635476	A	3.4	Latin America III	lineage3.4
+950923	T	3.4.1	Latin America III	lineage3.4.1
+1424781	T	3.4.2	Latin America III	lineage3.4.2
+2591332	G	3.4.3	Latin America III	lineage3.4.3
+2565078	T	3.4.4	Latin America III	lineage3.4.4
+3579150	T	3.4.5	Latin America III	lineage3.4.5
+1058810	T	3.4.6	Latin America III	lineage3.4.6
+1236451	G	3.5	-	lineage3.5
+3618381	A	3.6	Central Asia III	lineage3.6
+484124	T	3.6.1	CipR_parent	lineage3.6.1
+4639269	G	3.6.1.1	CipR	lineage3.6.1.1
+3861202	C	3.6.1.1.1	CipR.SEA	lineage3.6.1.1.1
+954442	T	3.6.1.1.2	CipR.MSM5	lineage3.6.1.1.2
+2335724	T	3.6.1.1.3	CipR	lineage3.6.1.1.3
+3314208	T	3.6.1.1.3.1	CipR.MSM1	lineage3.6.1.1.3.1
+3338871	A	3.6.2	Central Asia III	lineage3.6.2
+4134508	T	3.6.3	Central Asia III	lineage3.6.3
+985650	A	3.6.4	Central Asia III	lineage3.6.4
+4080478	A	3.7	Global III	lineage3.7
+3834998	G	3.7.1	Global III	lineage3.7.1
+2362814	A	3.7.3	Global III	lineage3.7.3
+3835031	T	3.7.4	Global III	lineage3.7.4
+1770481	T	3.7.5	Global III	lineage3.7.5
+2464143	A	3.7.6	Global III	lineage3.7.6
+4079006	T	3.7.7	Global III	lineage3.7.7
+3890722	A	3.7.8	Global III	lineage3.7.8
+2244329	C	3.7.9	Global III	lineage3.7.9
+2831896	A	3.7.10	Global III	lineage3.7.10
+2564547	A	3.7.11	Global III	lineage3.7.11
+2347512	A	3.7.12	Global III	lineage3.7.12
+2057175	A	3.7.13	Global III	lineage3.7.13
+3649319	A	3.7.14	Global III	lineage3.7.14
+3780349	C	3.7.15	Global III	lineage3.7.15
+1103408	T	3.7.16	Global III	lineage3.7.16
+498700	T	3.7.17	Global III	lineage3.7.17
+2052270	T	3.7.18	Global III	lineage3.7.18
+4599365	T	3.7.19	Global III	lineage3.7.19
+442220	T	3.7.20	Global III	lineage3.7.20
+643175	T	3.7.21	Global III	lineage3.7.21
+3624938	C	3.7.22	Global III	lineage3.7.22
+2188269	T	3.7.23	Global III	lineage3.7.23
+12692	A	3.7.24	Global III	lineage3.7.24
+3336178	T	3.7.25	MSM4	lineage3.7.25
+656895	A	3.7.26	Global III	lineage3.7.26
+4249495	A	3.7.27	Global III	lineage3.7.27
+4716168	T	3.7.28	Global III	lineage3.7.28
+4223696	T	3.7.29	Global III VN	lineage3.7.29
+4149688	A	3.7.29.1	Global III VN2	lineage3.7.29.1
+4248085	T	3.7.29.1.1	Global III VN3	lineage3.7.29.1.1
+1695964	A	3.7.29.1.1.1	Global III VN3.KH2	lineage3.7.29.1.1.1
+2392221	T	3.7.29.1.1.2	Global III VN4	lineage3.7.29.1.1.2
+985914	T	3.7.29.1.2	Global III VN2.MSM2	lineage3.7.29.1.2
+1058820	T	3.7.29.1.2.1	Global III VN2.MSM2.Aus	lineage3.7.29.1.2.1
+3638083	T	3.7.29.1.3	Global III VN2.Hue	lineage3.7.29.1.3
+2151320	T	3.7.29.1.4	Global III VN2.KH1	lineage3.7.29.1.4
+294523	T	3.7.29.1.4.1	Global III VN2.KH1.Aus	lineage3.7.29.1.4.1
+1138004	G	3.7.30	Global III Middle East III	lineage3.7.30
+4117025	T	3.7.30.1	Global III Middle East III	lineage3.7.30.1
+185962	T	3.7.30.2	Global III Middle East III	lineage3.7.30.2
+4478931	T	3.7.30.3	Global III Middle East III	lineage3.7.30.3
+3082193	A	3.7.30.4	Global III Israel III	lineage3.7.30.4
+228956	T	3.7.30.4.1	Global III OJC	lineage3.7.30.4.1
+459285	T	3.7.31	Global III	lineage3.7.31
+4620311	T	3.7.32	Global III	lineage3.7.32
+2162977	A	3.7.33	Global III	lineage3.7.33
+3158047	T	3.8	-	lineage3.8
+3578187	T	3.9	-	lineage3.9
+4386138	G	3.10	-	lineage3.10
+4807527	T	4	Lineage IV	lineage4
+5824	T	5	Lineage V	lineage5
+3861343	G	5.1.1	-	lineage5.1.1
+4840210	T	5.1.2	-	lineage5.1.2
+3768799	A	5.1.3	-	lineage5.1.3
+4617179	G	5.1.4	-	lineage5.1.4
+1539008	T	5.1.5	-	lineage5.1.5
+1268677	T	5.1.6	-	lineage5.1.6