Mercurial > repos > jjjjia > cpo_prediction

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpo_combine.sh	Sat Aug 25 20:56:37 2018 -0400
@@ -0,0 +1,9 @@
+# Usage: bash cpo_combine.sh FILE1,FILE2,...   (writes ./combined.tsv in the current directory)
+
+IFS=',' read -ra ADDR <<< "$1" # split the comma-separated path list in $1 into the ADDR array
+
+head -n 1 -- "${ADDR[0]}" > ./combined.tsv # header line is taken from the first file only
+for i in "${ADDR[@]}"; do
+	printf '%s\n' "$i" # report each input path on stdout
+	tail -n 1 -- "$i" >> ./combined.tsv # append only the last line of each input
+done
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpo_combiner.sh	Sat Aug 25 20:56:37 2018 -0400
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Usage: cpo_combiner.sh FILE1,FILE2,...   (writes ./combined.tsv in the current directory)
+
+IFS=',' read -ra ADDR <<< "$1" # split the comma-separated path list in $1 into the ADDR array
+
+head -n 1 -- "${ADDR[0]}" > ./combined.tsv # header line is taken from the first file only
+for i in "${ADDR[@]}"; do
+        printf '%s\n' "$i" # report each input path on stdout
+        tail -n 1 -- "$i" >> ./combined.tsv # append only the last line of each input
+done
\ No newline at end of file
--- a/cpo_galaxy_prediction.py	Fri Aug 24 19:10:42 2018 -0400
+++ b/cpo_galaxy_prediction.py	Sat Aug 25 20:56:37 2018 -0400
@@ -457,7 +457,10 @@
         pf.start = int(plasmidFinder.iloc[i,2])
         pf.end = int(plasmidFinder.iloc[i,3])
         pf.gene = str(plasmidFinder.iloc[i,4])
-        pf.shortGene = pf.gene[:pf.gene.index("_")]
+        if (pf.gene.find("_") > -1): #gene name contains an underscore: keep only the part before the first one
+            pf.shortGene = pf.gene[:pf.gene.index("_")]
+        else: #no underscore: index("_") would raise ValueError, so use the whole gene name
+            pf.shortGene = pf.gene
         pf.coverage = str(plasmidFinder.iloc[i,5])
         pf.coverage_map = str(plasmidFinder.iloc[i,6])
         pf.gaps = str(plasmidFinder.iloc[i,7])
@@ -528,12 +531,14 @@
     rgiAMR = ParseRGIResult(rgi, plasmidContigs, likelyPlasmidContigs) # outputDir + "/predictions/" + ID + ".rgi.txt", plasmidContigs, likelyPlasmidContigs)#***********************
     ToJson(rgiAMR, "rgi.json") #*************

-    carbapenamases = []
+    carbapenamases = []
+    resfinderCarbas = [] #list of rfinder objects for lindaout list
     amrGenes = []
     for keys in rFinder:
         carbapenamases.append(rFinder[keys].shortGene + "(" + rFinder[keys].source + ")")
+        resfinderCarbas.append(rFinder[keys])
     for keys in rgiAMR:
-        if (rgiAMR[keys].Drug_Class.find("carbapenem") > -1):
+        if (rgiAMR[keys].Drug_Class.find("carbapenem") > -1 and rgiAMR[keys].AMR_Gene_Family.find("beta-lactamase") > -1): #only list an RGI hit as a carbapenemase when Drug_Class mentions carbapenem AND AMR_Gene_Family mentions beta-lactamase
             if (rgiAMR[keys].Best_Hit_ARO not in carbapenamases):
                 carbapenamases.append(rgiAMR[keys].Best_Hit_ARO+ "(" + rgiAMR[keys].source + ")")
         else:
@@ -598,7 +603,28 @@
     #TSV output
     lindaOut = []
     tsvOut = []
-    lindaOut.append("new\tID\tQUALITY\tExpected Species\tMLST Scheme\tSequence Type\tMLST_ALLELE_1\tMLST_ALLELE_2\tMLST_ALLELE_3\tMLST_ALLELE_4\tMLST_ALLELE_5\tMLST_ALLELE_6\tMLST_ALLELE_7\tSEROTYPE\tK_CAPSULE\tPLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH\tPLASMID_1_CARBAPENEMASE\tPLASMID_1_INC_GROUP\tPLASMID_2_RFLP\tPLASMID_2_FAMILY\tPLASMID_2_BEST_MATCH\tPLASMID_2_COVERAGE\tPLASMID_2_SNVS_TO_BEST_MATCH\tPLASMID_2_CARBAPENEMASE\tPLASMID_2_INC_GROUP")
+    lindaOut.append("ID\tQUALITY\tExpected Species\tMLST Scheme\tSequence Type\tMLST_ALLELE_1\tMLST_ALLELE_2\tMLST_ALLELE_3\tMLST_ALLELE_4\tMLST_ALLELE_5\tMLST_ALLELE_6\tMLST_ALLELE_7\tSEROTYPE\tK_CAPSULE\tPLASMID_1_RFLP\tPLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH\tPLASMID_1_CARBAPENEMASE\tPLASMID_1_INC_GROUP\tPLASMID_2_RFLP\tPLASMID_2_FAMILY\tPLASMID_2_BEST_MATCH\tPLASMID_2_COVERAGE\tPLASMID_2_SNVS_TO_BEST_MATCH\tPLASMID_2_CARBAPENEMASE\tPLASMID_2_INC_GROUP")
+    lindaTemp = ID + "\t" #id
+    lindaTemp += "\t" #quality (left blank)
+    lindaTemp += expectedSpecies + "\t" #expected species
+    lindaTemp += mlstHit.species + "\t" #mlst scheme
+    lindaTemp += str(mlstHit.seqType)  + "\t" #sequence type
+    lindaTemp += "\t".join(mlstHit.scheme.split(";")) + "\t" #mlst alleles: one column per ';'-separated entry (the header provides 7)
+    lindaTemp += "\t\t" #serotype and k capsule (left blank)
+
+    #one group of plasmid columns per resfinder hit whose source is "plasmid"
+    for carbs in resfinderCarbas:
+        if (carbs.source == "plasmid"):
+            lindaTemp += "\t\t\t\t\t" + carbs.shortGene + "\t" #blank RFLP, FAMILY, BEST_MATCH, COVERAGE, SNVS_TO_BEST_MATCH, then the CARBAPENEMASE column
+            contig = carbs.sequence[6:] #drop the first 6 characters; presumably what remains is the contig number -- confirm
+            for i in mSuite.keys():
+                if (str(mSuite[i].contig_num) == str(contig)): #found the right plasmid
+                    lindaTemp += mSuite[i].rep_type
+            lindaTemp += "\t" #always terminate the INC_GROUP column so the next plasmid's columns line up with the header
+    lindaOut.append(lindaTemp)
+    with open("summary.linda.tsv", 'w') as out: #context manager closes the file once every row is written
+        for item in lindaOut:
+            out.write("%s\n" % item)

     tsvOut.append("new\tID\tExpected Species\tMLST Species\tSequence Type\tMLST Scheme\tCarbapenem Resistance Genes\tOther AMR Genes\tTotal Plasmids\tPlasmids ID\tNum_Contigs\tPlasmid Length\tPlasmid RepType\tPlasmid Mobility\tNearest Reference\tDefinitely Plasmid Contigs\tLikely Plasmid Contigs")
     #start with ID
--- a/cpo_galaxy_predictions.xml	Fri Aug 24 19:10:42 2018 -0400
+++ b/cpo_galaxy_predictions.xml	Sat Aug 25 20:56:37 2018 -0400
@@ -26,10 +26,11 @@
         <param type="data" name="abricate" format="tabular" />
         <param type="data" name="rgi" format="tabular" />
         <param type="data" name="plasmidfinder" format="tabular" />
-        <param type="text" name="expected"/>
+        <param type="text" name="expected" optional ="false"/>
     </inputs>
     <outputs>
         <data name="tsvSummary" format="tabular" from_work_dir="summary.tsv"/>
+          <data name="tsvSummaryExistingFormat" format="tabular" from_work_dir="summary.linda.tsv"/>
 		  <data name="txtSummary" format="txt" from_work_dir="summary.txt"/>
     </outputs>
 	<tests>
--- a/cpo_galaxy_tree.py	Fri Aug 24 19:10:42 2018 -0400
+++ b/cpo_galaxy_tree.py	Sat Aug 25 20:56:37 2018 -0400
@@ -263,7 +263,8 @@
             n.add_face(addFace(mData.CarbapenemResistanceGenes), index, "aligned")
             index = index + 1
             for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds distance matrix
-                n.add_face(addFace(list(distanceDict[n.name])[i]), index + i, "aligned")
+                if (n.name in distanceDict): #only add distance faces for nodes whose name is a key of the distance matrix
+                    n.add_face(addFace(list(distanceDict[n.name])[i]), index + i, "aligned") #i-th value of this node's entry, placed at face position index + i

     t.render("./tree.pdf", w=5000,units="mm", tree_style=ts) #save it as a png. or an phyloxml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpo_prediction_combine.xml	Sat Aug 25 20:56:37 2018 -0400
@@ -0,0 +1,31 @@
+<tool id="cpo_prediction_combiner" name="cpo_prediction_combiner" version="0.1.0">
+  <description>This tool combines a collection result from cpo_prediction_parser into 1 file</description>
+  <requirements>
+		<requirement type="package" version="3.6">python</requirement>
+	</requirements>
+	<command detect_errors="exit_code">
+    <![CDATA[
+      bash '$__tool_directory__/cpo_combine.sh' '$indirs'
+    ]]>
+	</command>
+    <inputs>
+      <param name="indirs" type="data" multiple="true" format="tabular"/>
+    </inputs>
+    <outputs>
+        <data name="combinedSummary" format="tabular" from_work_dir="combined.tsv"/>
+    </outputs>
+	<help>
+		This tool combines multiple single prediction outputs together into one.
+	</help>
+	<citations>
+        <citation type="bibtex">
+@misc{cpo,
+  author = {j, j},
+  year = {2018},
+  title = {cpo_prediction},
+  publisher = {j},
+  journal = {j of j},
+  url = {https://bfjia.net},
+}</citation>
+    </citations>
+</tool>
\ No newline at end of file