# HG changeset patch
# User jjjjia
# Date 1535041275 14400
# Node ID cabceaa239e49312e6ba51e39d65437c47229e81
# Parent 698579246d0df3b9d4009182c4ee5fc20d8a3906
planemo upload
diff -r 698579246d0d -r cabceaa239e4 cpo_clustalw.xml
--- a/cpo_clustalw.xml Tue Aug 21 17:53:08 2018 -0400
+++ b/cpo_clustalw.xml Thu Aug 23 12:21:15 2018 -0400
@@ -5,38 +5,17 @@
-
-
+
-
-**Syntax**
-
-This tool reconstructs individual plasmid sequences from draft genome assemblies using the plasmid reference databases.
-
-For more information please visit https://github.com/phac-nml/mob-suite/.
-
------
-
-**Input:**
-
-A FASTA file with a single or multiple contigs (e.g. a draft genome assembly):
-
-
-**Output:**
-
-Tab-delimited report listing information for each input contig on its cluster number, possible replicon, relaxase, and repetitive elements types, etc. Refer to https://github.com/phac-nml/mob-suite#mob-recon-contig-report-format for the description of each column.
-
-Note: Plasmid sequences will not be output if none are found. Some plasmid could be intergrated into a chromosome.
-
-
+ clustalw2 -tree -infile=$input -outputtree=nj
@@ -49,4 +28,4 @@
url = {https://github.com/phac-nml/mob-suite}
}
-
+
\ No newline at end of file
diff -r 698579246d0d -r cabceaa239e4 cpo_galaxy_prediction.py
--- a/cpo_galaxy_prediction.py Tue Aug 21 17:53:08 2018 -0400
+++ b/cpo_galaxy_prediction.py Thu Aug 23 12:21:15 2018 -0400
@@ -26,7 +26,7 @@
import numpy
-debug = True #debug skips the shell scripts and also dump out a ton of debugging messages
+debug = False #debug skips the shell scripts and also dump out a ton of debugging messages
if not debug:
#parses some parameters
@@ -596,10 +596,13 @@
#TSV output
+ lindaOut = []
tsvOut = []
- tsvOut.append("ID\tExpected Species\tMLST Species\tSequence Type\tMLST Scheme\tCarbapenem Resistance Genes\tOther AMR Genes\tTotal Plasmids\tPlasmids ID\tNum_Contigs\tPlasmid Length\tPlasmid RepType\tPlasmid Mobility\tNearest Reference\tDefinitely Plasmid Contigs\tLikely Plasmid Contigs")
+ lindaOut.append("new\tID\tQUALITY\tExpected Species\tMLST Scheme\tSequence Type\tMLST_ALLELE_1\tMLST_ALLELE_2\tMLST_ALLELE_3\tMLST_ALLELE_4\tMLST_ALLELE_5\tMLST_ALLELE_6\tMLST_ALLELE_7\tSEROTYPE\tK_CAPSULE\tPLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH\tPLASMID_1_CARBAPENEMASE\tPLASMID_1_INC_GROUP\tPLASMID_2_RFLP\tPLASMID_2_FAMILY\tPLASMID_2_BEST_MATCH\tPLASMID_2_COVERAGE\tPLASMID_2_SNVS_TO_BEST_MATCH\tPLASMID_2_CARBAPENEMASE\tPLASMID_2_INC_GROUP")
+
+ tsvOut.append("new\tID\tExpected Species\tMLST Species\tSequence Type\tMLST Scheme\tCarbapenem Resistance Genes\tOther AMR Genes\tTotal Plasmids\tPlasmids ID\tNum_Contigs\tPlasmid Length\tPlasmid RepType\tPlasmid Mobility\tNearest Reference\tDefinitely Plasmid Contigs\tLikely Plasmid Contigs")
#start with ID
- temp = ""
+ temp = "\t"
temp += (ID + "\t")
temp += expectedSpecies + "\t"
diff -r 698579246d0d -r cabceaa239e4 cpo_galaxy_predictions.xml
--- a/cpo_galaxy_predictions.xml Tue Aug 21 17:53:08 2018 -0400
+++ b/cpo_galaxy_predictions.xml Thu Aug 23 12:21:15 2018 -0400
@@ -19,14 +19,14 @@
]]>
-
+
-
+
@@ -43,7 +43,7 @@
-
+
diff -r 698579246d0d -r cabceaa239e4 cpo_galaxy_tree.py
--- a/cpo_galaxy_tree.py Tue Aug 21 17:53:08 2018 -0400
+++ b/cpo_galaxy_tree.py Thu Aug 23 12:21:15 2018 -0400
@@ -24,8 +24,9 @@
import gzip
import collections
import json
-import ete3
import numpy
+import ete3 as e
+
#parses some parameters
parser = optparse.OptionParser("Usage: %prog [options] arg1 arg2 ...")
@@ -105,38 +106,45 @@
with open(outputpath, 'wb') as out:
out.write(gzContent)
return True
+def addFace(name):
+ #if its the reference branch, populate the faces with column headers
+ face = e.faces.TextFace(name,fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 5
+ face.margin_left = 5
+ return face
#endregion
#region functions to parse result files
def ParseWorkflowResults(pathToResult):
_worflowResult = {}
- r = pandas.read_csv(pathToResult, delimiter='\t', header=None) #read the kraken2report.tsv
+ r = pandas.read_csv(pathToResult, delimiter='\t', header=0)
r = r.replace(numpy.nan, '', regex=True)
for i in range(len(r.index)):
_results = workflowResult()
- if(str(r.iloc[i,0]).lower() == "new"):
+ if(str(r.loc[r.index[i], 'new']).lower() == "new"):
_results.new = True
else:
_results.new = False
- _results.ID = str(r.iloc[i,1])
- _results.ExpectedSpecies = str(r.iloc[i,2])
- _results.MLSTSpecies = str(r.iloc[i,3])
- _results.SequenceType = str(r.iloc[i,4])
- _results.MLSTScheme = (str(r.iloc[i,5]))
- _results.CarbapenemResistanceGenes = (str(r.iloc[i,6]))
- _results.OtherAMRGenes = (str(r.iloc[i,7]))
- _results.TotalPlasmids = int(r.iloc[i,8])
+ _results.ID = str(r.loc[r.index[i], 'ID'])
+ _results.ExpectedSpecies = str(r.loc[r.index[i], 'Expected Species'])
+ _results.MLSTSpecies = str(r.loc[r.index[i], 'MLST Species'])
+ _results.SequenceType = str(r.loc[r.index[i], 'Sequence Type'])
+ _results.MLSTScheme = (str(r.loc[r.index[i], 'MLST Scheme']))
+ _results.CarbapenemResistanceGenes = (str(r.loc[r.index[i], 'Carbapenem Resistance Genes']))
+ _results.OtherAMRGenes = (str(r.loc[r.index[i], 'Other AMR Genes']))
+ _results.TotalPlasmids = int(r.loc[r.index[i], 'Total Plasmids'])
for j in range(0,_results.TotalPlasmids):
_plasmid = plasmidObj()
- _plasmid.PlasmidsID =(((str(r.iloc[i,9])).split(";"))[j])
- _plasmid.Num_Contigs = (((str(r.iloc[i,10])).split(";"))[j])
- _plasmid.PlasmidLength = (((str(r.iloc[i,11])).split(";"))[j])
- _plasmid.PlasmidRepType = (((str(r.iloc[i,12])).split(";"))[j])
- _plasmid.PlasmidMobility = ((str(r.iloc[i,13])).split(";"))[j]
- _plasmid.NearestReference = ((str(r.iloc[i,14])).split(";"))[j]
+ _plasmid.PlasmidsID =(((str(r.loc[r.index[i], 'Plasmids ID'])).split(";"))[j])
+ _plasmid.Num_Contigs = (((str(r.loc[r.index[i], 'Num_Contigs'])).split(";"))[j])
+ _plasmid.PlasmidLength = (((str(r.loc[r.index[i], 'Plasmid Length'])).split(";"))[j])
+ _plasmid.PlasmidRepType = (((str(r.loc[r.index[i], 'Plasmid RepType'])).split(";"))[j])
+ _plasmid.PlasmidMobility = ((str(r.loc[r.index[i], 'Plasmid Mobility'])).split(";"))[j]
+ _plasmid.NearestReference = ((str(r.loc[r.index[i], 'Nearest Reference'])).split(";"))[j]
_results.plasmids.append(_plasmid)
- _results.DefinitelyPlasmidContigs = (str(r.iloc[i,15]))
- _results.LikelyPlasmidContigs = (str(r.iloc[i,16]))
+ _results.DefinitelyPlasmidContigs = (str(r.loc[r.index[i], 'Definitely Plasmid Contigs']))
+ _results.LikelyPlasmidContigs = (str(r.loc[r.index[i], 'Likely Plasmid Contigs']))
_results.row = "\t".join(str(x) for x in r.ix[i].tolist())
_worflowResult[_results.ID] = _results
return _worflowResult
@@ -148,20 +156,90 @@
distance = read(distancePath)
treeFile = "".join(read(treePath))
- distanceDict = {}
+ distanceDict = {} #store the distance matrix as rowname:list
for i in range(len(distance)):
temp = distance[i].split("\t")
distanceDict[temp[0]] = temp[1:]
#region step5: tree construction
- t = ete3.Tree(treeFile)
+
+ '''
+ #region create detailed tree
+
+ plasmidCount = 0
+ for n in t.traverse():
+ if (n.is_leaf() and not n.name == "Reference"):
+ mData = metadata[n.name.replace(".fa","")]
+ face = faces.TextFace(mData.MLSTSpecies,fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_left = 10
+ face.margin_right = 10
+ n.add_face(face, 0, "aligned")
+ face = faces.TextFace(mData.SequenceType,fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 10
+ n.add_face(face, 1, "aligned")
+ face = faces.TextFace(mData.CarbapenemResistanceGenes,fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 10
+ n.add_face(face, 2, "aligned")
+ index = 3
+ if (mData.TotalPlasmids > plasmidCount):
+ plasmidCount = mData.TotalPlasmids
+ for i in range(0, mData.TotalPlasmids):
+ face = faces.TextFace(mData.plasmids[i].PlasmidRepType,fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 10
+ n.add_face(face, index, "aligned")
+ index+=1
+ face = faces.TextFace(mData.plasmids[i].PlasmidMobility,fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 10
+ n.add_face(face, index, "aligned")
+ index+=1
+
+ face = faces.TextFace("Species",fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 10
+ face.margin_left = 10
+ (t&"Reference").add_face(face, 0, "aligned")
+ face = faces.TextFace("Sequence Type",fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 10
+ (t&"Reference").add_face(face, 1, "aligned")
+ face = faces.TextFace("Carbapenamases",fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 10
+ (t&"Reference").add_face(face, 2, "aligned")
+ index = 3
+ for i in range(0, plasmidCount):
+ face = faces.TextFace("plasmid " + str(i) + " replicons",fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 10
+ (t&"Reference").add_face(face, index, "aligned")
+ index+=1
+ face = faces.TextFace("plasmid " + str(i) + " mobility",fsize=10,tight_text=True)
+ face.border.margin = 5
+ face.margin_right = 10
+ (t&"Reference").add_face(face, index, "aligned")
+ index+=1
+
+ t.render("./pipelineTest/tree.png", w=5000,units="mm", tree_style=ts)
+
+ #endregion
+ '''
+ #region create box tree
+ #region step5: tree construction
+ treeFile = "".join(read(treePath))
+ t = e.Tree(treeFile)
t.set_outgroup(t&"Reference")
- ts = ete3.TreeStyle()
- ts.show_leaf_name = True
+ #set the tree style
+ ts = e.TreeStyle()
+ ts.show_leaf_name = False
ts.show_branch_length = True
ts.scale = 2000 #pixel per branch length unit
ts.branch_vertical_margin = 15 #pixel between branches
- style2 = ete3.NodeStyle()
+ style2 = e.NodeStyle()
style2["fgcolor"] = "#000000"
style2["shape"] = "circle"
style2["vt_line_color"] = "#0000aa"
@@ -172,93 +250,8 @@
style2["hz_line_type"] = 0
for n in t.traverse():
n.set_style(style2)
- '''
- #region create detailed tree
-
- plasmidCount = 0
- for n in t.traverse():
- if (n.is_leaf() and not n.name == "Reference"):
- mData = metadata[n.name.replace(".fa","")]
- face = ete3.faces.TextFace(mData.MLSTSpecies,fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_left = 10
- face.margin_right = 10
- n.add_face(face, 0, "aligned")
- face = ete3.faces.TextFace(mData.SequenceType,fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 10
- n.add_face(face, 1, "aligned")
- face = ete3.faces.TextFace(mData.CarbapenemResistanceGenes,fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 10
- n.add_face(face, 2, "aligned")
- index = 3
- if (mData.TotalPlasmids > plasmidCount):
- plasmidCount = mData.TotalPlasmids
- for i in range(0, mData.TotalPlasmids):
- face = ete3.faces.TextFace(mData.plasmids[i].PlasmidRepType,fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 10
- n.add_face(face, index, "aligned")
- index+=1
- face = ete3.faces.TextFace(mData.plasmids[i].PlasmidMobility,fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 10
- n.add_face(face, index, "aligned")
- index+=1
- face = ete3.faces.TextFace("Species",fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 10
- face.margin_left = 10
- (t&"Reference").add_face(face, 0, "aligned")
- face = ete3.faces.TextFace("Sequence Type",fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 10
- (t&"Reference").add_face(face, 1, "aligned")
- face = ete3.faces.TextFace("Carbapenamases",fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 10
- (t&"Reference").add_face(face, 2, "aligned")
- index = 3
- for i in range(0, plasmidCount):
- face = ete3.faces.TextFace("plasmid " + str(i) + " replicons",fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 10
- (t&"Reference").add_face(face, index, "aligned")
- index+=1
- face = ete3.faces.TextFace("plasmid " + str(i) + " mobility",fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 10
- (t&"Reference").add_face(face, index, "aligned")
- index+=1
-
- t.render("./pipelineTest/tree.png", w=5000,units="mm", tree_style=ts)
-
- #endregion
- '''
- #region create box tree
- #region step5: tree construction
- treeFile = "".join(read("./pipelineTest/tree.txt"))
- t = ete3.Tree(treeFile)
- t.set_outgroup(t&"Reference")
-
- ts = ete3.TreeStyle()
- ts.show_leaf_name = True
- ts.show_branch_length = True
- ts.scale = 2000 #pixel per branch length unit
- ts.branch_vertical_margin = 15 #pixel between branches
- style2 = ete3.NodeStyle()
- style2["fgcolor"] = "#000000"
- style2["shape"] = "circle"
- style2["vt_line_color"] = "#0000aa"
- style2["hz_line_color"] = "#0000aa"
- style2["vt_line_width"] = 2
- style2["hz_line_width"] = 2
- style2["vt_line_type"] = 0 # 0 solid, 1 dashed, 2 dotted
- style2["hz_line_type"] = 0
- for n in t.traverse():
- n.set_style(style2)
+ #find the plasmid origins
plasmidIncs = {}
for key in metadata:
for plasmid in metadata[key].plasmids:
@@ -270,81 +263,61 @@
if metadata[key].ID not in plasmidIncs[inc]:
plasmidIncs[inc].append(metadata[key].ID)
#plasmidIncs = sorted(plasmidIncs)
- for n in t.traverse():
+ for n in t.traverse(): #loop through the nodes of a tree
if (n.is_leaf() and n.name == "Reference"):
- face = ete3.faces.TextFace("New?",fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 5
- face.margin_left = 5
- (t&"Reference").add_face(face, 0, "aligned")
+ #if its the reference branch, populate the faces with column headers
+ index = 0
+ (t&"Reference").add_face(addFace("SampleID"), index, "aligned")
+ index = index + 1
+ (t&"Reference").add_face(addFace("New?"), index, "aligned")
+ index = index + 1
for i in range(len(plasmidIncs)): #this loop adds the columns (aka the incs) to the reference node
- face = ete3.faces.TextFace(list(plasmidIncs.keys())[i],fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 5
- face.margin_left = 5
- (t&"Reference").add_face(face, i + 1, "aligned")
- face = ete3.faces.TextFace("MLSTScheme",fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 5
- face.margin_left = 5
- (t&"Reference").add_face(face, len(plasmidIncs) + 0 + 1, "aligned")
- face = ete3.faces.TextFace("Sequence Type",fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 5
- face.margin_left = 5
- (t&"Reference").add_face(face, len(plasmidIncs) + 1 + 1, "aligned")
- face = ete3.faces.TextFace("Carbapenamases",fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 5
- face.margin_left = 5
- (t&"Reference").add_face(face, len(plasmidIncs) + 2 + 1, "aligned")
- for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds the columns (aka the incs) to the reference node
- face = ete3.faces.TextFace(distanceDict[list(distanceDict.keys())[0]][i],fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 5
- face.margin_left = 5
- (t&"Reference").add_face(face, len(plasmidIncs) + 2 + i + 1 + 1, "aligned")
- elif (n.is_leaf() and not n.name == "Reference"):
- if (metadata[n.name.replace(".fa","")].new == True):
- face = ete3.faces.RectFace(30,30,"green","green") # TextFace("Y",fsize=10,tight_text=True)
+ (t&"Reference").add_face(addFace(list(plasmidIncs.keys())[i]), i + index, "aligned")
+ index = index + len(plasmidIncs)
+ (t&"Reference").add_face(addFace("MLSTScheme"), index, "aligned")
+ index = index + 1
+ (t&"Reference").add_face(addFace("Sequence Type"), index, "aligned")
+ index = index + 1
+ (t&"Reference").add_face(addFace("Carbapenamases"), index, "aligned")
+ index = index + 1
+ for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds the distance matrix
+ (t&"Reference").add_face(addFace(distanceDict[list(distanceDict.keys())[0]][i]), index + i, "aligned")
+ index = index + len(distanceDict[list(distanceDict.keys())[0]])
+ elif (n.is_leaf() and not n.name == "Reference"):
+ #not reference branches, populate with metadata
+ index = 0
+ mData = metadata[n.name.replace(".fa","")]
+ n.add_face(addFace(mData.ID), index, "aligned")
+ index = index + 1
+ if (metadata[n.name.replace(".fa","")].new == True): #new column
+ face = e.RectFace(30,30,"green","green") # TextFace("Y",fsize=10,tight_text=True)
face.border.margin = 5
face.margin_right = 5
face.margin_left = 5
face.vt_align = 1
face.ht_align = 1
- n.add_face(face, 0, "aligned")
+ n.add_face(face, index, "aligned")
+ index = index + 1
for incs in plasmidIncs: #this loop adds presence/absence to the sample nodes
if (n.name.replace(".fa","") in plasmidIncs[incs]):
- face = ete3.faces.RectFace(30,30,"black","black") # TextFace("Y",fsize=10,tight_text=True)
+ face = e.RectFace(30,30,"black","black") # TextFace("Y",fsize=10,tight_text=True)
face.border.margin = 5
face.margin_right = 5
face.margin_left = 5
face.vt_align = 1
face.ht_align = 1
- n.add_face(face, list(plasmidIncs.keys()).index(incs) + 1, "aligned")
- mData = metadata[n.name.replace(".fa","")]
- face = ete3.faces.TextFace(mData.MLSTSpecies,fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 5
- face.margin_left = 5
- n.add_face(face, len(plasmidIncs) + 0 + 1, "aligned")
- face = ete3.faces.TextFace(mData.SequenceType,fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 5
- face.margin_left = 5
- n.add_face(face, len(plasmidIncs) + 1 + 1, "aligned")
- face = ete3.faces.TextFace(mData.CarbapenemResistanceGenes,fsize=10,tight_text=True)
- face.margin_right = 5
- face.margin_left = 5
- n.add_face(face, len(plasmidIncs) + 2 + 1, "aligned")
+ n.add_face(face, list(plasmidIncs.keys()).index(incs) + index, "aligned")
+ index = index + len(plasmidIncs)
+ n.add_face(addFace(mData.MLSTSpecies), index, "aligned")
+ index = index + 1
+ n.add_face(addFace(mData.SequenceType), index, "aligned")
+ index = index + 1
+ n.add_face(addFace(mData.CarbapenemResistanceGenes), index, "aligned")
+ index = index + 1
for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds distance matrix
- face = ete3.faces.TextFace(list(distanceDict[n.name])[i],fsize=10,tight_text=True)
- face.border.margin = 5
- face.margin_right = 5
- face.margin_left = 5
- n.add_face(face, len(plasmidIncs) + 2 + i + 1 + 1, "aligned")
-
- t.render("./tree.png", w=5000,units="mm", tree_style=ts)
+ n.add_face(addFace(list(distanceDict[n.name])[i]), index + i, "aligned")
+
+ t.render("./tree.png", w=5000,units="mm", tree_style=ts) #save it as a png. or an phyloxml
#endregion
#endregion
diff -r 698579246d0d -r cabceaa239e4 cpo_galaxy_tree.xml
--- a/cpo_galaxy_tree.xml Tue Aug 21 17:53:08 2018 -0400
+++ b/cpo_galaxy_tree.xml Thu Aug 23 12:21:15 2018 -0400
@@ -4,6 +4,7 @@
pandas
python
ete3
+ pyqt
-
\ No newline at end of file
+
diff -r 698579246d0d -r cabceaa239e4 cpo_plasmidfinder.xml
--- a/cpo_plasmidfinder.xml Tue Aug 21 17:53:08 2018 -0400
+++ b/cpo_plasmidfinder.xml Thu Aug 23 12:21:15 2018 -0400
@@ -1,5 +1,5 @@
-
- this tool parses stuff
+
+ Modified plasmidfinder v0.8 with custom database
abricate
diff -r 698579246d0d -r cabceaa239e4 cpo_prediction.tar.gz
Binary file cpo_prediction.tar.gz has changed