# HG changeset patch # User jjjjia # Date 1535041275 14400 # Node ID cabceaa239e49312e6ba51e39d65437c47229e81 # Parent 698579246d0df3b9d4009182c4ee5fc20d8a3906 planemo upload diff -r 698579246d0d -r cabceaa239e4 cpo_clustalw.xml --- a/cpo_clustalw.xml Tue Aug 21 17:53:08 2018 -0400 +++ b/cpo_clustalw.xml Thu Aug 23 12:21:15 2018 -0400 @@ -5,38 +5,17 @@ - - + - -**Syntax** - -This tool reconstructs individual plasmid sequences from draft genome assemblies using the plasmid reference databases. - -For more information please visit https://github.com/phac-nml/mob-suite/. - ------ - -**Input:** - -A FASTA file with a single or multiple contigs (e.g. a draft genome assembly): - - -**Output:** - -Tab-delimited report listing information for each input contig on its cluster number, possible replicon, relaxase, and repetitive elements types, etc. Refer to https://github.com/phac-nml/mob-suite#mob-recon-contig-report-format for the description of each column. - -Note: Plasmid sequences will not be output if none are found. Some plasmid could be intergrated into a chromosome. - - + clustalw2 -tree -infile=$input -outputtree=nj @@ -49,4 +28,4 @@ url = {https://github.com/phac-nml/mob-suite} } - + \ No newline at end of file diff -r 698579246d0d -r cabceaa239e4 cpo_galaxy_prediction.py --- a/cpo_galaxy_prediction.py Tue Aug 21 17:53:08 2018 -0400 +++ b/cpo_galaxy_prediction.py Thu Aug 23 12:21:15 2018 -0400 @@ -26,7 +26,7 @@ import numpy -debug = True #debug skips the shell scripts and also dump out a ton of debugging messages +debug = False #debug skips the shell scripts and also dump out a ton of debugging messages if not debug: #parses some parameters @@ -596,10 +596,13 @@ #TSV output + lindaOut = [] tsvOut = [] - tsvOut.append("ID\tExpected Species\tMLST Species\tSequence Type\tMLST Scheme\tCarbapenem Resistance Genes\tOther AMR Genes\tTotal Plasmids\tPlasmids ID\tNum_Contigs\tPlasmid Length\tPlasmid RepType\tPlasmid Mobility\tNearest Reference\tDefinitely Plasmid Contigs\tLikely Plasmid Contigs") + lindaOut.append("new\tID\tQUALITY\tExpected Species\tMLST Scheme\tSequence Type\tMLST_ALLELE_1\tMLST_ALLELE_2\tMLST_ALLELE_3\tMLST_ALLELE_4\tMLST_ALLELE_5\tMLST_ALLELE_6\tMLST_ALLELE_7\tSEROTYPE\tK_CAPSULE\tPLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH\tPLASMID_1_CARBAPENEMASE\tPLASMID_1_INC_GROUP\tPLASMID_2_RFLP\tPLASMID_2_FAMILY\tPLASMID_2_BEST_MATCH\tPLASMID_2_COVERAGE\tPLASMID_2_SNVS_TO_BEST_MATCH\tPLASMID_2_CARBAPENEMASE\tPLASMID_2_INC_GROUP") + + tsvOut.append("new\tID\tExpected Species\tMLST Species\tSequence Type\tMLST Scheme\tCarbapenem Resistance Genes\tOther AMR Genes\tTotal Plasmids\tPlasmids ID\tNum_Contigs\tPlasmid Length\tPlasmid RepType\tPlasmid Mobility\tNearest Reference\tDefinitely Plasmid Contigs\tLikely Plasmid Contigs") #start with ID - temp = "" + temp = "\t" temp += (ID + "\t") temp += expectedSpecies + "\t" diff -r 698579246d0d -r cabceaa239e4 cpo_galaxy_predictions.xml --- a/cpo_galaxy_predictions.xml Tue Aug 21 17:53:08 2018 -0400 +++ b/cpo_galaxy_predictions.xml Thu Aug 23 12:21:15 2018 -0400 @@ -19,14 +19,14 @@ ]]> - + - + @@ -43,7 +43,7 @@ - + diff -r 698579246d0d -r cabceaa239e4 cpo_galaxy_tree.py --- a/cpo_galaxy_tree.py Tue Aug 21 17:53:08 2018 -0400 +++ b/cpo_galaxy_tree.py Thu Aug 23 12:21:15 2018 -0400 @@ -24,8 +24,9 @@ import gzip import collections import json -import ete3 import numpy +import ete3 as e + #parses some parameters parser = optparse.OptionParser("Usage: %prog [options] arg1 arg2 ...") @@ -105,38 +106,45 @@ with open(outputpath, 'wb') as out: out.write(gzContent) return True +def addFace(name): + #if its the reference branch, populate the faces with column headers + face = e.faces.TextFace(name,fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 5 + face.margin_left = 5 + return face #endregion #region functions to parse result files def ParseWorkflowResults(pathToResult): _worflowResult = {} - r = pandas.read_csv(pathToResult, delimiter='\t', header=None) #read the kraken2report.tsv + r = pandas.read_csv(pathToResult, delimiter='\t', header=0) r = r.replace(numpy.nan, '', regex=True) for i in range(len(r.index)): _results = workflowResult() - if(str(r.iloc[i,0]).lower() == "new"): + if(str(r.loc[r.index[i], 'new']).lower() == "new"): _results.new = True else: _results.new = False - _results.ID = str(r.iloc[i,1]) - _results.ExpectedSpecies = str(r.iloc[i,2]) - _results.MLSTSpecies = str(r.iloc[i,3]) - _results.SequenceType = str(r.iloc[i,4]) - _results.MLSTScheme = (str(r.iloc[i,5])) - _results.CarbapenemResistanceGenes = (str(r.iloc[i,6])) - _results.OtherAMRGenes = (str(r.iloc[i,7])) - _results.TotalPlasmids = int(r.iloc[i,8]) + _results.ID = str(r.loc[r.index[i], 'ID']) + _results.ExpectedSpecies = str(r.loc[r.index[i], 'Expected Species']) + _results.MLSTSpecies = str(r.loc[r.index[i], 'MLST Species']) + _results.SequenceType = str(r.loc[r.index[i], 'Sequence Type']) + _results.MLSTScheme = (str(r.loc[r.index[i], 'MLST Scheme'])) + _results.CarbapenemResistanceGenes = (str(r.loc[r.index[i], 'Carbapenem Resistance Genes'])) + _results.OtherAMRGenes = (str(r.loc[r.index[i], 'Other AMR Genes'])) + _results.TotalPlasmids = int(r.loc[r.index[i], 'Total Plasmids']) for j in range(0,_results.TotalPlasmids): _plasmid = plasmidObj() - _plasmid.PlasmidsID =(((str(r.iloc[i,9])).split(";"))[j]) - _plasmid.Num_Contigs = (((str(r.iloc[i,10])).split(";"))[j]) - _plasmid.PlasmidLength = (((str(r.iloc[i,11])).split(";"))[j]) - _plasmid.PlasmidRepType = (((str(r.iloc[i,12])).split(";"))[j]) - _plasmid.PlasmidMobility = ((str(r.iloc[i,13])).split(";"))[j] - _plasmid.NearestReference = ((str(r.iloc[i,14])).split(";"))[j] + _plasmid.PlasmidsID =(((str(r.loc[r.index[i], 'Plasmids ID'])).split(";"))[j]) + _plasmid.Num_Contigs = (((str(r.loc[r.index[i], 'Num_Contigs'])).split(";"))[j]) + _plasmid.PlasmidLength = (((str(r.loc[r.index[i], 'Plasmid Length'])).split(";"))[j]) + _plasmid.PlasmidRepType = (((str(r.loc[r.index[i], 'Plasmid RepType'])).split(";"))[j]) + _plasmid.PlasmidMobility = ((str(r.loc[r.index[i], 'Plasmid Mobility'])).split(";"))[j] + _plasmid.NearestReference = ((str(r.loc[r.index[i], 'Nearest Reference'])).split(";"))[j] _results.plasmids.append(_plasmid) - _results.DefinitelyPlasmidContigs = (str(r.iloc[i,15])) - _results.LikelyPlasmidContigs = (str(r.iloc[i,16])) + _results.DefinitelyPlasmidContigs = (str(r.loc[r.index[i], 'Definitely Plasmid Contigs'])) + _results.LikelyPlasmidContigs = (str(r.loc[r.index[i], 'Likely Plasmid Contigs'])) _results.row = "\t".join(str(x) for x in r.ix[i].tolist()) _worflowResult[_results.ID] = _results return _worflowResult @@ -148,20 +156,90 @@ distance = read(distancePath) treeFile = "".join(read(treePath)) - distanceDict = {} + distanceDict = {} #store the distance matrix as rowname:list for i in range(len(distance)): temp = distance[i].split("\t") distanceDict[temp[0]] = temp[1:] #region step5: tree construction - t = ete3.Tree(treeFile) + + ''' + #region create detailed tree + + plasmidCount = 0 + for n in t.traverse(): + if (n.is_leaf() and not n.name == "Reference"): + mData = metadata[n.name.replace(".fa","")] + face = faces.TextFace(mData.MLSTSpecies,fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_left = 10 + face.margin_right = 10 + n.add_face(face, 0, "aligned") + face = faces.TextFace(mData.SequenceType,fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 10 + n.add_face(face, 1, "aligned") + face = faces.TextFace(mData.CarbapenemResistanceGenes,fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 10 + n.add_face(face, 2, "aligned") + index = 3 + if (mData.TotalPlasmids > plasmidCount): + plasmidCount = mData.TotalPlasmids + for i in range(0, mData.TotalPlasmids): + face = faces.TextFace(mData.plasmids[i].PlasmidRepType,fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 10 + n.add_face(face, index, "aligned") + index+=1 + face = faces.TextFace(mData.plasmids[i].PlasmidMobility,fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 10 + n.add_face(face, index, "aligned") + index+=1 + + face = faces.TextFace("Species",fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 10 + face.margin_left = 10 + (t&"Reference").add_face(face, 0, "aligned") + face = faces.TextFace("Sequence Type",fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 10 + (t&"Reference").add_face(face, 1, "aligned") + face = faces.TextFace("Carbapenamases",fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 10 + (t&"Reference").add_face(face, 2, "aligned") + index = 3 + for i in range(0, plasmidCount): + face = faces.TextFace("plasmid " + str(i) + " replicons",fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 10 + (t&"Reference").add_face(face, index, "aligned") + index+=1 + face = faces.TextFace("plasmid " + str(i) + " mobility",fsize=10,tight_text=True) + face.border.margin = 5 + face.margin_right = 10 + (t&"Reference").add_face(face, index, "aligned") + index+=1 + + t.render("./pipelineTest/tree.png", w=5000,units="mm", tree_style=ts) + + #endregion + ''' + #region create box tree + #region step5: tree construction + treeFile = "".join(read(treePath)) + t = e.Tree(treeFile) t.set_outgroup(t&"Reference") - ts = ete3.TreeStyle() - ts.show_leaf_name = True + #set the tree style + ts = e.TreeStyle() + ts.show_leaf_name = False ts.show_branch_length = True ts.scale = 2000 #pixel per branch length unit ts.branch_vertical_margin = 15 #pixel between branches - style2 = ete3.NodeStyle() + style2 = e.NodeStyle() style2["fgcolor"] = "#000000" style2["shape"] = "circle" style2["vt_line_color"] = "#0000aa" @@ -172,93 +250,8 @@ style2["hz_line_type"] = 0 for n in t.traverse(): n.set_style(style2) - ''' - #region create detailed tree - - plasmidCount = 0 - for n in t.traverse(): - if (n.is_leaf() and not n.name == "Reference"): - mData = metadata[n.name.replace(".fa","")] - face = ete3.faces.TextFace(mData.MLSTSpecies,fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_left = 10 - face.margin_right = 10 - n.add_face(face, 0, "aligned") - face = ete3.faces.TextFace(mData.SequenceType,fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 10 - n.add_face(face, 1, "aligned") - face = ete3.faces.TextFace(mData.CarbapenemResistanceGenes,fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 10 - n.add_face(face, 2, "aligned") - index = 3 - if (mData.TotalPlasmids > plasmidCount): - plasmidCount = mData.TotalPlasmids - for i in range(0, mData.TotalPlasmids): - face = ete3.faces.TextFace(mData.plasmids[i].PlasmidRepType,fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 10 - n.add_face(face, index, "aligned") - index+=1 - face = ete3.faces.TextFace(mData.plasmids[i].PlasmidMobility,fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 10 - n.add_face(face, index, "aligned") - index+=1 - face = ete3.faces.TextFace("Species",fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 10 - face.margin_left = 10 - (t&"Reference").add_face(face, 0, "aligned") - face = ete3.faces.TextFace("Sequence Type",fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 10 - (t&"Reference").add_face(face, 1, "aligned") - face = ete3.faces.TextFace("Carbapenamases",fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 10 - (t&"Reference").add_face(face, 2, "aligned") - index = 3 - for i in range(0, plasmidCount): - face = ete3.faces.TextFace("plasmid " + str(i) + " replicons",fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 10 - (t&"Reference").add_face(face, index, "aligned") - index+=1 - face = ete3.faces.TextFace("plasmid " + str(i) + " mobility",fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 10 - (t&"Reference").add_face(face, index, "aligned") - index+=1 - - t.render("./pipelineTest/tree.png", w=5000,units="mm", tree_style=ts) - - #endregion - ''' - #region create box tree - #region step5: tree construction - treeFile = "".join(read("./pipelineTest/tree.txt")) - t = ete3.Tree(treeFile) - t.set_outgroup(t&"Reference") - - ts = ete3.TreeStyle() - ts.show_leaf_name = True - ts.show_branch_length = True - ts.scale = 2000 #pixel per branch length unit - ts.branch_vertical_margin = 15 #pixel between branches - style2 = ete3.NodeStyle() - style2["fgcolor"] = "#000000" - style2["shape"] = "circle" - style2["vt_line_color"] = "#0000aa" - style2["hz_line_color"] = "#0000aa" - style2["vt_line_width"] = 2 - style2["hz_line_width"] = 2 - style2["vt_line_type"] = 0 # 0 solid, 1 dashed, 2 dotted - style2["hz_line_type"] = 0 - for n in t.traverse(): - n.set_style(style2) + #find the plasmid origins plasmidIncs = {} for key in metadata: for plasmid in metadata[key].plasmids: @@ -270,81 +263,61 @@ if metadata[key].ID not in plasmidIncs[inc]: plasmidIncs[inc].append(metadata[key].ID) #plasmidIncs = sorted(plasmidIncs) - for n in t.traverse(): + for n in t.traverse(): #loop through the nodes of a tree if (n.is_leaf() and n.name == "Reference"): - face = ete3.faces.TextFace("New?",fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 5 - face.margin_left = 5 - (t&"Reference").add_face(face, 0, "aligned") + #if its the reference branch, populate the faces with column headers + index = 0 + (t&"Reference").add_face(addFace("SampleID"), index, "aligned") + index = index + 1 + (t&"Reference").add_face(addFace("New?"), index, "aligned") + index = index + 1 for i in range(len(plasmidIncs)): #this loop adds the columns (aka the incs) to the reference node - face = ete3.faces.TextFace(list(plasmidIncs.keys())[i],fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 5 - face.margin_left = 5 - (t&"Reference").add_face(face, i + 1, "aligned") - face = ete3.faces.TextFace("MLSTScheme",fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 5 - face.margin_left = 5 - (t&"Reference").add_face(face, len(plasmidIncs) + 0 + 1, "aligned") - face = ete3.faces.TextFace("Sequence Type",fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 5 - face.margin_left = 5 - (t&"Reference").add_face(face, len(plasmidIncs) + 1 + 1, "aligned") - face = ete3.faces.TextFace("Carbapenamases",fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 5 - face.margin_left = 5 - (t&"Reference").add_face(face, len(plasmidIncs) + 2 + 1, "aligned") - for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds the columns (aka the incs) to the reference node - face = ete3.faces.TextFace(distanceDict[list(distanceDict.keys())[0]][i],fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 5 - face.margin_left = 5 - (t&"Reference").add_face(face, len(plasmidIncs) + 2 + i + 1 + 1, "aligned") - elif (n.is_leaf() and not n.name == "Reference"): - if (metadata[n.name.replace(".fa","")].new == True): - face = ete3.faces.RectFace(30,30,"green","green") # TextFace("Y",fsize=10,tight_text=True) + (t&"Reference").add_face(addFace(list(plasmidIncs.keys())[i]), i + index, "aligned") + index = index + len(plasmidIncs) + (t&"Reference").add_face(addFace("MLSTScheme"), index, "aligned") + index = index + 1 + (t&"Reference").add_face(addFace("Sequence Type"), index, "aligned") + index = index + 1 + (t&"Reference").add_face(addFace("Carbapenamases"), index, "aligned") + index = index + 1 + for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds the distance matrix + (t&"Reference").add_face(addFace(distanceDict[list(distanceDict.keys())[0]][i]), index + i, "aligned") + index = index + len(distanceDict[list(distanceDict.keys())[0]]) + elif (n.is_leaf() and not n.name == "Reference"): + #not reference branches, populate with metadata + index = 0 + mData = metadata[n.name.replace(".fa","")] + n.add_face(addFace(mData.ID), index, "aligned") + index = index + 1 + if (metadata[n.name.replace(".fa","")].new == True): #new column + face = e.RectFace(30,30,"green","green") # TextFace("Y",fsize=10,tight_text=True) face.border.margin = 5 face.margin_right = 5 face.margin_left = 5 face.vt_align = 1 face.ht_align = 1 - n.add_face(face, 0, "aligned") + n.add_face(face, index, "aligned") + index = index + 1 for incs in plasmidIncs: #this loop adds presence/absence to the sample nodes if (n.name.replace(".fa","") in plasmidIncs[incs]): - face = ete3.faces.RectFace(30,30,"black","black") # TextFace("Y",fsize=10,tight_text=True) + face = e.RectFace(30,30,"black","black") # TextFace("Y",fsize=10,tight_text=True) face.border.margin = 5 face.margin_right = 5 face.margin_left = 5 face.vt_align = 1 face.ht_align = 1 - n.add_face(face, list(plasmidIncs.keys()).index(incs) + 1, "aligned") - mData = metadata[n.name.replace(".fa","")] - face = ete3.faces.TextFace(mData.MLSTSpecies,fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 5 - face.margin_left = 5 - n.add_face(face, len(plasmidIncs) + 0 + 1, "aligned") - face = ete3.faces.TextFace(mData.SequenceType,fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 5 - face.margin_left = 5 - n.add_face(face, len(plasmidIncs) + 1 + 1, "aligned") - face = ete3.faces.TextFace(mData.CarbapenemResistanceGenes,fsize=10,tight_text=True) - face.margin_right = 5 - face.margin_left = 5 - n.add_face(face, len(plasmidIncs) + 2 + 1, "aligned") + n.add_face(face, list(plasmidIncs.keys()).index(incs) + index, "aligned") + index = index + len(plasmidIncs) + n.add_face(addFace(mData.MLSTSpecies), index, "aligned") + index = index + 1 + n.add_face(addFace(mData.SequenceType), index, "aligned") + index = index + 1 + n.add_face(addFace(mData.CarbapenemResistanceGenes), index, "aligned") + index = index + 1 for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds distance matrix - face = ete3.faces.TextFace(list(distanceDict[n.name])[i],fsize=10,tight_text=True) - face.border.margin = 5 - face.margin_right = 5 - face.margin_left = 5 - n.add_face(face, len(plasmidIncs) + 2 + i + 1 + 1, "aligned") - - t.render("./tree.png", w=5000,units="mm", tree_style=ts) + n.add_face(addFace(list(distanceDict[n.name])[i]), index + i, "aligned") + + t.render("./tree.png", w=5000,units="mm", tree_style=ts) #save it as a png. or an phyloxml #endregion #endregion diff -r 698579246d0d -r cabceaa239e4 cpo_galaxy_tree.xml --- a/cpo_galaxy_tree.xml Tue Aug 21 17:53:08 2018 -0400 +++ b/cpo_galaxy_tree.xml Thu Aug 23 12:21:15 2018 -0400 @@ -4,6 +4,7 @@ pandas python ete3 + pyqt - \ No newline at end of file + diff -r 698579246d0d -r cabceaa239e4 cpo_plasmidfinder.xml --- a/cpo_plasmidfinder.xml Tue Aug 21 17:53:08 2018 -0400 +++ b/cpo_plasmidfinder.xml Thu Aug 23 12:21:15 2018 -0400 @@ -1,5 +1,5 @@ - - this tool parses stuff + + Modified plasmidfinder v0.8 with custom database abricate diff -r 698579246d0d -r cabceaa239e4 cpo_prediction.tar.gz Binary file cpo_prediction.tar.gz has changed