annotate cpo_galaxy_prediction.py @ 17:ed3b291693fc draft

planemo upload
author jjjjia
date Mon, 27 Aug 2018 19:58:24 -0400
parents a14b12a71a53
children 596bf8a792de
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
1 #!/home/jjjjia/.conda/envs/py36/bin/python
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
2
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
3 #$ -S /home/jjjjia/.conda/envs/py36/bin/python
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
4 #$ -V # Pass environment variables to the job
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
5 #$ -N CPO_pipeline # Replace with a more specific job name
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
6 #$ -wd /home/jjjjia/testCases # Use the current working dir
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
7 #$ -pe smp 8 # Parallel Environment (how many cores)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
8 #$ -l h_vmem=11G # Memory (RAM) allocation *per core*
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
9 #$ -e ./logs/$JOB_ID.err
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
10 #$ -o ./logs/$JOB_ID.log
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
11 #$ -m ea
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
12 #$ -M bja20@sfu.ca
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
13
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
14 #./prediction.py -i ~/testCases/cpoResults/contigs/BC11-Kpn005_S2.fa -m ~/testCases/predictionResultsQsubTest/predictions/BC11-Kpn005_S2.mlst -c ~/testCases/predictionResultsQsubTest/predictions/BC11-Kpn005_S2.recon/contig_report.txt -f ~/testCases/predictionResultsQsubTest/predictions/BC11-Kpn005_S2.recon/mobtyper_aggregate_report.txt -a ~/testCases/predictionResultsQsubTest/predictions/BC11-Kpn005_S2.cp -r ~/testCases/predictionResultsQsubTest/predictions/BC11-Kpn005_S2.rgi.txt -e "Klebsiella"
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
15 import subprocess
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
16 import pandas
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
17 import optparse
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
18 import os
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
19 import datetime
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
20 import sys
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
21 import time
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
22 import urllib.request
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
23 import gzip
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
24 import collections
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
25 import json
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
26 import numpy
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
27
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
28
6
cabceaa239e4 planemo upload
jjjjia
parents: 5
diff changeset
29 debug = False #debug skips the shell scripts and also dump out a ton of debugging messages
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
30
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
31 if not debug:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
32 #parses some parameters
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
33 parser = optparse.OptionParser("Usage: %prog [options] arg1 arg2 ...")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
34 #required
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
35 #MLSTHIT, mobsuite, resfinder, rgi, mlstscheme
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
36 parser.add_option("-i", "--id", dest="id", type="string", help="identifier of the isolate")
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
37 parser.add_option("-m", "--mlst", dest="mlst", type="string", help="absolute file path to mlst result")
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
38 parser.add_option("-c", "--mobfinderContig", dest="mobfinderContig", type="string", help="absolute path to mobfinder aggregate result")
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
39 parser.add_option("-f", "--mobfinderAggregate", dest="mobfinderAggregate", type="string", help="absolute path to mobfinder plasmid results")
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
40 parser.add_option("-a", "--abricate", dest="abricate", type="string", help="absolute path to abricate results")
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
41 parser.add_option("-r", "--rgi", dest="rgi", type="string", help="absolute path to rgi results")
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
42 parser.add_option("-e", "--expected", dest="expectedSpecies", default="NA/NA/NA", type="string", help="expected species of the isolate")
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
43 parser.add_option("-s", "--mlst-scheme", dest="mlstScheme", default= "./scheme_species_map.tab", type="string", help="absolute file path to mlst scheme")
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
44 parser.add_option("-p", "--plasmidfinder", dest="plasmidfinder", type="string", help="absolute file path to plasmidfinder ")
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
45
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
46 #parallelization, useless, these are hard coded to 8cores/64G RAM
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
47 #parser.add_option("-t", "--threads", dest="threads", default=8, type="int", help="number of cpu to use")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
48 #parser.add_option("-p", "--memory", dest="memory", default=64, type="int", help="memory to use in GB")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
49
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
50 (options,args) = parser.parse_args()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
51 #if len(args) != 8:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
52 #parser.error("incorrect number of arguments, all 7 is required")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
53 curDir = os.getcwd()
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
54 ID = str(options.id).lstrip().rstrip()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
55 mlst = str(options.mlst).lstrip().rstrip()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
56 mobfindercontig = str(options.mobfinderContig).lstrip().rstrip()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
57 mobfinderaggregate = str(options.mobfinderAggregate).lstrip().rstrip()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
58 abricate = str(options.abricate).lstrip().rstrip()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
59 rgi = str(options.rgi).lstrip().rstrip()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
60 expectedSpecies = str(options.expectedSpecies).lstrip().rstrip()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
61 mlstScheme = str(options.mlstScheme).lstrip().rstrip()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
62 plasmidfinder = str(options.plasmidfinder).lstrip().rstrip()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
63 outputDir = "./"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
64 print(mlst)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
65 print(mobfindercontig)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
66 print(mobfinderaggregate)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
67 print(abricate)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
68 print(rgi)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
69 print(expectedSpecies)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
70 print(mlstScheme)
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
71 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
72 curDir = os.getcwd()
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
73 ID = "BC11"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
74 mlst = "D:\OneDrive\ProjectCDC\ProjectCDCInPython\ProjectCDCInPython\pipelineTest\predictions\BC11-Kpn005_S2.mlst"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
75 mobfindercontig = "D:\OneDrive\ProjectCDC\ProjectCDCInPython\ProjectCDCInPython\pipelineTest\predictions\BC11-Kpn005_S2.recon\contig_report.txt"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
76 mobfinderaggregate = "D:\OneDrive\ProjectCDC\ProjectCDCInPython\ProjectCDCInPython\pipelineTest\predictions\BC11-Kpn005_S2.recon\mobtyper_aggregate_report.txt"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
77 abricate = "D:\OneDrive\ProjectCDC\ProjectCDCInPython\ProjectCDCInPython\pipelineTest\predictions\BC11-Kpn005_S2.cp"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
78 rgi = "D:\OneDrive\ProjectCDC\ProjectCDCInPython\ProjectCDCInPython\pipelineTest\predictions\BC11-Kpn005_S2.rgi.txt"
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
79 expectedSpecies = "Escherichia coli"
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
80 mlstScheme = "D:\OneDrive\ProjectCDC\ProjectCDCInPython\ProjectCDCInPython\pipelineTest\scheme_species_map.tab"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
81 plasmidfinder = "D:\OneDrive\ProjectCDC\ProjectCDCInPython\ProjectCDCInPython\pipelineTest\predictions\BC11-Kpn005_S2.origins"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
82 outputDir = "./"
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
83
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
84 #region result objects
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
85 #define some objects to store values from results
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
86 #//TODO this is not the proper way of get/set private object variables. every value has manually assigned defaults intead of specified in init(). Also, use property(def getVar, def setVar).
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
87 class starFinders(object):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
88 def __init__(self):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
89 self.file = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
90 self.sequence = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
91 self.start = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
92 self.end = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
93 self.gene = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
94 self.shortGene = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
95 self.coverage = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
96 self.coverage_map = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
97 self.gaps = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
98 self.pCoverage = 100.00
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
99 self.pIdentity = 100.00
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
100 self.database = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
101 self.accession = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
102 self.product = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
103 self.source = "chromosome"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
104 self.row = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
105
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
106 class PlasFlowResult(object):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
107 def __init__(self):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
108 self.sequence = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
109 self.length = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
110 self.label = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
111 self.confidence = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
112 self.usefulRow = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
113 self.row = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
114
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
115 class MlstResult(object):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
116 def __init__(self):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
117 self.file = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
118 self.speciesID = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
119 self.seqType = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
120 self.scheme = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
121 self.species = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
122 self.row=""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
123
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
124 class mobsuiteResult(object):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
125 def __init__(self):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
126 self.file_id = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
127 self.cluster_id = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
128 self.contig_id = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
129 self.contig_num = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
130 self.contig_length = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
131 self.circularity_status = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
132 self.rep_type = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
133 self.rep_type_accession = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
134 self.relaxase_type = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
135 self.relaxase_type_accession = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
136 self.mash_nearest_neighbor = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
137 self.mash_neighbor_distance = 0.00
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
138 self.repetitive_dna_id = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
139 self.match_type = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
140 self.score = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
141 self.contig_match_start = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
142 self.contig_match_end = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
143 self.row = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
144
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
145 class mobsuitePlasmids(object):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
146 def __init__(self):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
147 self.file_id = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
148 self.num_contigs = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
149 self.total_length = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
150 self.gc = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
151 self.rep_types = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
152 self.rep_typeAccession = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
153 self.relaxase_type= ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
154 self.relaxase_type_accession = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
155 self.mpf_type = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
156 self.mpf_type_accession= ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
157 self.orit_type = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
158 self.orit_accession = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
159 self.PredictedMobility = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
160 self.mash_nearest_neighbor = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
161 self.mash_neighbor_distance = 0.00
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
162 self.mash_neighbor_cluster= 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
163 self.row = ""
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
164
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
165 class RGIResult(object):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
166 def __init__(self):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
167 self.ORF_ID = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
168 self.Contig = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
169 self.Start = -1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
170 self.Stop = -1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
171 self.Orientation = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
172 self.Cut_Off = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
173 self.Pass_Bitscore = 100000
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
174 self.Best_Hit_Bitscore = 0.00
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
175 self.Best_Hit_ARO = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
176 self.Best_Identities = 0.00
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
177 self.ARO = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
178 self.Model_type = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
179 self.SNPs_in_Best_Hit_ARO = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
180 self.Other_SNPs = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
181 self.Drug_Class = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
182 self.Resistance_Mechanism = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
183 self.AMR_Gene_Family = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
184 self.Predicted_DNA = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
185 self.Predicted_Protein = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
186 self.CARD_Protein_Sequence = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
187 self.Percentage_Length_of_Reference_Sequence = 0.00
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
188 self.ID = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
189 self.Model_ID = 0
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
190 self.source = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
191 self.row = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
192
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
193 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
194
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
195 #region useful functions
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
196 def read(path):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
197 return [line.rstrip('\n') for line in open(path)]
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
198 def execute(command):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
199 process = subprocess.Popen(command, shell=False, cwd=curDir, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
200
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
201 # Poll process for new output until finished
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
202 while True:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
203 nextline = process.stdout.readline()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
204 if nextline == '' and process.poll() is not None:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
205 break
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
206 sys.stdout.write(nextline)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
207 sys.stdout.flush()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
208
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
209 output = process.communicate()[0]
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
210 exitCode = process.returncode
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
211
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
212 if (exitCode == 0):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
213 return output
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
214 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
215 raise subprocess.CalledProcessError(exitCode, command)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
216 def httpGetFile(url, filepath=""):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
217 if (filepath == ""):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
218 return urllib.request.urlretrieve(url)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
219 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
220 urllib.request.urlretrieve(url, filepath)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
221 return True
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
222 def gunzip(inputpath="", outputpath=""):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
223 if (outputpath == ""):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
224 with gzip.open(inputpath, 'rb') as f:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
225 gzContent = f.read()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
226 return gzContent
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
227 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
228 with gzip.open(inputpath, 'rb') as f:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
229 gzContent = f.read()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
230 with open(outputpath, 'wb') as out:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
231 out.write(gzContent)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
232 return True
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
233 def ToJson(dictObject, outputPath):
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
234 #outDir = outputDir + '/summary/' + ID + ".json/"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
235 #if not (os.path.exists(outDir)):
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
236 #os.makedirs(outDir)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
237 #with open(outputPath, 'w') as f:
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
238 #json.dump([ob.__dict__ for ob in dictObject.values()], f, ensure_ascii=False)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
239 return ""
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
240 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
241
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
242 #region functions to parse result files
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
243 def ParseMLSTResult(pathToMLSTResult, scheme):
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
244 _mlstResult = {}
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
245 scheme = pandas.read_csv(scheme, delimiter='\t', header=0)
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
246 scheme = scheme.replace(numpy.nan, '', regex=True)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
247
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
248 taxon = {}
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
249 #record the scheme as a dictionary
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
250 taxon["-"] = "No MLST Match"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
251 for i in range(len(scheme.index)):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
252 key = scheme.iloc[i,0]
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
253 if (str(scheme.iloc[i,2]) == "nan"):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
254 value = str(scheme.iloc[i,1])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
255 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
256 value = str(scheme.iloc[i,1]) + " " + str(scheme.iloc[i,2])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
257
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
258 if (key in taxon.keys()):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
259 taxon[key] = taxon.get(key) + ";" + value
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
260 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
261 taxon[key] = value
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
262 #read in the mlst result
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
263 mlst = pandas.read_csv(pathToMLSTResult, delimiter='\t', header=None)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
264 _mlstHit = MlstResult()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
265
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
266 _mlstHit.file = mlst.iloc[0,0]
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
267 _mlstHit.speciesID = (mlst.iloc[0,1])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
268 _mlstHit.seqType = str(mlst.iloc[0,2])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
269 for i in range(3, len(mlst.columns)):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
270 _mlstHit.scheme += mlst.iloc[0,i] + ";"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
271 _mlstHit.species = taxon[_mlstHit.speciesID]
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
272 _mlstHit.row = "\t".join(str(x) for x in mlst.ix[0].tolist())
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
273 _mlstResult[_mlstHit.speciesID]=_mlstHit
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
274
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
275 return _mlstResult
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
276
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
277 def ParsePlasmidFinderResult(pathToPlasmidFinderResult):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
278 #pipelineTest/contigs/BC110-Kpn005.fa contig00019 45455 45758 IncFIC(FII)_1 8-308/499 ========/=..... 8/11 59.52 75.65 plasmidfinder AP001918 IncFIC(FII)_1__AP001918
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
279 #example resfinder:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
280 #pipelineTest/contigs/BC110-Kpn005.fa contig00038 256 1053 OXA-181 1-798/798 =============== 0/0 100.00 100.00 bccdc AEP16366.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-181
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
281
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
282 _pFinder = {} #***********************
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
283 plasmidFinder = pandas.read_csv(pathToPlasmidFinderResult, delimiter='\t', header=0)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
284 plasmidFinder = plasmidFinder.replace(numpy.nan, '', regex=True)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
285
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
286
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
287 for i in range(len(plasmidFinder.index)):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
288 pf = starFinders()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
289 pf.file = str(plasmidFinder.iloc[i,0])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
290 pf.sequence = str(plasmidFinder.iloc[i,1])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
291 pf.start = int(plasmidFinder.iloc[i,2])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
292 pf.end = int(plasmidFinder.iloc[i,3])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
293 pf.gene = str(plasmidFinder.iloc[i,4])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
294 pf.shortGene = pf.gene[:pf.gene.index("_")]
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
295 pf.coverage = str(plasmidFinder.iloc[i,5])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
296 pf.coverage_map = str(plasmidFinder.iloc[i,6])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
297 pf.gaps = str(plasmidFinder.iloc[i,7])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
298 pf.pCoverage = float(plasmidFinder.iloc[i,8])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
299 pf.pIdentity = float(plasmidFinder.iloc[i,9])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
300 pf.database = str(plasmidFinder.iloc[i,10])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
301 pf.accession = str(plasmidFinder.iloc[i,11])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
302 pf.product = str(plasmidFinder.iloc[i,12])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
303 pf.source = "plasmid"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
304 pf.row = "\t".join(str(x) for x in plasmidFinder.ix[i].tolist())
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
305 _pFinder[pf.gene]=pf
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
306 #row = "\t".join(str(x) for x in plasmidFinder.ix[i].tolist())
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
307 #plasmidFinderContigs.append(str(plasmidFinder.iloc[i,1]))
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
308 #origins.append(str(plasmidFinder.iloc[i,4][:plasmidFinder.iloc[i,4].index("_")]))
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
309 return _pFinder
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
310
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
311 def ParseMobsuiteResult(pathToMobsuiteResult):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
312 _mobsuite = {}
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
313 mResult = pandas.read_csv(pathToMobsuiteResult, delimiter='\t', header=0)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
314 mResult = mResult.replace(numpy.nan, '', regex=True)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
315
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
316 for i in range(len(mResult.index)):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
317 mr = mobsuiteResult()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
318 mr.file_id = str(mResult.iloc[i,0])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
319 mr.cluster_id = str(mResult.iloc[i,1])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
320 if (mr.cluster_id == "chromosome"):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
321 break
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
322 mr.contig_id = str(mResult.iloc[i,2])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
323 mr.contig_num = mr.contig_id[(mr.contig_id.find("contig")+6):mr.contig_id.find("_len=")]
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
324 mr.contig_length = int(mResult.iloc[i,3])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
325 mr.circularity_status = str(mResult.iloc[i,4])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
326 mr.rep_type = str(mResult.iloc[i,5])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
327 mr.rep_type_accession = str(mResult.iloc[i,6])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
328 mr.relaxase_type = str(mResult.iloc[i,7])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
329 mr.relaxase_type_accession = str(mResult.iloc[i,8])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
330 mr.mash_nearest_neighbor = str(mResult.iloc[i,9])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
331 mr.mash_neighbor_distance = float(mResult.iloc[i,10])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
332 mr.repetitive_dna_id = str(mResult.iloc[i,11])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
333 mr.match_type = str(mResult.iloc[i,12])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
334 if (mr.match_type == ""):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
335 mr.score = -1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
336 mr.contig_match_start = -1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
337 mr.contig_match_end = -1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
338 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
339 mr.score = int(mResult.iloc[i,13])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
340 mr.contig_match_start = int(mResult.iloc[i,14])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
341 mr.contig_match_end = int(mResult.iloc[i,15])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
342 mr.row = "\t".join(str(x) for x in mResult.ix[i].tolist())
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
343 _mobsuite[mr.contig_id]=(mr)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
344 return _mobsuite
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
345
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
346 def ParseMobsuitePlasmids(pathToMobsuiteResult):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
347 _mobsuite = {}
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
348 mResults = pandas.read_csv(pathToMobsuiteResult, delimiter='\t', header=0)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
349 mResults = mResults.replace(numpy.nan, '', regex=True)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
350
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
351 for i in range(len(mResults.index)):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
352 mr = mobsuitePlasmids()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
353 mr.file_id = str(mResults.iloc[i,0])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
354 mr.num_contigs = int(mResults.iloc[i,1])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
355 mr.total_length = int(mResults.iloc[i,2])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
356 mr.gc = int(mResults.iloc[i,3])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
357 mr.rep_types = str(mResults.iloc[i,4])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
358 mr.rep_typeAccession = str(mResults.iloc[i,5])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
359 mr.relaxase_type = str(mResults.iloc[i,6])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
360 mr.relaxase_type_accession = str(mResults.iloc[i,7])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
361 mr.mpf_type = str(mResults.iloc[i,8])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
362 mr.mpf_type_accession = str(mResults.iloc[i,9])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
363 mr.orit_type = str(mResults.iloc[i,10])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
364 mr.orit_accession = str(mResults.iloc[i,11])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
365 mr.PredictedMobility = str(mResults.iloc[i,12])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
366 mr.mash_nearest_neighbor = str(mResults.iloc[i,13])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
367 mr.mash_neighbor_distance = float(mResults.iloc[i,14])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
368 mr.mash_neighbor_cluster = int(mResults.iloc[i,15])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
369 mr.row = "\t".join(str(x) for x in mResults.ix[i].tolist())
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
370 _mobsuite[mr.file_id] = mr
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
371 return _mobsuite
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
372
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
373 def ParseResFinderResult(pathToResFinderResults, plasmidContigs, likelyPlasmidContigs):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
374 _rFinder = {}
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
375 resFinder = pandas.read_csv(pathToResFinderResults, delimiter='\t', header=0)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
376 resFinder = resFinder.replace(numpy.nan, '', regex=True)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
377
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
378 for i in range(len(resFinder.index)):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
379 rf = starFinders()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
380 rf.file = str(resFinder.iloc[i,0])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
381 rf.sequence = str(resFinder.iloc[i,1])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
382 rf.start = int(resFinder.iloc[i,2])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
383 rf.end = int(resFinder.iloc[i,3])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
384 rf.gene = str(resFinder.iloc[i,4])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
385 rf.shortGene = rf.gene
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
386 rf.coverage = str(resFinder.iloc[i,5])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
387 rf.coverage_map = str(resFinder.iloc[i,6])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
388 rf.gaps = str(resFinder.iloc[i,7])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
389 rf.pCoverage = float(resFinder.iloc[i,8])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
390 rf.pIdentity = float(resFinder.iloc[i,9])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
391 rf.database = str(resFinder.iloc[i,10])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
392 rf.accession = str(resFinder.iloc[i,11])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
393 rf.product = str(resFinder.iloc[i,12])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
394 rf.row = "\t".join(str(x) for x in resFinder.ix[i].tolist())
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
395 if (rf.sequence[6:] in plasmidContigs):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
396 rf.source = "plasmid"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
397 elif (rf.sequence[6:] in likelyPlasmidContigs):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
398 rf.source = "likely plasmid"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
399 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
400 rf.source = "likely chromosome"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
401 _rFinder[rf.gene]=rf
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
402 return _rFinder
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
403
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
404 def ParseRGIResult(pathToRGIResults, plasmidContigs, likelyPlasmidContigs):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
405 _rgiR = {}
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
406 RGI = pandas.read_csv(pathToRGIResults, delimiter='\t', header=0)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
407 RGI = RGI.replace(numpy.nan, '', regex=True)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
408
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
409 for i in range(len(RGI.index)):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
410 r = RGIResult()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
411 r.ORF_ID = str(RGI.iloc[i,0])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
412 r.Contig = str(RGI.iloc[i,1])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
413 r.Contig_Num = r.Contig[6:r.Contig.find("_")]
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
414 r.Start = int(RGI.iloc[i,2])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
415 r.Stop = int(RGI.iloc[i,3])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
416 r.Orientation = str(RGI.iloc[i,4])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
417 r.Cut_Off = str(RGI.iloc[i,5])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
418 r.Pass_Bitscore = int(RGI.iloc[i,6])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
419 r.Best_Hit_Bitscore = float(RGI.iloc[i,7])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
420 r.Best_Hit_ARO = str(RGI.iloc[i,8])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
421 r.Best_Identities = float(RGI.iloc[i,9])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
422 r.ARO = int(RGI.iloc[i,10])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
423 r.Model_type = str(RGI.iloc[i,11])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
424 r.SNPs_in_Best_Hit_ARO = str(RGI.iloc[i,12])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
425 r.Other_SNPs = str(RGI.iloc[i,13])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
426 r.Drug_Class = str(RGI.iloc[i,14])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
427 r.Resistance_Mechanism = str(RGI.iloc[i,15])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
428 r.AMR_Gene_Family = str(RGI.iloc[i,16])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
429 r.Predicted_DNA = str(RGI.iloc[i,17])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
430 r.Predicted_Protein = str(RGI.iloc[i,18])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
431 r.CARD_Protein_Sequence = str(RGI.iloc[i,19])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
432 r.Percentage_Length_of_Reference_Sequence = float(RGI.iloc[i,20])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
433 r.ID = str(RGI.iloc[i,21])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
434 r.Model_ID = int(RGI.iloc[i,22])
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
435 r.row = "\t".join(str(x) for x in RGI.ix[i].tolist())
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
436 if (r.Contig_Num in plasmidContigs):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
437 r.source = "plasmid"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
438 elif (r.Contig_Num in likelyPlasmidContigs):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
439 r.source = "likely plasmid"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
440 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
441 r.source = "likely chromosome"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
442 _rgiR[r.Model_ID]=r
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
443 return _rgiR
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
444
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
445 def ParsePlasmidFinderResult(pathToPlasmidFinderResult):
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
446 #pipelineTest/contigs/BC110-Kpn005.fa contig00019 45455 45758 IncFIC(FII)_1 8-308/499 ========/=..... 8/11 59.52 75.65 plasmidfinder AP001918 IncFIC(FII)_1__AP001918
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
447 #example resfinder:
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
448 #pipelineTest/contigs/BC110-Kpn005.fa contig00038 256 1053 OXA-181 1-798/798 =============== 0/0 100.00 100.00 bccdc AEP16366.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-181
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
449
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
450 _pFinder = {} #***********************
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
451 plasmidFinder = pandas.read_csv(pathToPlasmidFinderResult, delimiter='\t', header=0)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
452
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
453 for i in range(len(plasmidFinder.index)):
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
454 pf = starFinders()
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
455 pf.file = str(plasmidFinder.iloc[i,0])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
456 pf.sequence = str(plasmidFinder.iloc[i,1])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
457 pf.start = int(plasmidFinder.iloc[i,2])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
458 pf.end = int(plasmidFinder.iloc[i,3])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
459 pf.gene = str(plasmidFinder.iloc[i,4])
13
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
460 if (pf.gene.find("_") > -1):
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
461 pf.shortGene = pf.gene[:pf.gene.index("_")]
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
462 else:
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
463 pf.shortGene = pf.gene
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
464 pf.coverage = str(plasmidFinder.iloc[i,5])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
465 pf.coverage_map = str(plasmidFinder.iloc[i,6])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
466 pf.gaps = str(plasmidFinder.iloc[i,7])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
467 pf.pCoverage = float(plasmidFinder.iloc[i,8])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
468 pf.pIdentity = float(plasmidFinder.iloc[i,9])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
469 pf.database = str(plasmidFinder.iloc[i,10])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
470 pf.accession = str(plasmidFinder.iloc[i,11])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
471 pf.product = str(plasmidFinder.iloc[i,12])
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
472 pf.source = "plasmid"
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
473 pf.row = "\t".join(str(x) for x in plasmidFinder.ix[i].tolist())
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
474 _pFinder[pf.gene]=pf
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
475 #row = "\t".join(str(x) for x in plasmidFinder.ix[i].tolist())
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
476 #plasmidFinderContigs.append(str(plasmidFinder.iloc[i,1]))
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
477 #origins.append(str(plasmidFinder.iloc[i,4][:plasmidFinder.iloc[i,4].index("_")]))
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
478 return _pFinder
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
479 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
480
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
481 def Main():
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
482 outputDir = "./"
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
483 notes = []
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
484 #init the output list
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
485 output = []
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
486 jsonOutput = []
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
487
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
488 print(str(datetime.datetime.now()) + "\n\nID: " + ID + "\nAssembly: " + ID)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
489 output.append(str(datetime.datetime.now()) + "\n\nID: " + ID + "\nAssembly: " + ID)
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
490
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
491 #region parse the mlst results
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
492 print("step 3: parsing mlst, plasmid, and amr results")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
493
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
494 print("identifying MLST")
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
495 mlstHit = ParseMLSTResult(mlst, str(mlstScheme))#***********************
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
496 ToJson(mlstHit, "mlst.json") #write it to a json output
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
497 mlstHit = list(mlstHit.values())[0]
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
498
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
499 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
500
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
501 #region parse mobsuite, resfinder and rgi results
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
502 print("identifying plasmid contigs and amr genes")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
503
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
504 plasmidContigs = []
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
505 likelyPlasmidContigs = []
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
506 origins = []
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
507
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
508 #parse mobsuite results
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
509 mSuite = ParseMobsuiteResult(mobfindercontig) #outputDir + "/predictions/" + ID + ".recon/contig_report.txt")#*************
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
510 ToJson(mSuite, "mobsuite.json") #*************
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
511 mSuitePlasmids = ParseMobsuitePlasmids(mobfinderaggregate)#outputDir + "/predictions/" + ID + ".recon/mobtyper_aggregate_report.txt")#*************
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
512 ToJson(mSuitePlasmids, "mobsuitePlasmids.json") #*************
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
513
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
514 for key in mSuite:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
515 if mSuite[key].contig_num not in plasmidContigs and mSuite[key].contig_num not in likelyPlasmidContigs:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
516 if not (mSuite[key].rep_type == ''):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
517 plasmidContigs.append(mSuite[key].contig_num)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
518 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
519 likelyPlasmidContigs.append(mSuite[key].contig_num)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
520 for key in mSuite:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
521 if mSuite[key].rep_type not in origins:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
522 origins.append(mSuite[key].rep_type)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
523
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
524 #parse resfinder AMR results
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
525 pFinder = ParsePlasmidFinderResult(plasmidfinder)
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
526 ToJson(pFinder, "origins.json")
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
527
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
528 rFinder = ParseResFinderResult(abricate, plasmidContigs, likelyPlasmidContigs)#outputDir + "/predictions/" + ID + ".cp", plasmidContigs, likelyPlasmidContigs) #**********************
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
529 ToJson(rFinder, "resfinder.json") #*************
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
530
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
531 rgiAMR = ParseRGIResult(rgi, plasmidContigs, likelyPlasmidContigs) # outputDir + "/predictions/" + ID + ".rgi.txt", plasmidContigs, likelyPlasmidContigs)#***********************
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
532 ToJson(rgiAMR, "rgi.json") #*************
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
533
13
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
534 carbapenamases = []
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
535 resfinderCarbas = [] #list of rfinder objects for lindaout list
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
536 amrGenes = []
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
537 for keys in rFinder:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
538 carbapenamases.append(rFinder[keys].shortGene + "(" + rFinder[keys].source + ")")
13
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
539 resfinderCarbas.append(rFinder[keys])
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
540 for keys in rgiAMR:
13
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
541 if (rgiAMR[keys].Drug_Class.find("carbapenem") > -1 and rgiAMR[keys].AMR_Gene_Family.find("beta-lactamase") > -1):
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
542 if (rgiAMR[keys].Best_Hit_ARO not in carbapenamases):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
543 carbapenamases.append(rgiAMR[keys].Best_Hit_ARO+ "(" + rgiAMR[keys].source + ")")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
544 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
545 if (rgiAMR[keys].Best_Hit_ARO not in amrGenes):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
546 amrGenes.append(rgiAMR[keys].Best_Hit_ARO+ "(" + rgiAMR[keys].source + ")")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
547 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
548
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
549 #region output parsed mlst information
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
550 print("formatting mlst outputs")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
551 output.append("\n\n\n~~~~~~~MLST summary~~~~~~~")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
552 output.append("MLST determined species: " + mlstHit.species)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
553 output.append("\nMLST Details: ")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
554 output.append(mlstHit.row)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
555
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
556 output.append("\nMLST information: ")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
557 if (mlstHit.species == expectedSpecies):
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
558 output.append("MLST determined species is the same as expected species")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
559 #notes.append("MLST determined species is the same as expected species")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
560 else:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
561 output.append("!!!MLST determined species is NOT the same as expected species, contamination? mislabeling?")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
562 notes.append("MLST: Not expected species. Possible contamination or mislabeling")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
563
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
564 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
565
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
566 #region output the parsed plasmid/amr results
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
567 output.append("\n\n\n~~~~~~~~Plasmids~~~~~~~~\n")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
568
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
569 output.append("predicted plasmid origins: ")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
570 output.append(";".join(origins))
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
571
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
572 output.append("\ndefinitely plasmid contigs")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
573 output.append(";".join(plasmidContigs))
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
574
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
575 output.append("\nlikely plasmid contigs")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
576 output.append(";".join(likelyPlasmidContigs))
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
577
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
578 output.append("\nmob-suite prediction details: ")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
579 for key in mSuite:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
580 output.append(mSuite[key].row)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
581
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
582 output.append("\n\n\n~~~~~~~~AMR Genes~~~~~~~~\n")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
583 output.append("predicted carbapenamase Genes: ")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
584 output.append(",".join(carbapenamases))
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
585 output.append("other RGI AMR Genes: ")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
586 for key in rgiAMR:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
587 output.append(rgiAMR[key].Best_Hit_ARO + "(" + rgiAMR[key].source + ")")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
588
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
589 output.append("\nDetails about the carbapenamase Genes: ")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
590 for key in rFinder:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
591 output.append(rFinder[key].row)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
592 output.append("\nDetails about the RGI AMR Genes: ")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
593 for key in rgiAMR:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
594 output.append(rgiAMR[key].row)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
595
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
596 #write summary to a file
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
597 summaryDir = outputDir + "/summary/" + ID
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
598 out = open("summary.txt", 'w')
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
599 for item in output:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
600 out.write("%s\n" % item)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
601
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
602
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
603 #TSV output
6
cabceaa239e4 planemo upload
jjjjia
parents: 5
diff changeset
604 lindaOut = []
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
605 tsvOut = []
13
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
606 lindaOut.append("ID\tQUALITY\tExpected Species\tMLST Scheme\tSequence Type\tMLST_ALLELE_1\tMLST_ALLELE_2\tMLST_ALLELE_3\tMLST_ALLELE_4\tMLST_ALLELE_5\tMLST_ALLELE_6\tMLST_ALLELE_7\tSEROTYPE\tK_CAPSULE\tPLASMID_2_RFLP\tPLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH\tPLASMID_1_CARBAPENEMASE\tPLASMID_1_INC_GROUP\tPLASMID_2_RFLP\tPLASMID_2_FAMILY\tPLASMID_2_BEST_MATCH\tPLASMID_2_COVERAGE\tPLASMID_2_SNVS_TO_BEST_MATCH\tPLASMID_2_CARBAPENEMASE\tPLASMID_2_INC_GROUP")
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
607 lindaTemp = ID + "\t" #id
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
608 lindaTemp += "\t" #quality
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
609 lindaTemp += expectedSpecies + "\t" #expected
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
610 lindaTemp += mlstHit.species + "\t" #mlstscheme
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
611 lindaTemp += str(mlstHit.seqType) + "\t" #seq type
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
612 lindaTemp += "\t".join(mlstHit.scheme.split(";")) + "\t"#mlst alleles x 7
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
613 lindaTemp += "\t\t" #sero and kcap
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
614
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
615 #resfinderCarbas
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
616 for carbs in resfinderCarbas:
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
617 if (carbs.source == "plasmid"): #
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
618 lindaTemp += "\t\t\t\t\t" #plasmid 1 rflp plasmid 1 family information. PLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
619 lindaTemp += carbs.shortGene + "\t" #found an carbapenase
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
620 contig = carbs.sequence[6:] #this is the contig number
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
621 for i in mSuite.keys():
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
622 if (str(mSuite[i].contig_num) == str(contig)): #found the right plasmid
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
623 lindaTemp += mSuite[i].rep_type
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
624 lindaOut.append(lindaTemp)
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
625 out = open("summary.linda.tsv", 'w')
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
626 for item in lindaOut:
a14b12a71a53 planemo upload
jjjjia
parents: 6
diff changeset
627 out.write("%s\n" % item)
6
cabceaa239e4 planemo upload
jjjjia
parents: 5
diff changeset
628
cabceaa239e4 planemo upload
jjjjia
parents: 5
diff changeset
629 tsvOut.append("new\tID\tExpected Species\tMLST Species\tSequence Type\tMLST Scheme\tCarbapenem Resistance Genes\tOther AMR Genes\tTotal Plasmids\tPlasmids ID\tNum_Contigs\tPlasmid Length\tPlasmid RepType\tPlasmid Mobility\tNearest Reference\tDefinitely Plasmid Contigs\tLikely Plasmid Contigs")
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
630 #start with ID
6
cabceaa239e4 planemo upload
jjjjia
parents: 5
diff changeset
631 temp = "\t"
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
632 temp += (ID + "\t")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
633 temp += expectedSpecies + "\t"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
634
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
635 #move into MLST
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
636 temp += mlstHit.species + "\t"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
637 temp += str(mlstHit.seqType) + "\t"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
638 temp += mlstHit.scheme + "\t"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
639
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
640 #now onto AMR genes
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
641 temp += ";".join(carbapenamases) + "\t"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
642 temp += ";".join(amrGenes) + "\t"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
643
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
644 #lastly plasmids
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
645 temp+= str(len(mSuitePlasmids)) + "\t"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
646 plasmidID = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
647 contigs = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
648 lengths = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
649 rep_type = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
650 mobility = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
651 neighbour = ""
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
652 for keys in mSuitePlasmids:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
653 plasmidID += str(mSuitePlasmids[keys].mash_neighbor_cluster) + ";"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
654 contigs += str(mSuitePlasmids[keys].num_contigs) + ";"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
655 lengths += str(mSuitePlasmids[keys].total_length) + ";"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
656 rep_type += str(mSuitePlasmids[keys].rep_types) + ";"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
657 mobility += str(mSuitePlasmids[keys].PredictedMobility) + ";"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
658 neighbour += str(mSuitePlasmids[keys].mash_nearest_neighbor) + ";"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
659 temp += plasmidID + "\t" + contigs + "\t" + lengths + "\t" + rep_type + "\t" + mobility + "\t" + neighbour + "\t"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
660 temp += ";".join(plasmidContigs) + "\t"
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
661 temp += ";".join(likelyPlasmidContigs)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
662 tsvOut.append(temp)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
663
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
664 summaryDir = outputDir + "/summary/" + ID
3
e6027598a35c planemo upload
jjjjia
parents: 2
diff changeset
665 out = open("summary.tsv", 'w')
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
666 for item in tsvOut:
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
667 out.write("%s\n" % item)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
668 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
669
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
670
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
671 start = time.time()#time the analysis
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
672 print("Starting workflow...")
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
673 #analysis time
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
674 Main()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
675
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
676 end = time.time()
5
698579246d0d planemo upload
jjjjia
parents: 4
diff changeset
677 print("Finished!\nThe analysis used: " + str(end-start) + " seconds")