comparison bin/SeqSero2_package.py @ 8:357e38526e2a draft

planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
author cstrittmatter
date Thu, 30 Apr 2020 21:16:45 -0400
parents aa54a94b9aeb
children e6437d423693
comparison
equal deleted inserted replaced
7:aa54a94b9aeb 8:357e38526e2a
20 from version import SeqSero2_version 20 from version import SeqSero2_version
21 21
22 ### SeqSero Kmer 22 ### SeqSero Kmer
23 def parse_args(): 23 def parse_args():
24 "Parse the input arguments, use '-h' for help." 24 "Parse the input arguments, use '-h' for help."
25 parser = argparse.ArgumentParser(usage='SeqSero2_package.py -t <data_type> -m <mode> -i <input_data> [-d <output_directory>] [-p <number of threads>] [-b <BWA_algorithm>]\n\nDevelopper: Shaokang Zhang (zskzsk@uga.edu), Hendrik C Den-Bakker (Hendrik.DenBakker@uga.edu) and Xiangyu Deng (xdeng@uga.edu)\n\nContact email:seqsero@gmail.com\n\nVersion: v1.1.0')#add "-m <data_type>" in future 25 parser = argparse.ArgumentParser(usage='SeqSero2_package.py -t <data_type> -m <mode> -i <input_data> [-d <output_directory>] [-p <number of threads>] [-b <BWA_algorithm>]\n\nDevelopper: Shaokang Zhang (zskzsk@uga.edu), Hendrik C Den-Bakker (Hendrik.DenBakker@uga.edu) and Xiangyu Deng (xdeng@uga.edu)\n\nContact email:seqsero@gmail.com\n\nVersion: v1.1.1')#add "-m <data_type>" in future
26 parser.add_argument("-i",nargs="+",help="<string>: path/to/input_data",type=os.path.abspath) ### add 'type=os.path.abspath' to generate absolute path of input data. 26 parser.add_argument("-i",nargs="+",help="<string>: path/to/input_data",type=os.path.abspath) ### add 'type=os.path.abspath' to generate absolute path of input data.
27 parser.add_argument("-t",choices=['1','2','3','4','5','6'],help="<int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore fasta, '6' for nanopore fastq") 27 parser.add_argument("-t",choices=['1','2','3','4','5','6'],help="<int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore fasta, '6' for nanopore fastq")
28 parser.add_argument("-b",choices=['sam','mem'],default="mem",help="<string>: algorithms for bwa mapping for allele mode; 'mem' for mem, 'sam' for samse/sampe; default=mem; optional; for now we only optimized for default 'mem' mode") 28 parser.add_argument("-b",choices=['sam','mem'],default="mem",help="<string>: algorithms for bwa mapping for allele mode; 'mem' for mem, 'sam' for samse/sampe; default=mem; optional; for now we only optimized for default 'mem' mode")
29 parser.add_argument("-p",default="1",help="<int>: number of threads for allele mode, if p >4, only 4 threads will be used for assembly since the amount of extracted reads is small, default=1") 29 parser.add_argument("-p",default="1",help="<int>: number of threads for allele mode, if p >4, only 4 threads will be used for assembly since the amount of extracted reads is small, default=1")
30 parser.add_argument("-m",choices=['k','a'],default="a",help="<string>: which workflow to apply, 'a'(raw reads allele micro-assembly), 'k'(raw reads and genome assembly k-mer), default=a") 30 parser.add_argument("-m",choices=['k','a'],default="a",help="<string>: which workflow to apply, 'a'(raw reads allele micro-assembly), 'k'(raw reads and genome assembly k-mer), default=a")
1238 samid_strcmd = "python " + dirpath + "/../SalmID.py -i "+fnameA 1238 samid_strcmd = "python " + dirpath + "/../SalmID.py -i "+fnameA
1239 #subprocess.check_call(samid_strcmd + " > Salmidoutput.txt",shell=True) 1239 #subprocess.check_call(samid_strcmd + " > Salmidoutput.txt",shell=True)
1240 print(samid_strcmd) 1240 print(samid_strcmd)
1241 salmID_output=subprocess.Popen(samid_strcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) 1241 salmID_output=subprocess.Popen(samid_strcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
1242 out, err = salmID_output.communicate() 1242 out, err = salmID_output.communicate()
1243 print(err) 1243 print("judge_subspecies -> err = salmID_output.communicate() " + err)
1244 print(out)
1245 out=out.decode("utf-8") 1244 out=out.decode("utf-8")
1246 file=open("data_log.txt","a") 1245 file=open("data_log.txt","a")
1247 file.write(out) 1246 file.write(out)
1248 file.close() 1247 file.close()
1249
1250 salm_species_scores=out.split("\n")[1].split("\t")[6:] 1248 salm_species_scores=out.split("\n")[1].split("\t")[6:]
1251 salm_species_results=out.split("\n")[0].split("\t")[6:] 1249 salm_species_results=out.split("\n")[0].split("\t")[6:]
1252 max_score=0 1250 max_score=0
1253 max_score_index=1 #default is 1, means "I" 1251 max_score_index=1 #default is 1, means "I"
1254 for i in range(len(salm_species_scores)): 1252 for i in range(len(salm_species_scores)):
1261 prediction="bongori" #if not, the prediction would always be enterica, since they are located in the later part 1259 prediction="bongori" #if not, the prediction would always be enterica, since they are located in the later part
1262 #if max_score<10: ## ed_SL_0318: change SalmID_ssp_threshold 1260 #if max_score<10: ## ed_SL_0318: change SalmID_ssp_threshold
1263 if max_score<60: 1261 if max_score<60:
1264 prediction="-" 1262 prediction="-"
1265 return prediction 1263 return prediction
1266
1267 1264
1268 def judge_subspecies_Kmer(Special_dict): 1265 def judge_subspecies_Kmer(Special_dict):
1269 #seqsero2 -k; 1266 #seqsero2 -k;
1270 max_score=0 1267 max_score=0
1271 prediction="-" #default should be I 1268 prediction="-" #default should be I
1329 else: 1326 else:
1330 request_id = time.strftime("%m_%d_%Y_%H_%M_%S", time.localtime()) 1327 request_id = time.strftime("%m_%d_%Y_%H_%M_%S", time.localtime())
1331 request_id += str(random.randint(1, 10000000)) 1328 request_id += str(random.randint(1, 10000000))
1332 if make_dir is None: 1329 if make_dir is None:
1333 make_dir="SeqSero_result_"+request_id 1330 make_dir="SeqSero_result_"+request_id
1331 make_dir=os.path.abspath(make_dir)
1334 if os.path.isdir(make_dir): 1332 if os.path.isdir(make_dir):
1335 pass 1333 pass
1336 else: 1334 else:
1337 subprocess.check_call(["mkdir",make_dir]) 1335 subprocess.check_call("mkdir -p "+make_dir,shell=True)
1338 #subprocess.check_call("cp "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) 1336 #subprocess.check_call("cp "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True)
1339 #subprocess.check_call("ln -sr "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) 1337 #subprocess.check_call("ln -sr "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True)
1340 subprocess.check_call("ln -f -s "+seqsero2_db+" "+" ".join(input_file)+" "+make_dir,shell=True) # ed_SL_11092019: change path to database for packaging 1338 subprocess.check_call("ln -f -s "+seqsero2_db+" "+" ".join(input_file)+" "+make_dir,shell=True) # ed_SL_11092019: change path to database for packaging
1341 #subprocess.check_call("ln -f -s "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) ### use -f option to force the replacement of links, remove -r and use absolute path instead to avoid link issue (use 'type=os.path.abspath' in -i argument). 1339 #subprocess.check_call("ln -f -s "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) ### use -f option to force the replacement of links, remove -r and use absolute path instead to avoid link issue (use 'type=os.path.abspath' in -i argument).
1342 ############################begin the real analysis 1340 ############################begin the real analysis