Mercurial > repos > cstrittmatter > ss2v110
comparison bin/SeqSero2_package.py @ 8:357e38526e2a draft
planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
author | cstrittmatter |
---|---|
date | Thu, 30 Apr 2020 21:16:45 -0400 |
parents | aa54a94b9aeb |
children | e6437d423693 |
comparison
equal
deleted
inserted
replaced
7:aa54a94b9aeb | 8:357e38526e2a |
---|---|
20 from version import SeqSero2_version | 20 from version import SeqSero2_version |
21 | 21 |
22 ### SeqSero Kmer | 22 ### SeqSero Kmer |
23 def parse_args(): | 23 def parse_args(): |
24 "Parse the input arguments, use '-h' for help." | 24 "Parse the input arguments, use '-h' for help." |
25 parser = argparse.ArgumentParser(usage='SeqSero2_package.py -t <data_type> -m <mode> -i <input_data> [-d <output_directory>] [-p <number of threads>] [-b <BWA_algorithm>]\n\nDevelopper: Shaokang Zhang (zskzsk@uga.edu), Hendrik C Den-Bakker (Hendrik.DenBakker@uga.edu) and Xiangyu Deng (xdeng@uga.edu)\n\nContact email:seqsero@gmail.com\n\nVersion: v1.1.0')#add "-m <data_type>" in future | 25 parser = argparse.ArgumentParser(usage='SeqSero2_package.py -t <data_type> -m <mode> -i <input_data> [-d <output_directory>] [-p <number of threads>] [-b <BWA_algorithm>]\n\nDevelopper: Shaokang Zhang (zskzsk@uga.edu), Hendrik C Den-Bakker (Hendrik.DenBakker@uga.edu) and Xiangyu Deng (xdeng@uga.edu)\n\nContact email:seqsero@gmail.com\n\nVersion: v1.1.1')#add "-m <data_type>" in future |
26 parser.add_argument("-i",nargs="+",help="<string>: path/to/input_data",type=os.path.abspath) ### add 'type=os.path.abspath' to generate absolute path of input data. | 26 parser.add_argument("-i",nargs="+",help="<string>: path/to/input_data",type=os.path.abspath) ### add 'type=os.path.abspath' to generate absolute path of input data. |
27 parser.add_argument("-t",choices=['1','2','3','4','5','6'],help="<int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore fasta, '6' for nanopore fastq") | 27 parser.add_argument("-t",choices=['1','2','3','4','5','6'],help="<int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore fasta, '6' for nanopore fastq") |
28 parser.add_argument("-b",choices=['sam','mem'],default="mem",help="<string>: algorithms for bwa mapping for allele mode; 'mem' for mem, 'sam' for samse/sampe; default=mem; optional; for now we only optimized for default 'mem' mode") | 28 parser.add_argument("-b",choices=['sam','mem'],default="mem",help="<string>: algorithms for bwa mapping for allele mode; 'mem' for mem, 'sam' for samse/sampe; default=mem; optional; for now we only optimized for default 'mem' mode") |
29 parser.add_argument("-p",default="1",help="<int>: number of threads for allele mode, if p >4, only 4 threads will be used for assembly since the amount of extracted reads is small, default=1") | 29 parser.add_argument("-p",default="1",help="<int>: number of threads for allele mode, if p >4, only 4 threads will be used for assembly since the amount of extracted reads is small, default=1") |
30 parser.add_argument("-m",choices=['k','a'],default="a",help="<string>: which workflow to apply, 'a'(raw reads allele micro-assembly), 'k'(raw reads and genome assembly k-mer), default=a") | 30 parser.add_argument("-m",choices=['k','a'],default="a",help="<string>: which workflow to apply, 'a'(raw reads allele micro-assembly), 'k'(raw reads and genome assembly k-mer), default=a") |
1238 samid_strcmd = "python " + dirpath + "/../SalmID.py -i "+fnameA | 1238 samid_strcmd = "python " + dirpath + "/../SalmID.py -i "+fnameA |
1239 #subprocess.check_call(samid_strcmd + " > Salmidoutput.txt",shell=True) | 1239 #subprocess.check_call(samid_strcmd + " > Salmidoutput.txt",shell=True) |
1240 print(samid_strcmd) | 1240 print(samid_strcmd) |
1241 salmID_output=subprocess.Popen(samid_strcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) | 1241 salmID_output=subprocess.Popen(samid_strcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) |
1242 out, err = salmID_output.communicate() | 1242 out, err = salmID_output.communicate() |
1243 print(err) | 1243 print("judge_subspecies -> err = salmID_output.communicate() " + err) |
1244 print(out) | |
1245 out=out.decode("utf-8") | 1244 out=out.decode("utf-8") |
1246 file=open("data_log.txt","a") | 1245 file=open("data_log.txt","a") |
1247 file.write(out) | 1246 file.write(out) |
1248 file.close() | 1247 file.close() |
1249 | |
1250 salm_species_scores=out.split("\n")[1].split("\t")[6:] | 1248 salm_species_scores=out.split("\n")[1].split("\t")[6:] |
1251 salm_species_results=out.split("\n")[0].split("\t")[6:] | 1249 salm_species_results=out.split("\n")[0].split("\t")[6:] |
1252 max_score=0 | 1250 max_score=0 |
1253 max_score_index=1 #default is 1, means "I" | 1251 max_score_index=1 #default is 1, means "I" |
1254 for i in range(len(salm_species_scores)): | 1252 for i in range(len(salm_species_scores)): |
1261 prediction="bongori" #if not, the prediction would always be enterica, since they are located in the later part | 1259 prediction="bongori" #if not, the prediction would always be enterica, since they are located in the later part |
1262 #if max_score<10: ## ed_SL_0318: change SalmID_ssp_threshold | 1260 #if max_score<10: ## ed_SL_0318: change SalmID_ssp_threshold |
1263 if max_score<60: | 1261 if max_score<60: |
1264 prediction="-" | 1262 prediction="-" |
1265 return prediction | 1263 return prediction |
1266 | |
1267 | 1264 |
1268 def judge_subspecies_Kmer(Special_dict): | 1265 def judge_subspecies_Kmer(Special_dict): |
1269 #seqsero2 -k; | 1266 #seqsero2 -k; |
1270 max_score=0 | 1267 max_score=0 |
1271 prediction="-" #default should be I | 1268 prediction="-" #default should be I |
1329 else: | 1326 else: |
1330 request_id = time.strftime("%m_%d_%Y_%H_%M_%S", time.localtime()) | 1327 request_id = time.strftime("%m_%d_%Y_%H_%M_%S", time.localtime()) |
1331 request_id += str(random.randint(1, 10000000)) | 1328 request_id += str(random.randint(1, 10000000)) |
1332 if make_dir is None: | 1329 if make_dir is None: |
1333 make_dir="SeqSero_result_"+request_id | 1330 make_dir="SeqSero_result_"+request_id |
1331 make_dir=os.path.abspath(make_dir) | |
1334 if os.path.isdir(make_dir): | 1332 if os.path.isdir(make_dir): |
1335 pass | 1333 pass |
1336 else: | 1334 else: |
1337 subprocess.check_call(["mkdir",make_dir]) | 1335 subprocess.check_call("mkdir -p "+make_dir,shell=True) |
1338 #subprocess.check_call("cp "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) | 1336 #subprocess.check_call("cp "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) |
1339 #subprocess.check_call("ln -sr "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) | 1337 #subprocess.check_call("ln -sr "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) |
1340 subprocess.check_call("ln -f -s "+seqsero2_db+" "+" ".join(input_file)+" "+make_dir,shell=True) # ed_SL_11092019: change path to database for packaging | 1338 subprocess.check_call("ln -f -s "+seqsero2_db+" "+" ".join(input_file)+" "+make_dir,shell=True) # ed_SL_11092019: change path to database for packaging |
1341 #subprocess.check_call("ln -f -s "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) ### use -f option to force the replacement of links, remove -r and use absolute path instead to avoid link issue (use 'type=os.path.abspath' in -i argument). | 1339 #subprocess.check_call("ln -f -s "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) ### use -f option to force the replacement of links, remove -r and use absolute path instead to avoid link issue (use 'type=os.path.abspath' in -i argument). |
1342 ############################begin the real analysis | 1340 ############################begin the real analysis |