Mercurial > repos > estrain > seqsero_v1
diff SeqSero/libs/deletion_compare.py @ 0:c577b57b7c74 draft
Uploaded
author | estrain |
---|---|
date | Wed, 06 Dec 2017 15:59:29 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SeqSero/libs/deletion_compare.py Wed Dec 06 15:59:29 2017 -0500 @@ -0,0 +1,135 @@ + +import os +from Bio import SeqIO +import sys +from Initial_functions import Uniq +from Bio.Blast import NCBIXML + + +target=sys.argv[1] #should be sra format +test_gene=sys.argv[2] +mapping_mode=sys.argv[3] +if sys.argv[4] not in ("1","2","3"): + additional_file=sys.argv[4] + file_mode=sys.argv[5] +else: + additional_file="" + file_mode=sys.argv[4] + + + + +def Copenhagen(sra_name,additional_file,mapping_mode,file_mode): + if file_mode=="1":#interleaved + if sra_name[-3:]=="sra": + del_fastq=1 + for_fq=sra_name.replace(".sra","_1.fastq") + rev_fq=sra_name.replace(".sra","_2.fastq") + for_sai=sra_name.replace(".sra","_1.sai") + rev_sai=sra_name.replace(".sra","_2.sai") + sam=sra_name.replace(".sra",".sam") + bam=sra_name.replace(".sra",".bam") + else: + del_fastq=0 + core_id=sra_name.split(".fastq")[0] + for_fq=core_id+"-read1.fastq" + rev_fq=core_id+"-read2.fastq" + for_sai=core_id+"_1.sai" + rev_sai=core_id+"_2.sai" + sam=core_id+".sam" + bam=core_id+".bam" + elif file_mode=="2":#seperated + forword_seq=sra_name + reverse_seq=additional_file + for_core_id=forword_seq.split(".fastq")[0] + re_core_id=reverse_seq.split(".fastq")[0] + for_fq=for_core_id+".fastq" + rev_fq=re_core_id+".fastq" + for_sai=for_core_id+".sai" + rev_sai=re_core_id+".sai" + sam=for_core_id+".sam" + bam=sam.replace(".sam",".bam") + elif file_mode=="3":#single-end + if sra_name[-3:]=="sra": + del_fastq=1 + for_fq=sra_name.replace(".sra","_1.fastq") + rev_fq=sra_name.replace(".sra","_2.fastq") + for_sai=sra_name.replace(".sra","_1.sai") + rev_sai=sra_name.replace(".sra","_2.sai") + sam=sra_name.replace(".sra",".sam") + bam=sra_name.replace(".sra",".bam") + else: + del_fastq=0 + core_id=sra_name.split(".fastq")[0] + for_fq=core_id+".fastq" + rev_fq=core_id+".fastq" + for_sai=core_id+"_1.sai" + rev_sai=core_id+"_2.sai" + sam=core_id+".sam" + bam=core_id+".bam" + + database="complete_oafA.fasta" + os.system("bwa index database/"+database)###01/28/2015 + if mapping_mode=="mem": + os.system("bwa mem database/"+database+" "+for_fq+" "+rev_fq+" > "+sam) #2014/12/23 + elif mapping_mode=="sam": + os.system("bwa aln database/"+database+" "+for_fq+" > "+for_sai) + os.system("bwa aln database/"+database+" "+rev_fq+" > "+rev_sai) + os.system("bwa sampe database/"+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam) + os.system("samtools view -F 4 -Sbh "+sam+" > "+bam) + os.system("samtools view -h -o "+sam+" "+bam) + os.system("cat "+sam+"|awk '{if ($5>0) {print $10}}'>"+sam+"_seq.txt") + os.system("cat "+sam+"|awk '{if ($5>0) {print $1}}'>"+sam+"_title.txt") + file1=open(sam+"_title.txt","r") + file2=open(sam+"_seq.txt","r") + file1=file1.readlines() + file2=file2.readlines() + file=open(sam+".fasta","w") + for i in range(len(file1)): + title=">"+file1[i] + seq=file2[i] + if len(seq)>40 and (len(title)>5 or ("@" not in title)): + file.write(title) + file.write(seq) + file.close() + database2="oafA_of_O4_O5.fasta" + os.system('makeblastdb -in database/'+database2+' -out '+database2+'_db '+'-dbtype nucl') + os.system("blastn -query "+sam+".fasta"+" -db "+database2+"_db -out "+sam+"_vs_O45.xml -outfmt 5") + handle=open(sam+"_vs_O45.xml") + handle=NCBIXML.parse(handle) + handle=list(handle) + O9_bigger=0 + O2_bigger=0 + for x in handle: + O9_score=0 + O2_score=0 + try: + if 'O-4_full' in x.alignments[0].hit_def: + O9_score=x.alignments[0].hsps[0].bits + O2_score=x.alignments[1].hsps[0].bits + elif 'O-4_5-' in x.alignments[0].hit_def: + O9_score=x.alignments[1].hsps[0].bits + O2_score=x.alignments[0].hsps[0].bits + if O9_score>O2_score: + O9_bigger+=1 + if O9_score<O2_score: + O2_bigger+=1 + except: + continue + print "$$$Genome:",sra_name + if O9_bigger>O2_bigger: + print "$$$Typhimurium" + elif O9_bigger<O2_bigger: + print "$$$Typhimurium_O5-" + else: + print "$$$Typhimurium, even no 7 bases difference" + print "O-4 number is:",O9_bigger + print "O-4_5- number is:",O2_bigger + os.system("rm "+sam+"_title.txt")###01/28/2015 + os.system("rm "+sam+"_seq.txt")###01/28/2015 + os.system("rm "+sam+".fasta")###01/28/2015 + os.system("rm "+database2+"_db.*")###01/28/2015 + os.system("rm "+sam+"_vs_O45.xml")###01/28/2015 + +if test_gene=="oafA": + Copenhagen(target,additional_file,mapping_mode,file_mode) \ No newline at end of file