| 
0
 | 
     1 
 | 
| 
 | 
     2 import os
 | 
| 
 | 
     3 from Bio import SeqIO
 | 
| 
 | 
     4 import sys
 | 
| 
 | 
     5 from Initial_functions import Uniq
 | 
| 
 | 
     6 from Bio.Blast import NCBIXML
 | 
| 
 | 
     7 
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 target=sys.argv[1] #should be sra format
 | 
| 
 | 
    10 test_gene=sys.argv[2]
 | 
| 
 | 
    11 mapping_mode=sys.argv[3]
 | 
| 
 | 
    12 if sys.argv[4] not in ("1","2","3"):
 | 
| 
 | 
    13   additional_file=sys.argv[4]
 | 
| 
 | 
    14   file_mode=sys.argv[5]
 | 
| 
 | 
    15 else:
 | 
| 
 | 
    16   additional_file=""
 | 
| 
 | 
    17   file_mode=sys.argv[4]
 | 
| 
 | 
    18 
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 
 | 
| 
 | 
    21 
 | 
| 
 | 
    22 def Copenhagen(sra_name,additional_file,mapping_mode,file_mode):
 | 
| 
 | 
    23   if file_mode=="1":#interleaved
 | 
| 
 | 
    24     if sra_name[-3:]=="sra":
 | 
| 
 | 
    25       del_fastq=1
 | 
| 
 | 
    26       for_fq=sra_name.replace(".sra","_1.fastq")
 | 
| 
 | 
    27       rev_fq=sra_name.replace(".sra","_2.fastq")
 | 
| 
 | 
    28       for_sai=sra_name.replace(".sra","_1.sai")
 | 
| 
 | 
    29       rev_sai=sra_name.replace(".sra","_2.sai")
 | 
| 
 | 
    30       sam=sra_name.replace(".sra",".sam")
 | 
| 
 | 
    31       bam=sra_name.replace(".sra",".bam")
 | 
| 
 | 
    32     else:
 | 
| 
 | 
    33       del_fastq=0
 | 
| 
 | 
    34       core_id=sra_name.split(".fastq")[0]
 | 
| 
 | 
    35       for_fq=core_id+"-read1.fastq"
 | 
| 
 | 
    36       rev_fq=core_id+"-read2.fastq"
 | 
| 
 | 
    37       for_sai=core_id+"_1.sai"
 | 
| 
 | 
    38       rev_sai=core_id+"_2.sai"
 | 
| 
 | 
    39       sam=core_id+".sam"
 | 
| 
 | 
    40       bam=core_id+".bam"
 | 
| 
 | 
    41   elif file_mode=="2":#seperated
 | 
| 
 | 
    42     forword_seq=sra_name
 | 
| 
 | 
    43     reverse_seq=additional_file
 | 
| 
 | 
    44     for_core_id=forword_seq.split(".fastq")[0]
 | 
| 
 | 
    45     re_core_id=reverse_seq.split(".fastq")[0]
 | 
| 
 | 
    46     for_fq=for_core_id+".fastq"
 | 
| 
 | 
    47     rev_fq=re_core_id+".fastq"
 | 
| 
 | 
    48     for_sai=for_core_id+".sai"
 | 
| 
 | 
    49     rev_sai=re_core_id+".sai"
 | 
| 
 | 
    50     sam=for_core_id+".sam"
 | 
| 
 | 
    51     bam=sam.replace(".sam",".bam")
 | 
| 
 | 
    52   elif file_mode=="3":#single-end
 | 
| 
 | 
    53     if sra_name[-3:]=="sra":
 | 
| 
 | 
    54         del_fastq=1
 | 
| 
 | 
    55         for_fq=sra_name.replace(".sra","_1.fastq")
 | 
| 
 | 
    56         rev_fq=sra_name.replace(".sra","_2.fastq")
 | 
| 
 | 
    57         for_sai=sra_name.replace(".sra","_1.sai")
 | 
| 
 | 
    58         rev_sai=sra_name.replace(".sra","_2.sai")
 | 
| 
 | 
    59         sam=sra_name.replace(".sra",".sam")
 | 
| 
 | 
    60         bam=sra_name.replace(".sra",".bam")
 | 
| 
 | 
    61     else:
 | 
| 
 | 
    62         del_fastq=0
 | 
| 
 | 
    63         core_id=sra_name.split(".fastq")[0]
 | 
| 
 | 
    64         for_fq=core_id+".fastq"
 | 
| 
 | 
    65         rev_fq=core_id+".fastq"
 | 
| 
 | 
    66         for_sai=core_id+"_1.sai"
 | 
| 
 | 
    67         rev_sai=core_id+"_2.sai"
 | 
| 
 | 
    68         sam=core_id+".sam"
 | 
| 
 | 
    69         bam=core_id+".bam"
 | 
| 
 | 
    70   
 | 
| 
 | 
    71   database="complete_oafA.fasta"
 | 
| 
 | 
    72   os.system("bwa index database/"+database)###01/28/2015
 | 
| 
 | 
    73   if mapping_mode=="mem":
 | 
| 
 | 
    74     os.system("bwa mem database/"+database+" "+for_fq+" "+rev_fq+" > "+sam) #2014/12/23
 | 
| 
 | 
    75   elif mapping_mode=="sam":
 | 
| 
 | 
    76     os.system("bwa aln database/"+database+" "+for_fq+" > "+for_sai)
 | 
| 
 | 
    77     os.system("bwa aln database/"+database+" "+rev_fq+" > "+rev_sai)
 | 
| 
 | 
    78     os.system("bwa sampe database/"+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam)
 | 
| 
 | 
    79   os.system("samtools view -F 4 -Sbh "+sam+" > "+bam)
 | 
| 
 | 
    80   os.system("samtools view -h -o "+sam+" "+bam)
 | 
| 
 | 
    81   os.system("cat "+sam+"|awk '{if ($5>0) {print $10}}'>"+sam+"_seq.txt")
 | 
| 
 | 
    82   os.system("cat "+sam+"|awk '{if ($5>0) {print $1}}'>"+sam+"_title.txt")
 | 
| 
 | 
    83   file1=open(sam+"_title.txt","r")
 | 
| 
 | 
    84   file2=open(sam+"_seq.txt","r")
 | 
| 
 | 
    85   file1=file1.readlines()
 | 
| 
 | 
    86   file2=file2.readlines()
 | 
| 
 | 
    87   file=open(sam+".fasta","w")
 | 
| 
 | 
    88   for i in range(len(file1)):
 | 
| 
 | 
    89     title=">"+file1[i]
 | 
| 
 | 
    90     seq=file2[i]
 | 
| 
 | 
    91     if len(seq)>40 and (len(title)>5 or ("@" not in title)):
 | 
| 
 | 
    92       file.write(title)
 | 
| 
 | 
    93       file.write(seq)
 | 
| 
 | 
    94   file.close()
 | 
| 
 | 
    95   database2="oafA_of_O4_O5.fasta"
 | 
| 
 | 
    96   os.system('makeblastdb -in database/'+database2+' -out '+database2+'_db '+'-dbtype nucl')
 | 
| 
 | 
    97   os.system("blastn -query "+sam+".fasta"+" -db "+database2+"_db -out "+sam+"_vs_O45.xml -outfmt 5")
 | 
| 
 | 
    98   handle=open(sam+"_vs_O45.xml")
 | 
| 
 | 
    99   handle=NCBIXML.parse(handle)
 | 
| 
 | 
   100   handle=list(handle)
 | 
| 
 | 
   101   O9_bigger=0
 | 
| 
 | 
   102   O2_bigger=0
 | 
| 
 | 
   103   for x in handle:
 | 
| 
 | 
   104     O9_score=0
 | 
| 
 | 
   105     O2_score=0
 | 
| 
 | 
   106     try:
 | 
| 
 | 
   107       if 'O-4_full' in x.alignments[0].hit_def:
 | 
| 
 | 
   108         O9_score=x.alignments[0].hsps[0].bits
 | 
| 
 | 
   109         O2_score=x.alignments[1].hsps[0].bits
 | 
| 
 | 
   110       elif 'O-4_5-' in x.alignments[0].hit_def:
 | 
| 
 | 
   111         O9_score=x.alignments[1].hsps[0].bits
 | 
| 
 | 
   112         O2_score=x.alignments[0].hsps[0].bits
 | 
| 
 | 
   113       if O9_score>O2_score:
 | 
| 
 | 
   114         O9_bigger+=1
 | 
| 
 | 
   115       if O9_score<O2_score:
 | 
| 
 | 
   116         O2_bigger+=1
 | 
| 
 | 
   117     except:
 | 
| 
 | 
   118       continue
 | 
| 
 | 
   119   print "$$$Genome:",sra_name
 | 
| 
 | 
   120   if O9_bigger>O2_bigger:
 | 
| 
 | 
   121     print "$$$Typhimurium"
 | 
| 
 | 
   122   elif O9_bigger<O2_bigger:
 | 
| 
 | 
   123     print "$$$Typhimurium_O5-"
 | 
| 
 | 
   124   else:
 | 
| 
 | 
   125     print "$$$Typhimurium, even no 7 bases difference"
 | 
| 
 | 
   126   print "O-4 number is:",O9_bigger
 | 
| 
 | 
   127   print "O-4_5- number is:",O2_bigger
 | 
| 
 | 
   128   os.system("rm "+sam+"_title.txt")###01/28/2015
 | 
| 
 | 
   129   os.system("rm "+sam+"_seq.txt")###01/28/2015
 | 
| 
 | 
   130   os.system("rm "+sam+".fasta")###01/28/2015
 | 
| 
 | 
   131   os.system("rm "+database2+"_db.*")###01/28/2015
 | 
| 
 | 
   132   os.system("rm "+sam+"_vs_O45.xml")###01/28/2015
 | 
| 
 | 
   133 
 | 
| 
 | 
   134 if test_gene=="oafA":
 | 
| 
 | 
   135   Copenhagen(target,additional_file,mapping_mode,file_mode) |