Mercurial > repos > jamueller > codon_analysis
comparison GeneCodonSearch1_2.py @ 1:e8c8bf56aab0 draft default tip
Uploaded
| author | jamueller |
|---|---|
| date | Thu, 23 May 2019 05:16:03 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:caadae802e65 | 1:e8c8bf56aab0 |
|---|---|
| 1 #1: Modules | |
| 2 import re | |
| 3 import argparse | |
| 4 import numpy as np | |
| 5 np.set_printoptions(threshold=np.inf) | |
| 6 | |
| 7 parser = argparse.ArgumentParser() | |
| 8 parser.add_argument("-m", "--minimum",type=float, help="cutoff value") | |
| 9 parser.add_argument("-c", "--codon",type=str, help="codons of interest") | |
| 10 parser.add_argument("-i", "--input_sequences", type=str, help="") | |
| 11 parser.add_argument("-r", "--result", type=str) | |
| 12 | |
| 13 args = parser.parse_args() | |
| 14 | |
| 15 sequences = args.input_sequences | |
| 16 cutoff = args.minimum | |
| 17 codonlist=args.codon | |
| 18 resultfile=args.result | |
| 19 | |
| 20 codonlist = codonlist.split(",") | |
| 21 | |
| 22 i=0 | |
| 23 codon_dictionary = {} | |
| 24 while i < len(codonlist): | |
| 25 codon_dictionary[codonlist[i]] = 0 | |
| 26 i+=1 | |
| 27 #2:input | |
| 28 dict1 = {} | |
| 29 dict2 = {} | |
| 30 seq_id ="" | |
| 31 | |
| 32 i=0 | |
| 33 input_sequences = open(sequences,mode = 'r') | |
| 34 for line in input_sequences: | |
| 35 if line [0] == ">": | |
| 36 match = re.search(">(\S+)\s(.*)$",line) | |
| 37 seq_id = match.group(1) | |
| 38 dict1[seq_id] = "" | |
| 39 elif seq_id != "" and line.strip()!= "": | |
| 40 dict1[seq_id] += line.strip() | |
| 41 | |
| 42 i=0 | |
| 43 for name in dict1.keys(): | |
| 44 dict2[i] = name | |
| 45 i+=1 | |
| 46 | |
| 47 testsequences=int(len(dict2)) | |
| 48 | |
| 49 #3: Codon-Scoring of the sequences | |
| 50 i=0 | |
| 51 m=1 | |
| 52 testseq="" | |
| 53 | |
| 54 scoremat=np.zeros((len(dict2)+1,len(codon_dictionary)+2)).astype(object) | |
| 55 | |
| 56 for i in range(len(dict2)): | |
| 57 | |
| 58 l=0 | |
| 59 scorecomp=0 | |
| 60 codonscores=[] | |
| 61 testseq=dict1[dict2[i]] | |
| 62 codon_dictionary=dict.fromkeys(codon_dictionary,0) | |
| 63 | |
| 64 for l in range(len(testseq)): | |
| 65 try: | |
| 66 codon_dictionary.get(testseq[l*3:l*3+3]) | |
| 67 codon_dictionary[testseq[l*3:l*3+3]]+=1 | |
| 68 except KeyError: | |
| 69 continue | |
| 70 | |
| 71 scorecomp=sum(codon_dictionary.values())/(float(len(testseq)/3)*len(codon_dictionary)) | |
| 72 for x in codon_dictionary.values(): | |
| 73 codonscores.append(x) | |
| 74 | |
| 75 if scorecomp > cutoff : | |
| 76 scoremat[m,0]=dict2[i] | |
| 77 scoremat[m,1]=round(scorecomp,7) | |
| 78 scoremat[m,2:]=codonscores | |
| 79 m+=1 | |
| 80 | |
| 81 scoremat_sort=scoremat[np.lexsort((scoremat[:, 1], ))] | |
| 82 scoremat_sort[0,0]="Gene_ID" | |
| 83 scoremat_sort[0,1]="Total Score" | |
| 84 scoremat_sort[0,2:]=codonlist | |
| 85 scoremat_cleaned = scoremat_sort[[i for i, x in enumerate(scoremat_sort) if x.any()]] | |
| 86 | |
| 87 #4: output | |
| 88 with open(resultfile, 'w') as file_object: | |
| 89 file_object.write("DNA sequences with a higher total score than ") | |
| 90 file_object.write(str(cutoff)) | |
| 91 file_object.write("\n") | |
| 92 file_object.write("\n") | |
| 93 file_object.write(str(scoremat_cleaned)) | |
| 94 file_object.close() |
