# HG changeset patch
# User glogobyte
# Date 1634142193 0
# Node ID d133caf020a7d5ff9eadc337226d50ecfcd5ad08
# Parent  6c267b9256fa50a0ab8dd538c37e42189ef45fea
# Uploaded
#
# Contents of armdb_mirgene.py as added by the changeset above
# (diff -r 6c267b9256fa -r d133caf020a7), reformatted as plain Python.
"""Build flank-extended ("custom arm") miRNA references from MirGeneDB.

The script downloads the mature, star and precursor fasta files from
MirGeneDB, extends every mature/star sequence whose header contains the
requested symbol by a number of precursor positions on each side, and
writes three output files into the current working directory.
"""
import subprocess  # noqa: F401  (no longer used; kept so importable names are unchanged)
import argparse
import os
import time
import urllib.request
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Process, Queue  # noqa: F401  (see note above)

MAT_URL = 'https://mirgenedb.org/fasta/ALL?mat=1'
STAR_URL = 'https://mirgenedb.org/fasta/ALL?star=1'
PRI_URL = 'https://mirgenedb.org/static/data/ALL/ALL--pri-30-30.fas'

# Parsed command-line arguments. Populated by main(); the helper functions
# fall back to it when their optional parameters are not supplied.
args = None


#--------------------------------------------Arguments-----------------------------------------------

def _build_parser():
    """Return the command-line parser (same options as the original script)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-pos", "--positions", help="", action="store")
    parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
    parser.add_argument("-sym", "--symbol", help="", action="store")
    return parser


#------------------------------------Read Fasta files from MirGene-----------------------------------

def read_url(path, queue=None):
    """Download *path* and return its content as a list of text lines.

    Args:
        path: URL to fetch; anything ``urllib.request.urlopen`` accepts.
        queue: optional object with a ``put`` method. When given, the list
            of lines is also put on it.

    Returns:
        The UTF-8 decoded payload split on ``"\\n"``. The empty string that
        follows a terminating newline is removed; a last line without a
        terminating newline is kept.
    """
    with urllib.request.urlopen(path) as response:
        text = response.read().decode('utf-8')

    lines = text.split("\n")
    # Only drop the artefact of a trailing newline. Deleting the last element
    # unconditionally would lose a record when the payload is not
    # newline-terminated.
    if lines and lines[-1] == "":
        lines.pop()

    if queue is not None:
        queue.put(lines)
    return lines


#-------------------------------------Generation of Custom Arms--------------------------------------

def custom_arms_mirgene(mat_mirna, pri_mirna, symbol=None, positions=None):
    """Extend mature sequences with flanking bases taken from their precursor.

    Both inputs are flat lists alternating header, sequence, header, ...
    A mature entry is paired with every precursor entry whose header has the
    same text before the first ``"_"``.

    Args:
        mat_mirna: alternating headers/sequences of mature (and star) miRNAs.
        pri_mirna: alternating headers/sequences of precursors.
        symbol: only mature headers containing this substring are processed.
            Defaults to ``args.symbol``.
        positions: number of precursor bases added on each side (int or
            numeric string). Defaults to ``args.positions``.

    Returns:
        A list of ``[header, extended_sequence]`` pairs, in input order.

    Raises:
        ValueError: if *positions* is negative.
        IndexError: if a mature sequence does not occur in a precursor that
            shares its header prefix.
    """
    if symbol is None:
        symbol = args.symbol
    if positions is None:
        positions = args.positions

    flank = int(positions)
    if flank < 0:
        raise ValueError("positions must be non-negative, got {}".format(positions))

    # Index precursor sequences by header prefix once instead of rescanning
    # the whole precursor list for every mature entry.
    pri_by_prefix = defaultdict(list)
    for pri_header, pri_seq in zip(pri_mirna[0::2], pri_mirna[1::2]):
        pri_by_prefix[pri_header.split("_")[0]].append(pri_seq)

    mat_ext = []
    for header, mat_seq in zip(mat_mirna[0::2], mat_mirna[1::2]):
        if symbol not in header:
            continue
        for pri_seq in pri_by_prefix.get(header.split("_")[0], ()):
            # partition() splits on the first occurrence only, so the
            # downstream flank is correct even if the mature sequence
            # occurs again further along the precursor.
            upstream, found, downstream = pri_seq.partition(mat_seq)
            if not found:
                raise IndexError(
                    "sequence of {} not found in its precursor".format(header))
            # upstream[-0:] would return the whole string, so a zero-width
            # flank has to be handled explicitly.
            left = upstream[-flank:] if flank else ""
            mat_ext.append([header, left + mat_seq + downstream[:flank]])

    print(str(positions) + " positions shifted")
    return mat_ext


#----------------------------------------Export of Fasta files ---------------------------------------

def write_custom_arms(list, name, c):  # noqa: A002  (parameter name kept for callers)
    """Write *list* to the file *name*, one item per line.

    Args:
        list: when ``c == 1`` an iterable of ``[header, sequence]`` pairs,
            otherwise an iterable of strings.
        name: path of the output file (overwritten if it exists).
        c: layout switch; ``1`` writes header and sequence of each pair on
            two consecutive lines, any other value writes each item as is.
    """
    # The context manager guarantees the file is flushed and closed.
    with open(name, "w") as f:
        if c == 1:
            f.writelines(x[0] + '\n' + x[1] + '\n' for x in list)
        else:
            f.writelines(x + '\n' for x in list)


#==================================================================================================================================

def main(argv=None):
    """Run the download / extend / export pipeline.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    global args
    args = _build_parser().parse_args(argv)
    os.makedirs('out', exist_ok=True)

    starttime = time.time()
    print(args.symbol)

    # Read of all Fasta files. The downloads are I/O-bound, so threads
    # overlap them; Executor.map yields results in the order of the URLs.
    urls = (MAT_URL, STAR_URL, PRI_URL)
    with ThreadPoolExecutor(max_workers=len(urls)) as pool:
        mat_mirna, star_mirna, pri_mirna = pool.map(read_url, urls)

    # Replace U with T in sequences (odd indices hold the sequences)
    mat_mirna.extend(star_mirna)
    mat_mirna[1::2] = [seq.replace("U", "T") for seq in mat_mirna[1::2]]

    # Generation of Custom Arms
    mat_ext = custom_arms_mirgene(mat_mirna, pri_mirna)

    # Export of all Fasta files
    write_custom_arms(mat_mirna, "shifted_mirnas.bed", 0)
    write_custom_arms(pri_mirna, "original_mirnas.bed", 0)
    write_custom_arms(mat_ext, "new_ref.fa", 1)

    print('That runtime was {} seconds'.format(time.time() - starttime))


if __name__ == '__main__':
    main()