Mercurial > repos > glogobyte > armdb
comparison armdb_mirbase.py @ 17:150d8995b9aa draft default tip
Uploaded
| author | glogobyte |
|---|---|
| date | Mon, 16 Oct 2023 14:27:43 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 16:d9087bba59e1 | 17:150d8995b9aa |
|---|---|
| 1 import subprocess | |
| 2 import argparse | |
| 3 import time | |
| 4 import urllib.request | |
| 5 from multiprocessing import Process, Queue | |
| 6 import itertools | |
| 7 | |
| 8 #---------------------------------Arguments------------------------------------------ | |
| 9 | |
| 10 subprocess.call(['mkdir', 'out']) | |
| 11 parser = argparse.ArgumentParser() | |
| 12 | |
| 13 parser.add_argument("-pos", "--positions", help="number of additional nucleotides", action="store") | |
| 14 parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") | |
| 15 parser.add_argument("-gen", "--genome", help="genome version", action="store") | |
| 16 parser.add_argument("-gff3", "--gff", help="gff file",action="store") | |
| 17 args = parser.parse_args() | |
| 18 | |
| 19 #-----------------------Download and read of the gff3 file--------------------------------- | |
| 20 | |
| 21 def read_url(q): | |
| 22 | |
| 23 url = 'https://mirbase.org/download/'+args.gff | |
| 24 data = urllib.request.urlopen(url).read() | |
| 25 file_mirna = data.decode('utf-8') | |
| 26 file_mirna = file_mirna.split("\n") | |
| 27 q.put(file_mirna) | |
| 28 | |
| 29 | |
| 30 #-----------------------Export of the original gff3 file--------------------------------- | |
| 31 | |
| 32 def write_gff(file_mirna): | |
| 33 f = open('original_mirnas.bed', "w") | |
| 34 | |
| 35 for i in range(len(file_mirna)): | |
| 36 f.write(file_mirna[i] + "\n") | |
| 37 | |
| 38 #------------------------Process and export of the file with mature mirnas------------------------------- | |
| 39 | |
| 40 def new_gff(file_mirna): | |
| 41 | |
| 42 mirna = [] # new list with shifted mirnas | |
| 43 positions =int(args.positions) # positions shifted | |
| 44 print(str(positions)+" positions shifted") | |
| 45 names=[] | |
| 46 | |
| 47 for i in range(len(file_mirna)): | |
| 48 | |
| 49 # Remove lines which conatain the word "primary" | |
| 50 if "primary" not in file_mirna[i]: | |
| 51 mirna.append(file_mirna[i]) | |
| 52 | |
| 53 # Check if the line starts with "chr" | |
| 54 if "chr" in file_mirna[i]: | |
| 55 a=file_mirna[i].split("\t")[0] | |
| 56 b=file_mirna[i].split("\t")[6] | |
| 57 c=file_mirna[i].split("=")[3].split(";")[0] | |
| 58 names.append([a,b,c]) | |
| 59 | |
| 60 names.sort() | |
| 61 sublists=[] | |
| 62 | |
| 63 [sublists.append([item] * names.count(item)) for item in names if names.count(item)>=2] | |
| 64 sublists.sort() | |
| 65 sublists=list(sublists for sublists, _ in itertools.groupby(sublists)) | |
| 66 unique_names=[[x[0][0],x[0][2]] for x in sublists] | |
| 67 | |
| 68 for x in unique_names: | |
| 69 flag = 0 | |
| 70 for i in range(len(mirna)): | |
| 71 | |
| 72 if "chr" in mirna[i] and mirna[i].split("=")[3].split(";")[0]==x[1] and x[0]==mirna[i].split("\t")[0]: | |
| 73 flag+=1 | |
| 74 ktr=mirna[i].split(";")[0]+";"+mirna[i].split(";")[1]+";"+mirna[i].split(";")[2]+"-{"+str(flag)+"}"+";"+mirna[i].split(";")[3] | |
| 75 mirna[i]=ktr | |
| 76 | |
| 77 | |
| 78 f = open('shifted_mirnas.bed', "w") | |
| 79 | |
| 80 for i in range(len(mirna)): | |
| 81 | |
| 82 if "chr" in mirna[i]: | |
| 83 | |
| 84 # change the name of current mirna | |
| 85 mirna_name_1 = mirna[i].split("=")[3] | |
| 86 mirna_name_2 = mirna[i].split("=")[4] | |
| 87 mirna_name_1 = mirna_name_1.split(";")[0]+"_"+mirna_name_2+"_"+mirna[i].split("\t")[0] | |
| 88 mirna[i] = mirna[i].replace("miRNA", mirna_name_1) | |
| 89 | |
| 90 # Shift the position of mirna | |
| 91 start = mirna[i].split("\t")[3] | |
| 92 end = mirna[i].split("\t")[4] | |
| 93 shift_start = int(start)-positions # shift the interval to the left | |
| 94 shift_end = int(end)+positions # shift the interval to the right | |
| 95 | |
| 96 # Replace the previous intervals with the new | |
| 97 mirna[i] = mirna[i].replace(start, str(shift_start)) | |
| 98 mirna[i] = mirna[i].replace(end, str(shift_end)) | |
| 99 | |
| 100 f.write(mirna[i] + "\n") | |
| 101 | |
| 102 f.close() | |
| 103 | |
| 104 | |
| 105 #------------------------Extract the sequences of the Custom Arms with getfasta tool------------------------------- | |
| 106 | |
| 107 def bedtool(genome): | |
| 108 subprocess.call(["bedtools", "getfasta", "-fi", "/cvmfs/data.galaxyproject.org/byhand/"+genome+"/sam_index/"+genome+".fa", "-bed", "shifted_mirnas.bed", "-fo", "new_ref.fa", "-name", "-s"]) | |
| 109 #=================================================================================================================================== | |
| 110 | |
| 111 | |
| 112 if __name__=='__main__': | |
| 113 | |
| 114 starttime = time.time() | |
| 115 q = Queue() | |
| 116 | |
| 117 # Read the original gff3 file | |
| 118 p1 = Process(target=read_url(q)) | |
| 119 p1.start() | |
| 120 p1.join() | |
| 121 file_mirna=q.get() | |
| 122 | |
| 123 # Export the original gff3 file | |
| 124 p2 = [Process(target=write_gff(file_mirna))] | |
| 125 | |
| 126 # Create the new gff3 file | |
| 127 p2.extend([Process(target=new_gff(file_mirna))]) | |
| 128 [x.start() for x in p2] | |
| 129 [x.join() for x in p2] | |
| 130 | |
| 131 # Extract the sequences of the Custom Arms | |
| 132 p3 = Process(target=bedtool(args.genome)) | |
| 133 p3.start() | |
| 134 p3.join() | |
| 135 | |
| 136 print('Runtime: {} seconds'.format(round(time.time() - starttime,2))) | |
| 137 |
