Mercurial > repos > johnheap > vapper
comparison Tryp_V_T.py @ 21:1b5bf8383973 draft
Uploaded
| author | johnheap |
|---|---|
| date | Tue, 04 Jun 2019 01:29:12 -0400 |
| parents | fe79425b1fa4 |
| children | c0a6a170163e |
comparison
equal
deleted
inserted
replaced
| 20:26ec953069b3 | 21:1b5bf8383973 |
|---|---|
| 81 return #we have saved out the relevent name.bam, name.sorted and name.sorted.bai files | 81 return #we have saved out the relevent name.bam, name.sorted and name.sorted.bai files |
| 82 | 82 |
| 83 # we will not have the .gtf file so call cufflinks without -G option | 83 # we will not have the .gtf file so call cufflinks without -G option |
| 84 def transcriptAbundance(inputname): | 84 def transcriptAbundance(inputname): |
| 85 argString = "cufflinks -o "+inputname+".cuff -u -p 8 "+inputname+".sorted" | 85 argString = "cufflinks -o "+inputname+".cuff -u -p 8 "+inputname+".sorted" |
| 86 subprocess.call(argString, shell = True) | 86 subprocess.call(argString, shell=True) |
| 87 os.remove(inputname+".sorted") #remove name.sorted | 87 os.remove(inputname+".sorted") #remove name.sorted |
| 88 os.remove(inputname+".sorted.bai") | 88 os.remove(inputname+".sorted.bai") |
| 89 os.remove(inputname+".bam") | 89 os.remove(inputname+".bam") |
| 90 return | 90 return |
| 91 | 91 |
| 94 refBase = os.path.basename(refFastq) | 94 refBase = os.path.basename(refFastq) |
| 95 ref = os.path.splitext(refBase)[0] # 'mydata/test.fasta' -> 'test' | 95 ref = os.path.splitext(refBase)[0] # 'mydata/test.fasta' -> 'test' |
| 96 dir_path = os.path.dirname(os.path.realpath(__file__)) # directory of this file | 96 dir_path = os.path.dirname(os.path.realpath(__file__)) # directory of this file |
| 97 refPath = dir_path + "/data/Reference/" + ref + "/" + refBase # eg refPath = data/Reference/Trinity/Trinity.fasta | 97 refPath = dir_path + "/data/Reference/" + ref + "/" + refBase # eg refPath = data/Reference/Trinity/Trinity.fasta |
| 98 # used for dirty # refPath = 'Trinity.fasta' # dirty one | 98 # used for dirty # refPath = 'Trinity.fasta' # dirty one |
| 99 track_df = pd.read_csv(name+'.cuff/genes.fpkm_tracking', sep='\t') | 99 track_df = pd.read_csv(dir_path+'/' + name + '.cuff/genes.fpkm_tracking', sep='\t') |
| 100 names = track_df['locus'] | 100 names = track_df['locus'] |
| 101 # print(len(names)) | 101 # print(len(names)) |
| 102 # print(names[:5]) | 102 # print(names[:5]) |
| 103 | 103 |
| 104 nlist = [] | 104 nlist = [] |
| 232 def getPhyloNumber(sac): | 232 def getPhyloNumber(sac): |
| 233 i = sac.find('_') | 233 i = sac.find('_') |
| 234 return int(sac[1:i]) | 234 return int(sac[1:i]) |
| 235 | 235 |
| 236 def combineFPMK(tdict): | 236 def combineFPMK(tdict): |
| 237 fpkm_df = pd.read_csv(tdict['name']+'.cuff/genes.fpkm_tracking', sep='\t') | 237 dir_path = os.path.dirname(os.path.realpath(__file__))+'/' |
| | 238 |
| | 239 fpkm_df = pd.read_csv(dir_path++tdict['name']+'.cuff/genes.fpkm_tracking', sep='\t') |
| 238 | 240 |
| 239 #fpkm_df = pd.read_csv('genes.fpkm_tracking',sep='\t') | 241 #fpkm_df = pd.read_csv('genes.fpkm_tracking',sep='\t') |
| 240 #print(fpkm_df.head()) | 242 #print(fpkm_df.head()) |
| 241 fpkm_df['locus'] = fpkm_df['locus'].apply(lambda names: names[:names.find(':')]) | 243 fpkm_df['locus'] = fpkm_df['locus'].apply(lambda names: names[:names.find(':')]) |
| 242 #print(fpkm_df.head()) | 244 #print(fpkm_df.head()) |
| 243 reducedBlast_df = pd.read_csv(tdict['name']+'_transcript.csv') | 245 |
| | 246 reducedBlast_df = pd.read_csv(dir_path + tdict['name']+'_transcript.csv') |
| 244 # reducedBlast_df = pd.read_csv('TrinityVT_transcript.csv') | 247 # reducedBlast_df = pd.read_csv('TrinityVT_transcript.csv') |
| 245 saccverSet = set(reducedBlast_df['saccver']) | 248 saccverSet = set(reducedBlast_df['saccver']) |
| 246 saccverList = list(saccverSet) | 249 saccverList = list(saccverSet) |
| 247 saccverList.sort() | 250 saccverList.sort() |
| 248 # print(saccverList[:5]) | 251 # print(saccverList[:5]) |
| 396 pass | 399 pass |
| 397 | 400 |
| 398 # argdict = {'name':2, 'pdfexport': 3, 'refFastq': 4, 'forward': 5, 'reverse': 6, 'html_file': 7, 'html_resource': 8} | 401 # argdict = {'name':2, 'pdfexport': 3, 'refFastq': 4, 'forward': 5, 'reverse': 6, 'html_file': 7, 'html_resource': 8} |
| 399 | 402 |
| 400 def transcriptomicProcess(args,argdict): | 403 def transcriptomicProcess(args,argdict): |
| | 404 dir_path = os.path.dirname(os.path.realpath(__file__)) |
| 401 tdict = {} | 405 tdict = {} |
| 402 tdict['name'] = args[argdict['name']] | 406 tdict['name'] = args[argdict['name']] |
| 403 tdict['refFastq'] = args[argdict['refFastq']] | 407 tdict['refFastq'] = args[argdict['refFastq']] |
| 404 tdict['forward'] = args[argdict['forward']] | 408 tdict['forward'] = args[argdict['forward']] |
| 405 tdict['reverse'] = args[argdict['reverse']] | 409 tdict['reverse'] = args[argdict['reverse']] |
| 406 tdict['vivax_trans_database'] = 'data/vivax/Database/Phylotype_typeseqs.fas' | 410 dir_path = os.path.dirname(os.path.realpath(__file__)) |
| | 411 tdict['vivax_trans_database'] = dir_path+'/data/vivax/Database/Phylotype_typeseqs.fas' |
| 407 tdict['pdf'] = args[argdict['pdfexport']] | 412 tdict['pdf'] = args[argdict['pdfexport']] |
| 408 tdict['html_file'] = args[argdict['html_file']] | 413 tdict['html_file'] = args[argdict['html_file']] |
| 409 tdict['html_resource'] = args[argdict['html_resource']] | 414 tdict['html_resource'] = args[argdict['html_resource']] |
| 410 | 415 |
| 411 uploadUserReferenceFastq(tdict['refFastq']) | 416 uploadUserReferenceFastq(tdict['refFastq']) |
| 412 transcriptMapping(tdict['name'], tdict['refFastq'], tdict['forward'], tdict['reverse']) #uses bowtie | 417 transcriptMapping(tdict['name'], tdict['refFastq'], tdict['forward'], tdict['reverse']) #uses bowtie |
| 413 processSamFiles(tdict['name']) #uses samtools | 418 processSamFiles(tdict['name']) #uses samtools |
| 414 transcriptAbundance(tdict['name']) #uses cufflinks -> ?.cuff/*.* | 419 transcriptAbundance(tdict['name']) #uses cufflinks -> ?.cuff/*.* |
| 415 transcriptsForBlast(tdict['name'], tdict['refFastq']) #creates name+4blast.fa | 420 transcriptsForBlast(tdict['name'], tdict['refFastq']) #creates name+4blast.fa |
| 416 blastContigs(tdict['name'], tdict['html_resource'], 'data/vivax/Database/Phylotype_typeseqs.fas') | 421 blastContigs(tdict['name'], tdict['html_resource'], tdict['vivax_trans_database']) |
| 417 sum_df, sum2_df = combineFPMK(tdict) | 422 sum_df, sum2_df = combineFPMK(tdict) |
| 418 doBarChart(tdict, sum2_df) | 423 doBarChart(tdict, sum2_df) |
| 419 createHTML(tdict, sum_df) | 424 createHTML(tdict, sum_df) |
| 420 | 425 |
| 421 | 426 |
