Mercurial > repos > miller-lab > genome_diversity
comparison make_phylip.py @ 36:51cd0307fb70
Phylip's extra ouputs are now stored in the job working directory
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Wed, 20 Nov 2013 16:32:01 -0500 |
parents | ea52b23f1141 |
children | 884ccb07885b |
comparison
equal
deleted
inserted
replaced
35:ea52b23f1141 | 36:51cd0307fb70 |
---|---|
19 # along with this program; if not, write to the Free Software | 19 # along with this program; if not, write to the Free Software |
20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | 20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
21 # MA 02110-1301, USA. | 21 # MA 02110-1301, USA. |
22 | 22 |
23 import argparse | 23 import argparse |
24 import errno | 24 import glob |
25 import os | 25 import os |
26 import shutil | 26 import shutil |
27 from Population import Population | 27 from Population import Population |
28 | |
29 def mkdir_p(path): | |
30 try: | |
31 os.makedirs(path) | |
32 except OSError, e: | |
33 if e.errno <> errno.EEXIST: | |
34 raise | |
35 | 28 |
36 def revseq(seq): | 29 def revseq(seq): |
37 seq=list(seq) | 30 seq=list(seq) |
38 seq.reverse() | 31 seq.reverse() |
39 seq=''.join(seq) | 32 seq=''.join(seq) |
355 #~ formtd='\n'.join([hader,seq]) | 348 #~ formtd='\n'.join([hader,seq]) |
356 dPopsFormPhy[eachPop]=formtd | 349 dPopsFormPhy[eachPop]=formtd |
357 #~ | 350 #~ |
358 return dPopsFormPhy,len(seq) | 351 return dPopsFormPhy,len(seq) |
359 | 352 |
360 def wrapSeqsFasta(dENSEMBLTPopsFasta,outFastaFold,sPopsIntrst): | 353 def wrapSeqsFasta(dENSEMBLTPopsFasta,sPopsIntrst): |
361 """ | 354 """ |
362 """ | 355 """ |
363 ENSEMBLTKaKs=[] | 356 ENSEMBLTKaKs=[] |
364 nonHeader=True | 357 nonHeader=True |
365 cnt=0 | 358 cnt=0 |
373 seqPMLformat=['%s %s'%(len(dPopsFormPhy),lenseq)]#generate new PHYML sequence | 366 seqPMLformat=['%s %s'%(len(dPopsFormPhy),lenseq)]#generate new PHYML sequence |
374 #~ seqPMLformat=[]#generate new PHYML sequence | 367 #~ seqPMLformat=[]#generate new PHYML sequence |
375 for namex in sorted(sPopsIntrst): | 368 for namex in sorted(sPopsIntrst): |
376 seqPMLformat.append(dPopsFormPhy[namex]) | 369 seqPMLformat.append(dPopsFormPhy[namex]) |
377 #~ | 370 #~ |
378 mkdir_p(outFastaFold) | 371 outFastaf=open('%s.phy'%ENSEMBLT,'w') |
379 outFastaf=os.path.join(outFastaFold,'%s.phy'%ENSEMBLT) | |
380 outFastaf=open(outFastaf,'w') | |
381 outFastaf.write('\n'.join(seqPMLformat)) | 372 outFastaf.write('\n'.join(seqPMLformat)) |
382 outFastaf.close() | 373 outFastaf.close() |
383 #~ | 374 #~ |
384 return 0 | 375 return 0 |
385 | 376 |
409 parser.add_argument('--chrClmn',metavar='int',type=int,help='the column with the chromosome.',required=True) | 400 parser.add_argument('--chrClmn',metavar='int',type=int,help='the column with the chromosome.',required=True) |
410 parser.add_argument('--posClmn',metavar='int',type=int,help='the column with the SNPs position.',required=True) | 401 parser.add_argument('--posClmn',metavar='int',type=int,help='the column with the SNPs position.',required=True) |
411 parser.add_argument('--refClmn',metavar='int',type=int,help='the column with the reference nucleotide.',required=True) | 402 parser.add_argument('--refClmn',metavar='int',type=int,help='the column with the reference nucleotide.',required=True) |
412 parser.add_argument('--altrClmn',metavar='int',type=int,help='the column with the derived nucleotide.',required=True) | 403 parser.add_argument('--altrClmn',metavar='int',type=int,help='the column with the derived nucleotide.',required=True) |
413 parser.add_argument('--output',metavar='output',type=str,help='the output',required=True) | 404 parser.add_argument('--output',metavar='output',type=str,help='the output',required=True) |
414 parser.add_argument('--output_id',metavar='int',type=int,help='the output id',required=True) | |
415 parser.add_argument('--output_dir',metavar='output folder sequences',type=str,help='the output folder with the sequences.',required=True) | |
416 parser.add_argument('--gd_indivs',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input file.',required=True) | 405 parser.add_argument('--gd_indivs',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input file.',required=True) |
417 #~ | 406 #~ |
418 parser.add_argument('--inputCover',metavar='input gd_snp cover file',type=str,help='the input file with the table in gd_snp/gd_genotype cover format.',required=False,default=False) | 407 parser.add_argument('--inputCover',metavar='input gd_snp cover file',type=str,help='the input file with the table in gd_snp/gd_genotype cover format.',required=False,default=False) |
419 parser.add_argument('--inputCover_type',metavar='input cover type',type=str,help='the cover input file type (gd_snp or gd_genotype)',required=False,default=False) | 408 parser.add_argument('--inputCover_type',metavar='input cover type',type=str,help='the cover input file type (gd_snp or gd_genotype)',required=False,default=False) |
420 parser.add_argument('--gd_indivs_cover',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input cover file.',required=False,default=False) | 409 parser.add_argument('--gd_indivs_cover',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input cover file.',required=False,default=False) |
440 args = parser.parse_args() | 429 args = parser.parse_args() |
441 | 430 |
442 inSNPf = args.input | 431 inSNPf = args.input |
443 inSNPf_type = args.input_type | 432 inSNPf_type = args.input_type |
444 outfile = args.output | 433 outfile = args.output |
445 outfile_id = args.output_id | |
446 outFastaFold = './out' | |
447 files_dir = args.output_dir | |
448 gd_indivs = args.gd_indivs | 434 gd_indivs = args.gd_indivs |
449 pxchrx = args.chrClmn | 435 pxchrx = args.chrClmn |
450 pxpos = args.posClmn | 436 pxpos = args.posClmn |
451 pxntA = args.refClmn | 437 pxntA = args.refClmn |
452 pxntB = args.altrClmn | 438 pxntB = args.altrClmn |
499 #~ | 485 #~ |
500 dENSEMBLTChrPosdAlls,dChrPosdPopsAllls=rtrnFxdChrPosinCodReg(dChrdStrtEndENSEMBLT,dChrdPosdPopsAlllsInit)#~ print '3. Getting fixed alleles in exons...' | 486 dENSEMBLTChrPosdAlls,dChrPosdPopsAllls=rtrnFxdChrPosinCodReg(dChrdStrtEndENSEMBLT,dChrdPosdPopsAlllsInit)#~ print '3. Getting fixed alleles in exons...' |
501 #~ | 487 #~ |
502 dENSEMBLTPopsFasta,lchrStartexEndposAll=rtrnSeqVars(dENSEMBLTseqChrStEnEx,dENSEMBLTChrPosdAlls)#~ print '4. Getting fasta sequences of populations...' | 488 dENSEMBLTPopsFasta,lchrStartexEndposAll=rtrnSeqVars(dENSEMBLTseqChrStEnEx,dENSEMBLTChrPosdAlls)#~ print '4. Getting fasta sequences of populations...' |
503 #~ | 489 #~ |
504 wrapSeqsFasta(dENSEMBLTPopsFasta,outFastaFold,sPopsIntrst) | 490 wrapSeqsFasta(dENSEMBLTPopsFasta,sPopsIntrst) |
505 #~ | 491 #~ |
506 | 492 |
507 ## get a list of output files | 493 ## get a list of output files |
508 files = [] | 494 files = glob.glob('*.phy') |
509 for dirpath, dirnames, filenames in os.walk(outFastaFold): | |
510 for file in filenames: | |
511 if file.endswith('.phy'): | |
512 files.append( os.path.join(dirpath, file) ) | |
513 del dirnames[:] | |
514 | 495 |
515 if len(files) == 0: | 496 if len(files) == 0: |
516 with open(outfile, 'w') as ofh: | 497 with open(outfile, 'w') as ofh: |
517 print >> ofh, 'No output.' | 498 print >> ofh, 'No output.' |
518 else: | 499 else: |
519 ## the first file becomes the output | 500 ## the first file becomes the output |
520 file = files.pop(0) | 501 file = files.pop(0) |
521 shutil.move(file, outfile) | 502 shutil.move(file, outfile) |
522 | 503 |
523 ## rename/move the rest of the files | 504 ## rename the rest of the files |
524 for file in files: | 505 for file in files: |
525 name = os.path.basename(file)[:-4] | 506 name = file[:-4] |
526 name = name.replace('_', '-') | 507 name = name.replace('_', '-') |
527 new_filename = 'primary_{0}_{1}_visible_txt_?'.format(outfile_id, name) | 508 new_filename = 'primary_{0}_{1}_visible_txt_?'.format(outfile_id, name) |
528 new_pathname = os.path.join(files_dir, new_filename) | 509 os.rename(file, new_filename) |
529 shutil.move(file, new_pathname) | |
530 | 510 |
531 return 0 | 511 return 0 |
532 | 512 |
533 if __name__ == '__main__': | 513 if __name__ == '__main__': |
534 main() | 514 main() |