diff make_phylip.py @ 35:ea52b23f1141

Bug fixes for Draw variants, Phylip, and gd_d_tools
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 20 Nov 2013 13:46:10 -0500
parents a631c2f6d913
children 51cd0307fb70
line wrap: on
line diff
--- a/make_phylip.py	Mon Sep 23 13:37:19 2013 -0400
+++ b/make_phylip.py	Wed Nov 20 13:46:10 2013 -0500
@@ -24,6 +24,7 @@
 import errno
 import os
 import shutil
+from Population import Population
 
 def mkdir_p(path):
     try:
@@ -382,11 +383,29 @@
         #~ 
     return 0
 
+def pos_dict(gd_indivs_file, input_type):
+    rv = {}
+
+    p = Population()
+    p.from_population_file(gd_indivs_file)
+
+    for tag in p.tag_list():
+        column, name = tag.split(':')
+        column = int(column) - 1
+
+        if input_type == 'gd_genotype':
+            column -= 2
+
+        rv[name] = column
+
+    return rv
+
 def main():
     #~ 
     #~bpython mkPhyl.py --input=colugo_mt_Galaxy_genotypes.txt --chrClmn=0 --posClmn=1 --refClmn=2 --altrClmn=3 --output=out.d --gd_indivs=genotypes.gd_indivs --inputCover=colugo_mt_Galaxy_coverage.txt --gd_indivs_cover=coverage.gd_indivs --cvrgTreshold=0 --chrClmnCvrg=0 --posClmnCvrg=1 --refClmnCvrg=2 --altrClmnCvrg=3 --indvlsPrctTrshld=0
     parser = argparse.ArgumentParser(description='Returns the count of genes in KEGG categories and their statistical overrrepresentation, from a list of genes and an background file (i.e. plane text with ENSEMBLT and KEGG pathways).')
     parser.add_argument('--input',metavar='input gd_snp file',type=str,help='the input file with the table in gd_snp/gd_genotype format.',required=True)    
+    parser.add_argument('--input_type',metavar='input type',type=str,help='the input file type (gd_snp or gd_genotype)',required=True)    
     parser.add_argument('--chrClmn',metavar='int',type=int,help='the column with the chromosome.',required=True)
     parser.add_argument('--posClmn',metavar='int',type=int,help='the column with the SNPs position.',required=True)
     parser.add_argument('--refClmn',metavar='int',type=int,help='the column with the reference nucleotide.',required=True)
@@ -397,6 +416,7 @@
     parser.add_argument('--gd_indivs',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input file.',required=True)
     #~ 
     parser.add_argument('--inputCover',metavar='input gd_snp cover file',type=str,help='the input file with the table in gd_snp/gd_genotype cover format.',required=False,default=False)
+    parser.add_argument('--inputCover_type',metavar='input cover type',type=str,help='the cover input file type (gd_snp or gd_genotype)',required=False,default=False)
     parser.add_argument('--gd_indivs_cover',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input cover file.',required=False,default=False)
     parser.add_argument('--cvrgTreshold',metavar='input coverage threshold',type=int,help='the coverage threshold above which nucleotides are included, else "N".',required=False,default=False)
     parser.add_argument('--chrClmnCvrg',metavar='int',type=int,help='the column with the chromosome in the input coverage file.',required=False,default=False)
@@ -416,10 +436,11 @@
     parser.add_argument('--cdsEndClmn',metavar='int',type=int,help='the column with the coding end column in the gene_info file.',required=False,default=False)
     parser.add_argument('--startExsClmn',metavar='int',type=int,help='the column with the exon start positions column in the gene_info file.',required=False,default=False)
     parser.add_argument('--endExsClmn',metavar='int',type=int,help='the column with the exon end positions column in the gene_info file.',required=False,default=False)
-
+ 
     args = parser.parse_args()
 
     inSNPf = args.input
+    inSNPf_type = args.input_type
     outfile = args.output
     outfile_id = args.output_id
     outFastaFold = './out'
@@ -444,6 +465,7 @@
     endExsClmn = args.endExsClmn#exons end column
     
     inputCover = args.inputCover
+    inputCover_type = args.inputCover_type
     gd_indivs_cover = args.gd_indivs_cover
     cvrgTreshold = args.cvrgTreshold
     pxchrxCov = args.chrClmnCvrg
@@ -451,13 +473,13 @@
     pxntACov = args.refClmnCvrg
     pxntBCov = args.altrClmnCvrg
     indvlsPrctTrshld = args.indvlsPrctTrshld
-    
+
+
     #print inputCover, gd_indivs_cover, cvrgTreshold
     
     assert ((inputCover and gd_indivs_cover and cvrgTreshold>=0 and indvlsPrctTrshld>=0) or (inCDSfile and inUCSCfile))
-    
-    #~ 
-    dPopsinSNPfPos=dict([(x.split()[1],int(x.split()[0])-1) for x in open(gd_indivs).read().splitlines() if x.strip()])
+
+    dPopsinSNPfPos = pos_dict(gd_indivs, inSNPf_type)
     #~ dPopsinSNPfPos.update({'ref':False})
     #~ 
     sPopsIntrst=set(dPopsinSNPfPos.keys())
@@ -466,7 +488,7 @@
     #~
     if  inputCover and gd_indivs_cover and cvrgTreshold>=0:
         dPopsinSNPfPos_cover=dict([(eachPop,False) for eachPop in dPopsinSNPfPos.keys()])
-        dPopsinSNPfPos_cover.update(dict([(x.split()[1],int(x.split()[0])-1) for x in open(gd_indivs_cover).read().splitlines() if x.strip()]))
+        dPopsinSNPfPos_cover.update(pos_dict(gd_indivs_cover, inputCover_type))
         dChrdPosdPopsAlllsInit,seqref,chrx,startExs,endExs=rtrnFxdChrPos(inputCover,dPopsinSNPfPos_cover,pxchrxCov,pxposCov,pxntACov,pxntBCov,dChrdPosdPopsAlllsInit,cvrgTreshold,indvlsPrctTrshld)
         rvrse=False
         dENSEMBLTseqChrStEnEx={'tmp':(seqref,chrx,startExs,endExs,rvrse)}
@@ -482,8 +504,7 @@
     wrapSeqsFasta(dENSEMBLTPopsFasta,outFastaFold,sPopsIntrst)
     #~ 
 
-
-    ## get a lit of output files
+    ## get a list of output files
     files = []
     for dirpath, dirnames, filenames in os.walk(outFastaFold):
         for file in filenames:
@@ -500,8 +521,10 @@
         shutil.move(file, outfile)
 
         ## rename/move the rest of the files
-        for i, file in enumerate(files):
-            new_filename = 'primary_{0}_output{1}_visible_txt_?'.format(outfile_id, i+2)
+        for file in files:
+            name = os.path.basename(file)[:-4]
+            name = name.replace('_', '-')
+            new_filename = 'primary_{0}_{1}_visible_txt_?'.format(outfile_id, name)
             new_pathname = os.path.join(files_dir, new_filename)
             shutil.move(file, new_pathname)