changeset 35:ea52b23f1141

Bug fixes for Draw variants, Phylip, and gd_d_tools
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 20 Nov 2013 13:46:10 -0500
parents f739a296a339
children 51cd0307fb70
files draw_variants.py make_phylip.py make_phylip.xml make_phylip_hooks.py reorder.py specify.py tool_dependencies.xml
diffstat 5 files changed, 95 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/draw_variants.py	Mon Sep 23 13:37:19 2013 -0400
+++ b/draw_variants.py	Wed Nov 20 13:46:10 2013 -0500
@@ -105,8 +105,12 @@
 
 args = [ prog ]
 args.append('-density')
-args.append(100)
+args.append(200)
+args.append('-resize')
+args.append('140%')
 args.append('Ji.svg')
+args.append('-compress')
+args.append('zip')
 args.append('tiff:{0}'.format(output))
 
 gd_util.run_program(prog, args)
--- a/make_phylip.py	Mon Sep 23 13:37:19 2013 -0400
+++ b/make_phylip.py	Wed Nov 20 13:46:10 2013 -0500
@@ -24,6 +24,7 @@
 import errno
 import os
 import shutil
+from Population import Population
 
 def mkdir_p(path):
     try:
@@ -382,11 +383,29 @@
         #~ 
     return 0
 
+def pos_dict(gd_indivs_file, input_type):
+    rv = {}
+
+    p = Population()
+    p.from_population_file(gd_indivs_file)
+
+    for tag in p.tag_list():
+        column, name = tag.split(':')
+        column = int(column) - 1
+
+        if input_type == 'gd_genotype':
+            column -= 2
+
+        rv[name] = column
+
+    return rv
+
 def main():
     #~ 
     #~bpython mkPhyl.py --input=colugo_mt_Galaxy_genotypes.txt --chrClmn=0 --posClmn=1 --refClmn=2 --altrClmn=3 --output=out.d --gd_indivs=genotypes.gd_indivs --inputCover=colugo_mt_Galaxy_coverage.txt --gd_indivs_cover=coverage.gd_indivs --cvrgTreshold=0 --chrClmnCvrg=0 --posClmnCvrg=1 --refClmnCvrg=2 --altrClmnCvrg=3 --indvlsPrctTrshld=0
     parser = argparse.ArgumentParser(description='Returns the count of genes in KEGG categories and their statistical overrrepresentation, from a list of genes and an background file (i.e. plane text with ENSEMBLT and KEGG pathways).')
     parser.add_argument('--input',metavar='input gd_snp file',type=str,help='the input file with the table in gd_snp/gd_genotype format.',required=True)    
+    parser.add_argument('--input_type',metavar='input type',type=str,help='the input file type (gd_snp or gd_genotype)',required=True)    
     parser.add_argument('--chrClmn',metavar='int',type=int,help='the column with the chromosome.',required=True)
     parser.add_argument('--posClmn',metavar='int',type=int,help='the column with the SNPs position.',required=True)
     parser.add_argument('--refClmn',metavar='int',type=int,help='the column with the reference nucleotide.',required=True)
@@ -397,6 +416,7 @@
     parser.add_argument('--gd_indivs',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input file.',required=True)
     #~ 
     parser.add_argument('--inputCover',metavar='input gd_snp cover file',type=str,help='the input file with the table in gd_snp/gd_genotype cover format.',required=False,default=False)
+    parser.add_argument('--inputCover_type',metavar='input cover type',type=str,help='the cover input file type (gd_snp or gd_genotype)',required=False,default=False)
     parser.add_argument('--gd_indivs_cover',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input cover file.',required=False,default=False)
     parser.add_argument('--cvrgTreshold',metavar='input coverage threshold',type=int,help='the coverage threshold above which nucleotides are included, else "N".',required=False,default=False)
     parser.add_argument('--chrClmnCvrg',metavar='int',type=int,help='the column with the chromosome in the input coverage file.',required=False,default=False)
@@ -416,10 +436,11 @@
     parser.add_argument('--cdsEndClmn',metavar='int',type=int,help='the column with the coding end column in the gene_info file.',required=False,default=False)
     parser.add_argument('--startExsClmn',metavar='int',type=int,help='the column with the exon start positions column in the gene_info file.',required=False,default=False)
     parser.add_argument('--endExsClmn',metavar='int',type=int,help='the column with the exon end positions column in the gene_info file.',required=False,default=False)
-
+ 
     args = parser.parse_args()
 
     inSNPf = args.input
+    inSNPf_type = args.input_type
     outfile = args.output
     outfile_id = args.output_id
     outFastaFold = './out'
@@ -444,6 +465,7 @@
     endExsClmn = args.endExsClmn#exons end column
     
     inputCover = args.inputCover
+    inputCover_type = args.inputCover_type
     gd_indivs_cover = args.gd_indivs_cover
     cvrgTreshold = args.cvrgTreshold
     pxchrxCov = args.chrClmnCvrg
@@ -451,13 +473,13 @@
     pxntACov = args.refClmnCvrg
     pxntBCov = args.altrClmnCvrg
     indvlsPrctTrshld = args.indvlsPrctTrshld
-    
+
+
     #print inputCover, gd_indivs_cover, cvrgTreshold
     
     assert ((inputCover and gd_indivs_cover and cvrgTreshold>=0 and indvlsPrctTrshld>=0) or (inCDSfile and inUCSCfile))
-    
-    #~ 
-    dPopsinSNPfPos=dict([(x.split()[1],int(x.split()[0])-1) for x in open(gd_indivs).read().splitlines() if x.strip()])
+
+    dPopsinSNPfPos = pos_dict(gd_indivs, inSNPf_type)
     #~ dPopsinSNPfPos.update({'ref':False})
     #~ 
     sPopsIntrst=set(dPopsinSNPfPos.keys())
@@ -466,7 +488,7 @@
     #~
     if  inputCover and gd_indivs_cover and cvrgTreshold>=0:
         dPopsinSNPfPos_cover=dict([(eachPop,False) for eachPop in dPopsinSNPfPos.keys()])
-        dPopsinSNPfPos_cover.update(dict([(x.split()[1],int(x.split()[0])-1) for x in open(gd_indivs_cover).read().splitlines() if x.strip()]))
+        dPopsinSNPfPos_cover.update(pos_dict(gd_indivs_cover, inputCover_type))
         dChrdPosdPopsAlllsInit,seqref,chrx,startExs,endExs=rtrnFxdChrPos(inputCover,dPopsinSNPfPos_cover,pxchrxCov,pxposCov,pxntACov,pxntBCov,dChrdPosdPopsAlllsInit,cvrgTreshold,indvlsPrctTrshld)
         rvrse=False
         dENSEMBLTseqChrStEnEx={'tmp':(seqref,chrx,startExs,endExs,rvrse)}
@@ -482,8 +504,7 @@
     wrapSeqsFasta(dENSEMBLTPopsFasta,outFastaFold,sPopsIntrst)
     #~ 
 
-
-    ## get a lit of output files
+    ## get a list of output files
     files = []
     for dirpath, dirnames, filenames in os.walk(outFastaFold):
         for file in filenames:
@@ -500,8 +521,10 @@
         shutil.move(file, outfile)
 
         ## rename/move the rest of the files
-        for i, file in enumerate(files):
-            new_filename = 'primary_{0}_output{1}_visible_txt_?'.format(outfile_id, i+2)
+        for file in files:
+            name = os.path.basename(file)[:-4]
+            name = name.replace('_', '-')
+            new_filename = 'primary_{0}_{1}_visible_txt_?'.format(outfile_id, name)
             new_pathname = os.path.join(files_dir, new_filename)
             shutil.move(file, new_pathname)
 
--- a/make_phylip.xml	Mon Sep 23 13:37:19 2013 -0400
+++ b/make_phylip.xml	Wed Nov 20 13:46:10 2013 -0500
@@ -1,19 +1,19 @@
-<tool id="gd_make_phylip" name="Phylip" version="1.0.0" force_history_refresh="True">
+<tool id="gd_make_phylip" name="Phylip" version="1.1.0" force_history_refresh="True">
   <description>: prepare data for phylogenetic analysis</description>
 
   <command interpreter="python">
     #set $zero_based = 1
-    #set $gen_chrClmn = int($input.metadata.scaffold) - $zero_based
-    #set $gen_posClmn = int($input.metadata.pos) - $zero_based
+    #set $gen_chrClmn = int($input.metadata.ref) - $zero_based
+    #set $gen_posClmn = int($input.metadata.rPos) - $zero_based
     #set $gen_refClmn = int($input.metadata.pos) - $zero_based + 1
     #set $gen_altrClmn = int($input.metadata.pos) - $zero_based + 2
-    make_phylip.py '--altrClmn=$gen_altrClmn' '--chrClmn=$gen_chrClmn' '--gd_indivs=$indivs_input' '--input=$input' '--output=$output1' '--output_id=$output1.id' '--output_dir=$__new_file_path__' '--posClmn=$gen_posClmn' '--refClmn=$gen_refClmn'
+    make_phylip.py '--altrClmn=$gen_altrClmn' '--chrClmn=$gen_chrClmn' '--gd_indivs=$indivs_input' '--input=$input' '--input_type=$input.ext' '--output=$output1' '--output_id=$output1.id' '--output_dir=$__new_file_path__' '--posClmn=$gen_posClmn' '--refClmn=$gen_refClmn'
     #if $input_type.choice == '0'
-      #set $cov_chrClmn = int($input_type.coverage_input.metadata.scaffold) - $zero_based
-      #set $cov_posClmn = int($input_type.coverage_input.metadata.pos) - $zero_based
+      #set $cov_chrClmn = int($input_type.coverage_input.metadata.ref) - $zero_based
+      #set $cov_posClmn = int($input_type.coverage_input.metadata.rPos) - $zero_based
       #set $cov_refClmn = int($input_type.coverage_input.metadata.pos) - $zero_based + 1
       #set $cov_altrClmn = int($input_type.coverage_input.metadata.pos) - $zero_based + 2
-      '--altrClmnCvrg=$cov_altrClmn' '--chrClmnCvrg=$cov_chrClmn' '--cvrgTreshold=$input_type.coverage_threshold' '--gd_indivs_cover=$indivs_input' '--indvlsPrctTrshld=$input_type.indivs_threshold' '--inputCover=$input_type.coverage_input' '--posClmnCvrg=$cov_posClmn' '--refClmnCvrg=$cov_refClmn'
+      '--altrClmnCvrg=$cov_altrClmn' '--chrClmnCvrg=$cov_chrClmn' '--cvrgTreshold=$input_type.coverage_threshold' '--gd_indivs_cover=$indivs_input' '--indvlsPrctTrshld=$input_type.indivs_threshold' '--inputCover=$input_type.coverage_input' '--inputCover_type=$input_type.coverage_input.ext' '--posClmnCvrg=$cov_posClmn' '--refClmnCvrg=$cov_refClmn'
     #else if $input_type.choice == '1'
       #set $fchrClmn = int($input_type.annotation_input.metadata.chromCol) - $zero_based
       #set $strandClmn = int($input_type.annotation_input.metadata.strandCol) - $zero_based
@@ -28,6 +28,8 @@
     #end if
   </command>
 
+  <code file="make_phylip_hooks.py" />
+
   <inputs>
     <param name="input" type="data" format="gd_genotype,gd_snp" label="Genotype/SNP dataset">
       <validator type="metadata" check="scaffold" message="scaffold missing" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make_phylip_hooks.py	Wed Nov 20 13:46:10 2013 -0500
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+def exec_before_job(app, inp_data=None, out_data=None, tool=None, param_dict=None):
+    pass
+
+def exec_after_process(app, inp_data=None, out_data=None, param_dict=None, tool=None, stdout=None, stderr=None):
+    output_name = 'output1'
+
+    ## check for output
+    try:
+        first_output = out_data[output_name]
+    except:
+        return
+
+    ## check for collected datasets
+    try:
+        collected_dict = param_dict['__collected_datasets__']['primary'][output_name]
+    except:
+        return
+
+    if len(collected_dict.keys()) == 0:
+        return
+
+    ## check for fasta file
+    try:
+        fasta_file = inp_data['fasta_input']
+    except:
+        return
+
+    ## find missing fasta header
+    first_output_name = None
+    with open(fasta_file.get_file_name()) as fh:
+        for line in fh:
+            if line[0] != '>':
+                continue
+            name = line[1:]
+            name = name.strip()
+            name = name.split()[0]
+            name = name.replace('_', '-')
+            if name not in collected_dict:
+                first_output_name = name
+                break
+
+    ## fix name
+    if first_output_name is not None:
+        first_output.name = '%s (%s)' % (first_output.name, first_output_name)
+
--- a/tool_dependencies.xml	Mon Sep 23 13:37:19 2013 -0400
+++ b/tool_dependencies.xml	Wed Nov 20 13:46:10 2013 -0500
@@ -12,7 +12,7 @@
     <repository prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu/" owner="miller-lab" name="package_fisher_0_1_4" changeset_revision="c84c287b81a4" />
   </package>
   <package name="gd_c_tools" version="0.1">
-    <repository prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu/" owner="miller-lab" name="package_gd_c_tools_0_1" changeset_revision="7361ee4b5f40" />
+    <repository prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu/" owner="miller-lab" name="package_gd_c_tools_0_1" changeset_revision="989859342136" />
   </package>
   <package name="matplotlib" version="1.2.1">
     <repository prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu/" owner="iuc" name="package_matplotlib_1_2" changeset_revision="9d164359606b" />