changeset 1:cbdcc9621730 draft

Deleted selected files
author schang
date Tue, 02 Dec 2014 19:39:34 -0500
parents e223b827a13e
children 348ffb045343
files frp_tool.py
diffstat 1 files changed, 0 insertions(+), 300 deletions(-) [+]
line wrap: on
line diff
--- a/frp_tool.py	Tue Dec 02 19:38:14 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,300 +0,0 @@
-#!/usr/bin/env python
-
-'''
-Author: Silvia Chang
-Metagenomic Final Project
-Fall 2014
-
-Script to create fragment recruitment plots, after read-reference alignment using Bowtie (SAM output) and FR-HIT (fr-hit output) programs
-Simplified/revised version of the fragrecruit project from https://github.com/mstrosaker/fragrecruit
-
-Usage: ./frp_tool.py [options] --input --output
-
-'''
-import matplotlib.pyplot as plt
-import os, sys, getopt, imp, errno
-# import shutil
-from refseq_parser import SequenceFile
-from SAM_parser import SAMFile
-from FRHIT_parser import FRHITFile
-
-class FragmentPlotter:
-    '''Contains all functions to create fragment recruitment plots'''
-    
-    # Color list. Can only handle 5 samples. Add more colors for more samples. 
-    colors = [              
-        (0.78, 0.59, 0.17, 1.0),   #dark gold
-        (0.99, 0.60, 0.17, 1.0),   #light orange   
-        (0.60, 0.82, 0.47, 1.0),   #medium/light green
-        (0.95, 0.40, 0.40, 1.0),   #dark pink/fuschia
-        (0.17, 0.50, 0.69, 1.0),   #medium teal/blue
-        (0.85, 0.64, 0.60, 1.0),   #light plum
-    ]
-
-    def __init__(self, microbiome, pt_color, ymin=50):
-        self.microbiome = microbiome
-        self.decorated = False
-        self.pt_color = pt_color
-        self.ymin = ymin
-              
-    def new_plot(self, ref_name, contigs, begin, length):
-        ''' Clears previous data and start a blank plot'''
-        # refgenome - string, organism name of the refgenome
-        # contigs - a list containing a [name,length] pair of the refgenome
-        # begin, length - integers indicate beginning/length of plot
-        
-        plt.close()
-        # Initialize variables that will contain all the fragments information
-        self.refgenome = ref_name
-        self.contigs = contigs
-        self.begin = begin
-        self.samples = []
-        self.n_points = 0
-        self.x_points = []
-        self.y_points = []
-        self.clr_points = []
-        self.decorated = False
-        
-        self.full_length = 0
-        
-        # Builds the full length of the reference genome, or partial based on begin and length variables
-        for contig in self.contigs:
-            self.full_length += contig[1]
-        
-        # Sets the range of the x axis
-        if length != 0:
-            self.xextent = begin + length
-        else:
-            self.xextent = self.full_length
-    
-    def decorate(self):
-        ''' Decorates the plot by adding axes, axes labels, title, tickmarks, etc.'''
-        if not self.decorated:
-            self.decorated = True
-            
-            # Sets the rc parameters, add x-axis tickmarks and labels them
-            plt.rc('xtick', labelsize=8)
-            ticks = [self.begin] # beginning tick
-            ticks.append(self.xextent/2) # mid tick
-            ticks.append(self.xextent) # last tick
-            
-            labels = [int(self.begin)] # beginning nucleotide
-            labels.append(str(self.xextent/2)) # mid nucleotide
-            labels.append(str(self.xextent)) # last nucleotide
-            
-            plt.xticks(ticks, labels)
-            
-            # Sets x and y axes boundaries and labels
-            plt.xlim(self.begin, self.xextent)
-            plt.ylim(self.ymin, 100)
-            plt.ylabel("% Identity")
-            
-            # Set plot titles
-            plt.title(self.refgenome)
-            plt.suptitle(self.microbiome)
-            
-    def draw(self, filename):
-        '''Creates a scatter plot of fragments after they're added in add_points() and save the plots in the specified file name'''
-        
-        # Sets points size and shape based on number of points recruited
-        if self.n_points > 10000:
-            pt_shape = 'o'
-            pt_size = 1
-        else:
-            pt_shape ='s'
-            pt_size = 3
- 
-        # Generate the scatter plot
-#         plt.scatter(self.x_points, self.y_points, s=pt_size, c=self.clr_points, marker=pt_shape, alpha = 0.1, edgecolors='none')
-         
-        # Adds the legend on the plot. Currently, can only add one. Uncomment the plt.legend method.
-        plt.scatter(self.x_points, self.y_points, s=pt_size, c=self.clr_points, marker=pt_shape, alpha = 0.1, edgecolors='none', label=self.samples[0])
-        plt.legend(loc='lower left', ncol=3, scatterpoints=1, markerscale=8, prop={'size':10})
-        
-        # Adds titles, axes, etc
-        self.decorate()
-        
-        # Save the images
-        plt.savefig(filename, dpi=150, bbox_inches=0)
-
-    def legend(self, filename, width=4):
-        '''Create a legend for the plot'''
-        plt.figure(figsize=(width, 0.45*len(self.samples)))
-        plt.axis('off')
-        plt.ylim(-1,0.1)
-  
-        i = 0
-        ystep = 0
-        for s in self.samples:
-            plt.text(0.00, -1*ystep-0.1, 'text',
-                       backgroundcolor=self.colors[i], color=self.colors[i])
-            plt.text(0.27, -1*ystep-0.1, s)
-            i += 1
-            ystep += 0.2
-
-        plt.title("Samples")
-        plt.savefig(filename, bbox_inches=0)   
-                
-    def show(self):
-        self.decorate()
-        plt.show()
-    
-    def add_sample(self, name):
-        self.samples.append(name)
-    
-    def add_points(self, sample, ref_contig, coordinate, identity):
-        x= 0
-        
-        # Builds the reference length
-        for c  in self.contigs:
-            if (ref_contig == c[0]):
-                break
-            x += c[1]   
-            
-        # Applies new color to different samples
-        clr = self.pt_color
-        for s in self.samples:
-            if(sample == s):
-                break
-            clr += 1
-            
-        if x + coordinate >= self.begin and x + coordinate <= self.xextent:
-            self.n_points += 1
-            self.x_points.append(x + coordinate)
-            self.y_points.append(identity)
-            self.clr_points.append(self.colors[clr])               
-            
-def usage():
-    print ('''Usage:
-    %s [options] <aligned_input> <ref_genome>
-      Options
-        -h: display this help message
-        -i <int>: minimum identity to include in plot (y-axis extent)
-        -b <int>: begin the plot at the nth nucleotide in the ref genome
-        -l <int>: include n nucleotides in the x-axis of the plot
-           the default behavior is to plot the entirety of the genome
-        -t <str>: title of the plot (e.g. name of reference genome). Put in quotes. 
-        -m <str>: Name of the microbiome. Put in quotes.
-        -c <int>: number indicating color for plot points.
-           ''' % sys.argv[0])
-            
-if __name__ == '__main__':
-    # Parse all command-line arguments
-    # variable opts contains a list of the key-value pair of (option, value); variable args contains a list of the configuration file
-    try: 
-        opts, args = getopt.getopt(sys.argv[1:], 'hi:b:l:t:m:c:', ['help', 'identity=', 'begin=', 'length=', 'title=', 'microbiome=', 'color='])
-    except getopt.GetoptError as err:
-        print(str(err))
-        usage()
-        sys.exit(2)
-    
-    # Set some default values
-    identity = 50.0
-    begin = 0
-    length = 0
-    title = ''
-    microbiome=''
-    color = 0
-    
-    # Parse options and set user-defined values
-    for opt, value in opts:
-        if opt in ('-h', '--help'):
-            usage()
-            sys.exit(0)
-        elif opt in ('-i', '--identity'):
-            identity = float(value)
-        elif opt in ('-b', '--begin'):
-            begin = int(value)
-        elif opt in ('-l', '--length'):
-            length = int(length)
-        elif opt in ('-t', '--title'):
-            title = str(value)
-        elif opt in ('-m', '--microbiome'):
-            microbiome = str(value)
-        elif opt in ('-c', '--color'):
-            color = int(value)
-        else:
-            assert False, "invalid option" 
-    
-    # Verify there are input files: input and ref_genome
-    if len(args) == 0:
-        print ('Need input files.')
-        usage()
-        sys.exit
-    
-    # Load and read the configuration file. Treats the file as module object
-#     config = imp.load_source('*', args[0])
-    
-    aligned_input= args[0]
-    ref_genome = args[1]
-    
-    # Instantiate plotting object
-    plotter = FragmentPlotter(microbiome, color, ymin=identity)
-    
-           
-    # Begin the plotting commands, starting each with the reference genome
-#     for refgenome in ref_genome:
-    '''refgenome = [(name, taxonID, file_path), ..., ]'''
-    contigs = []
-    
-    # Parse the reference genome's FASTA file.
-    fasta = open(ref_genome, 'r')
-    for refseq in SequenceFile(fasta):
-        contigs.append([refseq.name.split('|')[3], len(refseq.seq)])
-    
-    fasta.close()
-    
-    # Sets the ref genome's accession # as the title if title is not set (i.e. empty)
-    if title == '':
-        title = contigs[0][0]
-    
-    # Initialize the new plot
-    plotter.new_plot(title, contigs, begin, length)
-    
-    count_samp = 0
-#     for sample in aligned_input:
-    '''sample = [(name, {taxonID:path}), ...,]'''
-#     plotter.add_sample("test")
-    count_samp += 1
-    
-    # Call the appropriate parser based on sample file extension
-#             file = '%s%s' % (config.basepath, sample[1][refgenome[1]])
-
-#     if aligned_input.endswith('.sam'):
-#         parser_func = SAMFile
-#     elif aligned_input.endswith('.fr-hit'):
-#         parser_func = FRHITFile
-#     else:
-#         print('Invalid file type')
-    
-    parser_func = SAMFile
-        
-    sample_name =""
-    for frag in parser_func(aligned_input):
-        if frag.identity >= identity:
-            sample_name = frag.name.split('.')[0]
-            plotter.add_points(frag.name, frag.refseq, frag.location, frag.identity)
-    
-    plotter.add_sample(sample_name)    
-        # Put this inside the sample for-loop to create individual plot per sample
-    plotter.draw('%s.png' % ("frp_image"))
-
-    
-#     plotter.show()
-            
-        # Draw the points and save file with taxonID as base name. Put this outside the sample for-loop to create an all-samples plot. 
-        # Put this inside the sample for-loop to create individual plot per sample
-#         plotter.draw('%s/%s.png' % (config.shortname, refgenome[1]))
-    
-#     plotter.legend('%s/legend.png' % (config.shortname))
-    
-    # Make a tarball of the resulting PNGs
-#     os.system('tar -cvf %s.tar %s >/dev/null' %
-#               ( config.shortname, config.shortname))
-#     os.system('gzip %s.tar' % (config.shortname))
-#     shutil.rmtree(config.shortname)
-            
-        
-                                                                                                                                                                                  
-        
-        
\ No newline at end of file