changeset 45:f4b26211e3d8 draft

Uploaded
author charles-bernard
date Thu, 21 Dec 2017 12:58:57 -0500
parents 3a051528e47d
children 5d745637f045
files alfa/ALFA.py alfa/ALFA_wrapper.py
diffstat 2 files changed, 45 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/alfa/ALFA.py	Thu Dec 21 11:56:24 2017 -0500
+++ b/alfa/ALFA.py	Thu Dec 21 12:58:57 2017 -0500
@@ -18,7 +18,7 @@
 from matplotlib.backends.backend_pdf import PdfPages
 # To correctly embed the texts when saving plots in svg format
 import matplotlib
-import progressbar
+# import progressbar
 import collections
 import matplotlib as mpl
 import numpy as np
@@ -352,14 +352,14 @@
             stranded_index_fh.write("#%s\t%s\n" % (key, value))
     # Progress bar to track the genome indexes creation
     nb_lines = sum(1 for _ in open(annotation))
-    pbar = progressbar.ProgressBar(widgets=["Indexing the genome ", progressbar.Percentage(), " ", progressbar.Bar(), progressbar.Timer()], maxval=nb_lines).start()
+    # pbar = progressbar.ProgressBar(widgets=["Indexing the genome ", progressbar.Percentage(), " ", progressbar.Bar(), progressbar.Timer()], maxval=nb_lines).start()
     # Browsing the GTF file and writing into genome index files
     with open(annotation, "r") as gtf_fh:
         for line in gtf_fh:
             i += 1
             # Update the progressbar every 1k lines
-            if i % 1000 == 1:
-                pbar.update(i)
+            # if i % 1000 == 1:
+                # pbar.update(i)
             # Processing lines except comment ones
             if not line.startswith("#"):
                 # Getting the line info
@@ -437,7 +437,7 @@
 
         # Store the categories of the last chromosome
         register_interval(intervals_dict, chrom, stranded_genome_index, unstranded_genome_index)
-    pbar.finish()
+    # pbar.finish()
 
 
 def generate_bedgraph_files(sample_labels, bam_files):
@@ -446,9 +446,9 @@
     #sample_labels = []
     # Progress bar to track the BedGraph file creation
     #pbar = progressbar.ProgressBar(widgets=["Generating the BedGraph files ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], max_value=len(bam_files)+1).start()
-    pbar = progressbar.ProgressBar(widgets=["Generating the BedGraph files ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], maxval=len(sample_labels)+1).start()
+    # pbar = progressbar.ProgressBar(widgets=["Generating the BedGraph files ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], maxval=len(sample_labels)+1).start()
     n = 1
-    pbar.update(n)
+    # pbar.update(n)
     #for n in range(0, len(bam_files), 2):
     for sample_label, bam_file in zip(sample_labels, bam_files):
         # Get the label for this sample
@@ -458,23 +458,23 @@
         if options.strandness in ["forward", "fr-firststrand"]:
             #subprocess.call("bedtools genomecov -bg -split -strand + -ibam " + bam_files[n] + " > " + modified_label + ".plus.bedgraph", shell=True)
             subprocess.call("bedtools genomecov -bg -split -strand + -ibam " + bam_file + " > " + sample_label + ".plus.bedgraph", shell=True)
-            pbar.update(n + 0.5)
+            # pbar.update(n + 0.5)
             #subprocess.call("bedtools genomecov -bg -split -strand - -ibam " + bam_files[n] + " > " + modified_label + ".minus.bedgraph", shell=True)
             subprocess.call("bedtools genomecov -bg -split -strand - -ibam " + bam_file + " > " + sample_label + ".minus.bedgraph", shell=True)
-            pbar.update(n + 0.5)
+            # pbar.update(n + 0.5)
         elif options.strandness in ["reverse", "fr-secondstrand"]:
             #subprocess.call("bedtools genomecov -bg -split -strand - -ibam " + bam_files[n] + " > " + modified_label + ".plus.bedgraph", shell=True)
             subprocess.call("bedtools genomecov -bg -split -strand - -ibam " + bam_file + " > " + sample_label + ".plus.bedgraph", shell=True)
-            pbar.update(n + 0.5)
+            # pbar.update(n + 0.5)
             #subprocess.call("bedtools genomecov -bg -split -strand + -ibam " + bam_files[n] + " > " + modified_label + ".minus.bedgraph", shell=True)
             subprocess.call("bedtools genomecov -bg -split -strand + -ibam " + bam_file + " > " + sample_label + ".minus.bedgraph", shell=True)
-            pbar.update(n + 0.5)
+            # pbar.update(n + 0.5)
         else:
             #subprocess.call("bedtools genomecov -bg -split -ibam " + bam_files[n] + " > " + modified_label + ".bedgraph", shell=True)
             subprocess.call("bedtools genomecov -bg -split -ibam " + bam_file + " > " + sample_label + ".bedgraph", shell=True)
-            pbar.update(n + 1)
+            # pbar.update(n + 1)
         #sample_files.append(modified_label)
-    pbar.finish()
+    # pbar.finish()
     #return sample_files, sample_labels
     return None
 
@@ -579,7 +579,7 @@
 
         # Progress bar to track the BedGraph and index intersection
         #pbar = progressbar.ProgressBar(widgets=["Processing " + sample_file + " ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], max_value=nb_lines).start()
-        pbar = progressbar.ProgressBar(widgets=["Processing " + sample_label + " ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], maxval=nb_lines).start()
+        # pbar = progressbar.ProgressBar(widgets=["Processing " + sample_label + " ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], maxval=nb_lines).start()
         i = 0
 
         # Intersecting the BedGraph and index files
@@ -592,8 +592,8 @@
                 # Running through the BedGraph file
                 for bam_line in bedgraph_fh:
                     i += 1
-                    if i % 10000 == 0:
-                        pbar.update(i)
+                    # if i % 10000 == 0:
+                        # pbar.update(i)
                     # Getting the BAM line info
                     bam_chrom = bam_line.split("\t")[0]
                     bam_start, bam_stop, bam_cpt = map(float, bam_line.split("\t")[1:4])
@@ -673,7 +673,7 @@
                             #cpt[sample_name][("intergenic", "intergenic")] = (bam_stop - bam_start) * bam_cpt
                             cpt[sample_label][("intergenic", "intergenic")] = (bam_stop - bam_start) * bam_cpt
                 gtf_index_file.close()
-        pbar.finish()
+        # pbar.finish()
     return cpt
 
 
@@ -1085,7 +1085,7 @@
 
     ## Showing the plot
     if pdf:  ## TODO: allow several output formats
-        pdf.savefig(format="pdf")
+        pdf.savefig()
         plt.close()
     elif svg:
         if svg == True:
--- a/alfa/ALFA_wrapper.py	Thu Dec 21 11:56:24 2017 -0500
+++ b/alfa/ALFA_wrapper.py	Thu Dec 21 12:58:57 2017 -0500
@@ -44,26 +44,29 @@
     args = parser.parse_args()
     return args
 
-def symlink_user_indexes(stranded_index, unstranded_index):
+def symlink_user_indexes(stranded_index, unstranded_index, tmp_dir):
     index='index'
-    os.symlink(stranded_index, index + '.stranded.index')
-    os.symlink(unstranded_index, index + '.unstranded.index')
+    os.symlink(stranded_index, os.path.join(tmp_dir, index + '.stranded.index'))
+    os.symlink(unstranded_index, os.path.join(tmp_dir, index + '.unstranded.index'))
     return index
 
-def get_input2_args(reads_list, format):
+def get_input2_args(reads_list, format, tmp_dir):
     n = len(reads_list)
     if n%2 != 0:
         exit_and_explain('Problem with pairing reads filename and reads label')
     if format == 'bam':
         input2_args = '--bam'
-    elif format == 'begraph':
+    elif format == 'bedgraph':
         input2_args = '--bedgraph'
-    input2_args='-i'
     k = 0
     reads_filenames = [''] * (n/2)
     reads_labels = [''] * (n/2)
     for i in range(0, n, 2):
-        reads_filenames[k] = reads_list[i].split('__fname__')[1]
+        curr_filename = reads_list[i].split('__fname__')[1]
+        # Alfa checks extension so the filename must end either by .bedgraph or by .bam
+        # We then create a symlink from file.dat to tmp_dir/annotation_n.<format> to avoid the error message
+        reads_filenames[k] = os.path.join(tmp_dir, 'annotation_' + str(k) + '.' + format)
+        os.symlink(curr_filename, reads_filenames[k])
         cur_label = reads_list[i+1].split('__label__')[1]
         reads_labels[k] = re.sub(r' ', '_', cur_label)
         if not reads_labels[k]:
@@ -88,7 +91,7 @@
 def merge_count_files(reads_labels):
     merged_count_file = open('count_file.txt', 'wb')
     for i in range(0, len(reads_labels)):
-        current_count_file = open('%s.categories_counts' % reads_labels[i], 'r')
+        current_count_file = open('%s.feature_counts.tsv' % reads_labels[i], 'r')
         merged_count_file.write('##LABEL: %s\n\n' % reads_labels[i])
         merged_count_file.write(current_count_file.read())
         merged_count_file.write('__________________________________________________________________\n')
@@ -108,7 +111,7 @@
     #INPUT1: Annotation File
     if args.indexes:
         # The indexes submitted by the user must exhibit the suffix '.(un)stranded.index' and will be called by alfa by their prefix
-        index = symlink_user_indexes(args.indexes[0], args.indexes[1])
+        index = symlink_user_indexes(args.indexes[0], args.indexes[1], tmp_dir)
         input1_args = '-g "%s"' % index
     elif args.bi_indexes:
         input1_args = '-g "%s"' % args.bi_indexes[0]
@@ -119,7 +122,7 @@
 
     #INPUT 2: Mapped Reads
     if args.reads:
-        input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, args.reads_format[0])
+        input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, args.reads_format[0], tmp_dir)
         strandness = '-s %s' % args.strandness[0]
     else:
         exit_and_explain('No reads submitted !')
@@ -129,17 +132,21 @@
     if not (args.output_pdf or args.output_png or args.output_svg):
         output_args = '--n'
     else:
+        plot_suffix = os.path.join(tmp_dir, "ALFA_plot");
         if args.output_pdf:
-            output_args = '--pdf plot.pdf'
+            output_args = '--pdf ' + plot_suffix + '.pdf'
         if args.output_png:
-            output_args = '--png plot'
+            output_args = '--png ' + plot_suffix
         if args.output_svg:
-            output_args = '--svg plot'
+            output_args = '--svg ' + plot_suffix
         if args.threshold:
             output_args = '%s -t %.3f %.3f' % (output_args, args.threshold[0], args.threshold[1])
 
     ##Run alfa
     cmd = 'python %s %s %s %s %s %s' % (alfa_path, input1_args, input2_args, strandness, categories_depth, output_args)
+    # Change into the tmp dir because ALFA produces files in the current dir
+    curr_dir = os.getcwd()
+    os.chdir(tmp_dir)
     logging.info("__________________________________________________________________\n")
     logging.info("Alfa execution")
     logging.info("__________________________________________________________________\n")
@@ -155,13 +162,13 @@
 
     ##Redirect outputs
     if args.output_pdf:
-        shutil.move('plot.pdf', args.output_pdf[0])
+        shutil.move(plot_suffix + '.pdf', args.output_pdf[0])
     if args.output_png:
-        shutil.move('plot' + '.categories.png', args.output_png[0])
-        shutil.move('plot' + '.biotypes.png', args.output_png[1])
+        shutil.move(plot_suffix + '.categories.png', args.output_png[0])
+        shutil.move(plot_suffix + '.biotypes.png', args.output_png[1])
     if args.output_svg:
-        shutil.move('plot' + '.categories.svg', args.output_svg[0])
-        shutil.move('plot' + '.biotypes.svg', args.output_svg[1])
+        shutil.move(plot_suffix + '.categories.svg', args.output_svg[0])
+        shutil.move(plot_suffix + '.biotypes.svg', args.output_svg[1])
     if args.output_count:
         count_filename = merge_count_files(reads_labels)
         shutil.move(count_filename, args.output_count[0])
@@ -179,5 +186,7 @@
             shutil.move(args.bi_indexes[0] + '.stranded.index', args.output_index[0])
             shutil.move(args.bi_indexes[1] + '.unstranded.index', args.output_index[1])
 
+    # Get back to the original dir and cleanup the tmp dir
+    os.chdir(curr_dir)
     cleanup_before_exit(tmp_dir)
 main()