22
|
1 #!/usr/bin/python
|
|
2
|
|
3 import argparse
|
|
4 import logging
|
|
5 import os
|
|
6 import re
|
|
7 import shutil
|
|
8 import subprocess
|
|
9 import sys
|
|
10 import tempfile
|
|
11
|
|
def exit_and_explain(msg):
    """Record ``msg`` as a CRITICAL log entry, then abort the program.

    The same message is used as the ``SystemExit`` payload, so it is also
    what the interpreter reports when the exit is not intercepted.
    """
    logging.critical(msg)
    raise SystemExit(msg)
|
|
15
|
|
def cleanup_before_exit(tmp_dir):
    """Remove the directory tree ``tmp_dir`` when it is set and still exists.

    A falsy ``tmp_dir`` (``None`` or an empty string) or a path that is no
    longer on disk is silently ignored.
    """
    if not tmp_dir:
        return
    if not os.path.exists(tmp_dir):
        return
    shutil.rmtree(tmp_dir)
|
|
19
|
|
def get_arg():
    """Declare the wrapper's command-line options and parse ``sys.argv``.

    Every option is declared with ``nargs``, so each parsed value is a list
    (callers read single values as ``args.<dest>[0]``). Options that are not
    supplied are ``None`` unless a default is declared in the table below.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()

    # (flag, keyword arguments) in declaration order. Every option uses the
    # 'store' action; values are strings unless 'type' is given explicitly.
    option_table = (
        ('--project_name', dict(dest='project_name', nargs=1, metavar='project_name')),
        #Input 1: Annotation File
        ('--index', dict(dest='indexes', nargs=2,
                         metavar=('stranded_index_filename', 'unstranded_index_filename'))),
        ('--bi_index', dict(dest='bi_indexes', nargs=1, metavar='built_in_indexes_dir_path')),
        ('--annotation', dict(dest='annotation_file', nargs=1, metavar='annotation_gtf_file')),
        #Input 2: Mapped Reads
        ('--reads_format', dict(dest='reads_format', nargs=1, choices=['bam', 'bedgraph'],
                                metavar='reads_format')),
        ('--reads', dict(dest='reads', nargs='+', metavar=('bam_file1 label1', ""))),
        ('--strandness', dict(dest='strandness', nargs=1, default=['unstranded'],
                              choices=['unstranded', 'forward', 'reverse'],
                              metavar='strandness')),
        #Output files
        ('--output_pdf', dict(dest='output_pdf', nargs=1, metavar='output_pdf_filename')),
        ('--output_svg', dict(dest='output_svg', nargs=2,
                              metavar=('categories_svg_filename', 'biotypes_svg_filename'))),
        ('--output_png', dict(dest='output_png', nargs=2,
                              metavar=('categories_png_filename', 'biotypes_png_filename'))),
        ('--output_count', dict(dest='output_count', nargs=1, metavar='output_count_filename')),
        ('--output_index', dict(dest='output_indexes', nargs=2,
                                metavar=('output_stranded_index_filename',
                                         'output_unstranded_index_filename'))),
        #Output Options
        ('--categories_depth', dict(dest='categories_depth', nargs=1, default=[3],
                                    choices=range(1, 5), metavar='categories_depth',
                                    type=int)),
        ('--plot_format', dict(dest='plot_format', nargs=1, choices=['pdf', 'png', 'svg'],
                               metavar='plot_format')),
        ('--threshold', dict(dest='threshold', nargs=2, metavar=('yMin', 'yMax'),
                             type=float)),
        #Internal variables
        ('--log_report', dict(dest='log_report', nargs=1, metavar='log_filename')),
        ('--tool_dir', dict(dest='GALAXY_TOOL_DIR', nargs=1, metavar='galaxy_tool_dir_path')),
    )

    for flag, settings in option_table:
        settings.setdefault('type', str)
        parser.add_argument(flag, action='store', **settings)

    return parser.parse_args()
|
|
46
|
28
|
def symlink_user_indexes(stranded_index, unstranded_index):
    """Link the two user-supplied index files under one common prefix.

    Creates the symbolic links ``index.stranded.index`` and
    ``index.unstranded.index`` in the current working directory, pointing at
    ``stranded_index`` and ``unstranded_index`` respectively.

    Returns:
        str: the shared prefix, ``'index'``.
    """
    prefix = 'index'
    link_plan = (
        (stranded_index, '.stranded.index'),
        (unstranded_index, '.unstranded.index'),
    )
    for target, suffix in link_plan:
        os.symlink(target, prefix + suffix)
    return prefix
|
|
52
|
|
def get_input2_args(reads_list, format):
    """Turn the flat ``--reads`` list into ALFA's read-input arguments.

    ``reads_list`` alternates filename and label entries. The filename is the
    text following the ``__fname__`` marker and the label is the text
    following the ``__label__`` marker. Spaces in a label are replaced by
    underscores, and an empty label becomes ``sample_<k>`` where ``k`` is the
    0-based position of the pair.

    Args:
        reads_list: flat list ``[file1, label1, file2, label2, ...]``.
        format: reads format (``'bam'`` or ``'bedgraph'``). Currently it has
            no effect on the result; see the review note in the body.

    Returns:
        tuple: ``(input2_args, reads_filenames, reads_labels)`` where
        ``input2_args`` is the string ``-i "file1" "label1" ...`` and the two
        lists hold the extracted filenames and labels in input order.

    Raises:
        IndexError: if an entry does not contain its expected marker.
    """
    if len(reads_list) % 2 != 0:
        exit_and_explain('Problem with pairing reads filename and reads label')

    # NOTE(review): the previous code first chose '--bam' / '--bedgraph' from
    # `format` (with 'bedgraph' misspelled as 'begraph') and then overwrote
    # that choice with '-i', so '-i' was the only flag ever emitted. That
    # effective behaviour is kept; confirm which flag ALFA.py expects before
    # making the flag depend on `format`.
    arg_parts = ['-i']
    reads_filenames = []
    reads_labels = []

    # Walk the list two entries at a time: (filename entry, label entry).
    pairs = zip(reads_list[0::2], reads_list[1::2])
    for position, (filename_entry, label_entry) in enumerate(pairs):
        filename = filename_entry.split('__fname__')[1]
        label = label_entry.split('__label__')[1].replace(' ', '_')
        if not label:
            label = 'sample_%s' % position
        reads_filenames.append(filename)
        reads_labels.append(label)
        arg_parts.append('"%s" "%s"' % (filename, label))

    return ' '.join(arg_parts), reads_filenames, reads_labels
|
|
74
|
|
def _as_text(stream_content):
    """Return captured process output as text (``None`` becomes ``''``)."""
    if stream_content is None:
        return ''
    if isinstance(stream_content, str):
        return stream_content
    if isinstance(stream_content, bytes):
        # Python 3 pipes yield bytes unless opened in text mode.
        return stream_content.decode('utf-8', 'replace')
    return stream_content


def redirect_errors(alfa_out, alfa_err):
    """Decide whether ALFA's stderr content is harmless, a warning or fatal.

    * Empty stderr, or stderr containing ``### End of program``: nothing is
      reported (ALFA may print that marker on stderr even when it succeeded).
    * Otherwise, if stdout contains ``### End of program``: the stderr content
      is logged as a warning.
    * Otherwise: the stderr content is a true error and the script exits
      through ``exit_and_explain``.

    Args:
        alfa_out: captured standard output of ALFA.py (text, bytes or None).
        alfa_err: captured standard error of ALFA.py (text, bytes or None).
    """
    end_marker = '### End of program'
    out_text = _as_text(alfa_out)
    err_text = _as_text(alfa_err)

    # Nothing on stderr, or only the end-of-run marker: not an error.
    if not err_text or end_marker in err_text:
        return

    if end_marker in out_text:
        # ALFA reached its end on stdout, so stderr only carries warnings.
        logging.warning("The script ALFA.py encountered the following warning:\n\n%s" % err_text)
        logging.info("\n******************************************************************\n")
    else:
        # True errors make the script exit.
        exit_and_explain("The script ALFA.py encountered the following error:\n\n%s" % err_text)
|
|
87
|
|
def merge_count_files(reads_labels):
    """Concatenate the per-sample count files into ``count_file.txt``.

    For each label, the file ``<label>.categories_counts`` in the current
    working directory is appended to the merged file, preceded by a
    ``##LABEL: <label>`` header and followed by a line of underscores.

    Args:
        reads_labels: sample labels, in the order they should appear.

    Returns:
        str: the merged file's name, ``'count_file.txt'``.
    """
    merged_filename = 'count_file.txt'
    separator = '__________________________________________________________________\n'

    # Text mode: everything written here is str. The context managers make
    # sure both files are closed even if a count file is missing.
    with open(merged_filename, 'w') as merged_count_file:
        for label in reads_labels:
            with open('%s.categories_counts' % label, 'r') as current_count_file:
                merged_count_file.write('##LABEL: %s\n\n' % label)
                merged_count_file.write(current_count_file.read())
                merged_count_file.write(separator)
    return merged_filename
|
|
98
|
|
def main():
    """Run ALFA.py with the wrapper's options and dispatch its outputs.

    Steps:
      1. Parse the options and make sure at least one output was requested.
      2. Build the ALFA command line from the annotation/index input, the
         reads, the strandness, the categories depth and the plot options.
      3. Run ALFA.py, log its output, and turn its stderr into a warning or a
         fatal error.
      4. Move the generated plots, merged count table and index files to the
         output paths given on the command line.

    The temporary directory created at start-up is removed on every exit
    path, including the ones that go through ``exit_and_explain``.
    """
    import shlex  # local import: only needed to tokenise the command line

    args = get_arg()

    if not (args.output_pdf or args.output_png or args.output_svg or args.output_indexes or args.output_count):
        exit_and_explain('Error: no output to return\nProcess Aborted\n')
    tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='')
    try:
        logging.basicConfig(level=logging.INFO, filename=args.log_report[0], filemode="a+", format='%(message)s')
        alfa_path = os.path.join(args.GALAXY_TOOL_DIR[0], 'ALFA.py')

        #INPUT1: Annotation File
        if args.indexes:
            # The indexes submitted by the user must exhibit the suffix
            # '.(un)stranded.index' and will be called by alfa by their prefix
            index = symlink_user_indexes(args.indexes[0], args.indexes[1])
            input1_args = '-g "%s"' % index
        elif args.bi_indexes:
            input1_args = '-g "%s"' % args.bi_indexes[0]
        elif args.annotation_file:
            input1_args = '-a "%s"' % args.annotation_file[0]
        else:
            exit_and_explain('No annotation file submitted !')

        #INPUT 2: Mapped Reads
        if args.reads:
            # --reads_format has no default, so it may be absent.
            reads_format = args.reads_format[0] if args.reads_format else None
            input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, reads_format)
            strandness = '-s %s' % args.strandness[0]
        else:
            exit_and_explain('No reads submitted !')

        ##Output options
        categories_depth = '-d %s' % args.categories_depth[0]
        if not (args.output_pdf or args.output_png or args.output_svg):
            output_args = '--n'
        else:
            # NOTE(review): when several plot outputs are requested only the
            # last flag below is passed to ALFA (svg over png over pdf).
            # Confirm whether ALFA.py accepts several plot flags at once.
            if args.output_pdf:
                output_args = '--pdf plot.pdf'
            if args.output_png:
                output_args = '--png plot'
            if args.output_svg:
                output_args = '--svg plot'
            # NOTE(review): assumes the yMin/yMax threshold only applies when
            # a plot is requested -- confirm against the original layout.
            if args.threshold:
                output_args = '%s -t %.3f %.3f' % (output_args, args.threshold[0], args.threshold[1])

        ##Run alfa
        cmd = 'python %s %s %s %s %s %s' % (alfa_path, input1_args, input2_args, strandness, categories_depth, output_args)
        logging.info("__________________________________________________________________\n")
        logging.info("Alfa execution")
        logging.info("__________________________________________________________________\n")
        logging.info("Command Line:\n%s\n" % cmd)
        logging.info("------------------------------------------------------------------\n")
        # The command is tokenised here instead of being handed to a shell, so
        # file names and labels cannot trigger shell expansion or injection.
        # universal_newlines=True makes the captured output text on Python 3.
        alfa_result = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE, universal_newlines=True)
        alfa_out, alfa_err = alfa_result.communicate()

        ##Handle stdout, warning, errors...
        redirect_errors(alfa_out, alfa_err)

        logging.info("Alfa prompt:\n%s" % alfa_out)

        ##Redirect outputs
        if args.output_pdf:
            shutil.move('plot.pdf', args.output_pdf[0])
        if args.output_png:
            shutil.move('plot' + '.categories.png', args.output_png[0])
            shutil.move('plot' + '.biotypes.png', args.output_png[1])
        if args.output_svg:
            shutil.move('plot' + '.categories.svg', args.output_svg[0])
            shutil.move('plot' + '.biotypes.svg', args.output_svg[1])
        if args.output_count:
            count_filename = merge_count_files(reads_labels)
            shutil.move(count_filename, args.output_count[0])
        if args.output_indexes:
            if args.annotation_file:
                index_pattern = re.compile(r'.*\.index')
                # Sorted order puts '<prefix>.stranded.index' before
                # '<prefix>.unstranded.index' when both share a prefix.
                indexes = sorted(name for name in os.listdir('.') if index_pattern.search(name))
                if len(indexes) < 2:
                    exit_and_explain('Error: the index files generated by ALFA were not found\nProcess Aborted\n')
                shutil.move(indexes[0], args.output_indexes[0])
                shutil.move(indexes[1], args.output_indexes[1])
            if args.indexes:
                shutil.move(index + '.stranded.index', args.output_indexes[0])
                shutil.move(index + '.unstranded.index', args.output_indexes[1])
            if args.bi_indexes:
                # --bi_index carries a single prefix shared by both files.
                # Copy rather than move so the built-in index files stay in
                # place for later runs.
                shutil.copy(args.bi_indexes[0] + '.stranded.index', args.output_indexes[0])
                shutil.copy(args.bi_indexes[0] + '.unstranded.index', args.output_indexes[1])
    finally:
        cleanup_before_exit(tmp_dir)
|
29
|
# Run the wrapper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|