Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
comparison amplicon_analysis_pipeline.py @ 4:86a12d75ebe4 draft default tip
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 7be61b7ed35ca3deaad68d2eae384c8cd365bcb8
author | pjbriggs |
---|---|
date | Fri, 20 Dec 2019 06:59:49 -0500 |
parents | 3ab198df8f3f |
children |
comparison
equal
deleted
inserted
replaced
3:3ab198df8f3f | 4:86a12d75ebe4 |
---|---|
115 p.add_argument("-q",dest="trimming_threshold") | 115 p.add_argument("-q",dest="trimming_threshold") |
116 p.add_argument("-O",dest="minimum_overlap") | 116 p.add_argument("-O",dest="minimum_overlap") |
117 p.add_argument("-L",dest="minimum_length") | 117 p.add_argument("-L",dest="minimum_length") |
118 p.add_argument("-l",dest="sliding_window_length") | 118 p.add_argument("-l",dest="sliding_window_length") |
119 p.add_argument("-P",dest="pipeline", | 119 p.add_argument("-P",dest="pipeline", |
120 choices=["vsearch","uparse","qiime"], | 120 choices=["Vsearch","DADA2"], |
121 type=str.lower, | 121 type=str, |
122 default="vsearch") | 122 default="Vsearch") |
123 p.add_argument("-S",dest="use_silva",action="store_true") | 123 p.add_argument("-S",dest="use_silva",action="store_true") |
124 p.add_argument("-H",dest="use_homd",action="store_true") | 124 p.add_argument("-H",dest="use_homd",action="store_true") |
125 p.add_argument("-r",dest="reference_data_path") | 125 p.add_argument("-r",dest="reference_data_path") |
126 p.add_argument("-c",dest="categories_file") | 126 p.add_argument("-c",dest="categories_file") |
127 args = p.parse_args() | 127 args = p.parse_args() |
153 final_name.write("%s\n" % '\t'.join((r1,sample_name))) | 153 final_name.write("%s\n" % '\t'.join((r1,sample_name))) |
154 final_name.write("%s\n" % '\t'.join((r2,sample_name))) | 154 final_name.write("%s\n" % '\t'.join((r2,sample_name))) |
155 sample_names.append(sample_name) | 155 sample_names.append(sample_name) |
156 | 156 |
157 # Reference database | 157 # Reference database |
158 if args.use_silva: | 158 if args.pipeline == "Vsearch": |
159 if args.use_silva: | |
160 ref_database = "silva" | |
161 elif args.use_homd: | |
162 ref_database = "homd" | |
163 else: | |
164 ref_database = "gg" | |
165 elif args.pipeline == "DADA2": | |
159 ref_database = "silva" | 166 ref_database = "silva" |
160 elif args.use_homd: | |
161 ref_database = "homd" | |
162 else: | |
163 ref_database = "gg" | |
164 | 167 |
165 # Construct the pipeline command | 168 # Construct the pipeline command |
166 print "Amplicon analysis: constructing pipeline command" | 169 print "Amplicon analysis: constructing pipeline command" |
167 pipeline = PipelineCmd("Amplicon_analysis_pipeline.sh") | 170 pipeline = PipelineCmd("Amplicon_analysis_pipeline.sh") |
168 if args.forward_pcr_primer: | 171 if args.forward_pcr_primer: |
178 if args.sliding_window_length: | 181 if args.sliding_window_length: |
179 pipeline.add_args("-l",args.sliding_window_length) | 182 pipeline.add_args("-l",args.sliding_window_length) |
180 if args.reference_data_path: | 183 if args.reference_data_path: |
181 pipeline.add_args("-r",args.reference_data_path) | 184 pipeline.add_args("-r",args.reference_data_path) |
182 pipeline.add_args("-P",args.pipeline) | 185 pipeline.add_args("-P",args.pipeline) |
183 if ref_database == "silva": | 186 if args.pipeline == "Vsearch": |
184 pipeline.add_args("-S") | 187 if ref_database == "silva": |
185 elif ref_database == "homd": | 188 pipeline.add_args("-S") |
186 pipeline.add_args("-H") | 189 elif ref_database == "homd": |
190 pipeline.add_args("-H") | |
187 | 191 |
188 # Echo the pipeline command to stdout | 192 # Echo the pipeline command to stdout |
189 print "Running %s" % pipeline | 193 print "Running %s" % pipeline |
190 | 194 |
191 # Run the pipeline | 195 # Run the pipeline |
275 <body> | 279 <body> |
276 <h1>Amplicon analysis pipeline: Per-base Quality Boxplots (FastQC)</h1> | 280 <h1>Amplicon analysis pipeline: Per-base Quality Boxplots (FastQC)</h1> |
277 """) | 281 """) |
278 # Look for raw and trimmed FastQC output for each sample | 282 # Look for raw and trimmed FastQC output for each sample |
279 for sample_name in sample_names: | 283 for sample_name in sample_names: |
284 # Replace underscores with hyphens in sample names | |
285 sample_name = sample_name.replace('_','-') | |
286 # Write HTML file with links to the FastQC boxplots | |
280 fastqc_dir = os.path.join(sample_name,"FastQC") | 287 fastqc_dir = os.path.join(sample_name,"FastQC") |
281 quality_boxplots.write("<h2>%s</h2>" % sample_name) | 288 quality_boxplots.write("<h2>%s</h2>" % sample_name) |
282 for d in ("Raw","cutdapt_sickle/Q%s" % phred_score): | 289 for d in ("Raw","cutdapt_sickle/Q%s" % phred_score): |
283 quality_boxplots.write("<h3>%s</h3>" % d) | 290 quality_boxplots.write("<h3>%s</h3>" % d) |
284 fastqc_html_files = glob.glob( | 291 fastqc_html_files = glob.glob( |
304 boxplot)) | 311 boxplot)) |
305 quality_boxplots.write("""</body> | 312 quality_boxplots.write("""</body> |
306 </html> | 313 </html> |
307 """) | 314 """) |
308 | 315 |
316 # Handle DADA2 error rate plot PDFs | |
317 if args.pipeline == "DADA2": | |
318 print("Amplicon analysis: collecting error rate plots") | |
319 error_rate_plots_dir = os.path.abspath( | |
320 os.path.join("DADA2_OTU_tables", | |
321 "Error_rate_plots")) | |
322 error_rate_plot_pdfs = [os.path.basename(pdf) | |
323 for pdf in | |
324 sorted(glob.glob( | |
325 os.path.join(error_rate_plots_dir,"*.pdf")))] | |
326 error_rate_plots_html = os.path.join(error_rate_plots_dir, | |
327 "error_rate_plots.html") | |
328 with open(error_rate_plots_html,"w") as error_rate_plots_out: | |
329 error_rate_plots_out.write("""<html> | |
330 <head> | |
331 <title>Amplicon analysis pipeline: DADA2 Error Rate Plots</title> | |
332 <head> | |
333 <body> | |
334 <h1>Amplicon analysis pipeline: DADA2 Error Rate Plots</h1> | |
335 """) | |
336 error_rate_plots_out.write("<ul>\n") | |
337 for pdf in error_rate_plot_pdfs: | |
338 error_rate_plots_out.write("<li>%s</li>\n" % ahref(pdf)) | |
339 error_rate_plots_out.write("<ul>\n") | |
340 error_rate_plots_out.write("""</body> | |
341 </html> | |
342 """) | |
343 | |
309 # Handle additional output when categories file was supplied | 344 # Handle additional output when categories file was supplied |
310 if args.categories_file is not None: | 345 if args.categories_file is not None: |
311 # Alpha diversity boxplots | 346 # Alpha diversity boxplots |
312 print "Amplicon analysis: indexing alpha diversity boxplots" | 347 print "Amplicon analysis: indexing alpha diversity boxplots" |
313 boxplots_dir = os.path.abspath( | 348 boxplots_dir = os.path.abspath( |
314 os.path.join("RESULTS", | 349 os.path.join("RESULTS", |
315 "%s_%s" % (args.pipeline.title(), | 350 "%s_%s" % (args.pipeline, |
316 ref_database), | 351 ref_database), |
317 "Alpha_diversity", | 352 "Alpha_diversity", |
318 "Alpha_diversity_boxplot", | 353 "Alpha_diversity_boxplot", |
319 "Categories_shannon")) | 354 "Categories_shannon")) |
320 print "Amplicon analysis: gathering PDFs from %s" % boxplots_dir | 355 print "Amplicon analysis: gathering PDFs from %s" % boxplots_dir |