Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
comparison amplicon_analysis_pipeline.py @ 4:86a12d75ebe4 draft default tip
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 7be61b7ed35ca3deaad68d2eae384c8cd365bcb8
| author | pjbriggs |
|---|---|
| date | Fri, 20 Dec 2019 06:59:49 -0500 |
| parents | 3ab198df8f3f |
| children |
comparison
equal
deleted
inserted
replaced
| 3:3ab198df8f3f | 4:86a12d75ebe4 |
|---|---|
| 115 p.add_argument("-q",dest="trimming_threshold") | 115 p.add_argument("-q",dest="trimming_threshold") |
| 116 p.add_argument("-O",dest="minimum_overlap") | 116 p.add_argument("-O",dest="minimum_overlap") |
| 117 p.add_argument("-L",dest="minimum_length") | 117 p.add_argument("-L",dest="minimum_length") |
| 118 p.add_argument("-l",dest="sliding_window_length") | 118 p.add_argument("-l",dest="sliding_window_length") |
| 119 p.add_argument("-P",dest="pipeline", | 119 p.add_argument("-P",dest="pipeline", |
| 120 choices=["vsearch","uparse","qiime"], | 120 choices=["Vsearch","DADA2"], |
| 121 type=str.lower, | 121 type=str, |
| 122 default="vsearch") | 122 default="Vsearch") |
| 123 p.add_argument("-S",dest="use_silva",action="store_true") | 123 p.add_argument("-S",dest="use_silva",action="store_true") |
| 124 p.add_argument("-H",dest="use_homd",action="store_true") | 124 p.add_argument("-H",dest="use_homd",action="store_true") |
| 125 p.add_argument("-r",dest="reference_data_path") | 125 p.add_argument("-r",dest="reference_data_path") |
| 126 p.add_argument("-c",dest="categories_file") | 126 p.add_argument("-c",dest="categories_file") |
| 127 args = p.parse_args() | 127 args = p.parse_args() |
| 153 final_name.write("%s\n" % '\t'.join((r1,sample_name))) | 153 final_name.write("%s\n" % '\t'.join((r1,sample_name))) |
| 154 final_name.write("%s\n" % '\t'.join((r2,sample_name))) | 154 final_name.write("%s\n" % '\t'.join((r2,sample_name))) |
| 155 sample_names.append(sample_name) | 155 sample_names.append(sample_name) |
| 156 | 156 |
| 157 # Reference database | 157 # Reference database |
| 158 if args.use_silva: | 158 if args.pipeline == "Vsearch": |
| 159 if args.use_silva: | |
| 160 ref_database = "silva" | |
| 161 elif args.use_homd: | |
| 162 ref_database = "homd" | |
| 163 else: | |
| 164 ref_database = "gg" | |
| 165 elif args.pipeline == "DADA2": | |
| 159 ref_database = "silva" | 166 ref_database = "silva" |
| 160 elif args.use_homd: | |
| 161 ref_database = "homd" | |
| 162 else: | |
| 163 ref_database = "gg" | |
| 164 | 167 |
| 165 # Construct the pipeline command | 168 # Construct the pipeline command |
| 166 print "Amplicon analysis: constructing pipeline command" | 169 print "Amplicon analysis: constructing pipeline command" |
| 167 pipeline = PipelineCmd("Amplicon_analysis_pipeline.sh") | 170 pipeline = PipelineCmd("Amplicon_analysis_pipeline.sh") |
| 168 if args.forward_pcr_primer: | 171 if args.forward_pcr_primer: |
| 178 if args.sliding_window_length: | 181 if args.sliding_window_length: |
| 179 pipeline.add_args("-l",args.sliding_window_length) | 182 pipeline.add_args("-l",args.sliding_window_length) |
| 180 if args.reference_data_path: | 183 if args.reference_data_path: |
| 181 pipeline.add_args("-r",args.reference_data_path) | 184 pipeline.add_args("-r",args.reference_data_path) |
| 182 pipeline.add_args("-P",args.pipeline) | 185 pipeline.add_args("-P",args.pipeline) |
| 183 if ref_database == "silva": | 186 if args.pipeline == "Vsearch": |
| 184 pipeline.add_args("-S") | 187 if ref_database == "silva": |
| 185 elif ref_database == "homd": | 188 pipeline.add_args("-S") |
| 186 pipeline.add_args("-H") | 189 elif ref_database == "homd": |
| 190 pipeline.add_args("-H") | |
| 187 | 191 |
| 188 # Echo the pipeline command to stdout | 192 # Echo the pipeline command to stdout |
| 189 print "Running %s" % pipeline | 193 print "Running %s" % pipeline |
| 190 | 194 |
| 191 # Run the pipeline | 195 # Run the pipeline |
| 275 <body> | 279 <body> |
| 276 <h1>Amplicon analysis pipeline: Per-base Quality Boxplots (FastQC)</h1> | 280 <h1>Amplicon analysis pipeline: Per-base Quality Boxplots (FastQC)</h1> |
| 277 """) | 281 """) |
| 278 # Look for raw and trimmed FastQC output for each sample | 282 # Look for raw and trimmed FastQC output for each sample |
| 279 for sample_name in sample_names: | 283 for sample_name in sample_names: |
| 284 # Replace underscores with hyphens in sample names | |
| 285 sample_name = sample_name.replace('_','-') | |
| 286 # Write HTML file with links to the FastQC boxplots | |
| 280 fastqc_dir = os.path.join(sample_name,"FastQC") | 287 fastqc_dir = os.path.join(sample_name,"FastQC") |
| 281 quality_boxplots.write("<h2>%s</h2>" % sample_name) | 288 quality_boxplots.write("<h2>%s</h2>" % sample_name) |
| 282 for d in ("Raw","cutdapt_sickle/Q%s" % phred_score): | 289 for d in ("Raw","cutdapt_sickle/Q%s" % phred_score): |
| 283 quality_boxplots.write("<h3>%s</h3>" % d) | 290 quality_boxplots.write("<h3>%s</h3>" % d) |
| 284 fastqc_html_files = glob.glob( | 291 fastqc_html_files = glob.glob( |
| 304 boxplot)) | 311 boxplot)) |
| 305 quality_boxplots.write("""</body> | 312 quality_boxplots.write("""</body> |
| 306 </html> | 313 </html> |
| 307 """) | 314 """) |
| 308 | 315 |
| 316 # Handle DADA2 error rate plot PDFs | |
| 317 if args.pipeline == "DADA2": | |
| 318 print("Amplicon analysis: collecting error rate plots") | |
| 319 error_rate_plots_dir = os.path.abspath( | |
| 320 os.path.join("DADA2_OTU_tables", | |
| 321 "Error_rate_plots")) | |
| 322 error_rate_plot_pdfs = [os.path.basename(pdf) | |
| 323 for pdf in | |
| 324 sorted(glob.glob( | |
| 325 os.path.join(error_rate_plots_dir,"*.pdf")))] | |
| 326 error_rate_plots_html = os.path.join(error_rate_plots_dir, | |
| 327 "error_rate_plots.html") | |
| 328 with open(error_rate_plots_html,"w") as error_rate_plots_out: | |
| 329 error_rate_plots_out.write("""<html> | |
| 330 <head> | |
| 331 <title>Amplicon analysis pipeline: DADA2 Error Rate Plots</title> | |
| 332 <head> | |
| 333 <body> | |
| 334 <h1>Amplicon analysis pipeline: DADA2 Error Rate Plots</h1> | |
| 335 """) | |
| 336 error_rate_plots_out.write("<ul>\n") | |
| 337 for pdf in error_rate_plot_pdfs: | |
| 338 error_rate_plots_out.write("<li>%s</li>\n" % ahref(pdf)) | |
| 339 error_rate_plots_out.write("<ul>\n") | |
| 340 error_rate_plots_out.write("""</body> | |
| 341 </html> | |
| 342 """) | |
| 343 | |
| 309 # Handle additional output when categories file was supplied | 344 # Handle additional output when categories file was supplied |
| 310 if args.categories_file is not None: | 345 if args.categories_file is not None: |
| 311 # Alpha diversity boxplots | 346 # Alpha diversity boxplots |
| 312 print "Amplicon analysis: indexing alpha diversity boxplots" | 347 print "Amplicon analysis: indexing alpha diversity boxplots" |
| 313 boxplots_dir = os.path.abspath( | 348 boxplots_dir = os.path.abspath( |
| 314 os.path.join("RESULTS", | 349 os.path.join("RESULTS", |
| 315 "%s_%s" % (args.pipeline.title(), | 350 "%s_%s" % (args.pipeline, |
| 316 ref_database), | 351 ref_database), |
| 317 "Alpha_diversity", | 352 "Alpha_diversity", |
| 318 "Alpha_diversity_boxplot", | 353 "Alpha_diversity_boxplot", |
| 319 "Categories_shannon")) | 354 "Categories_shannon")) |
| 320 print "Amplicon analysis: gathering PDFs from %s" % boxplots_dir | 355 print "Amplicon analysis: gathering PDFs from %s" % boxplots_dir |