comparison draw_features.py @ 0:636eeb4fb9ac draft

Uploaded
author greg
date Wed, 25 Jan 2023 20:38:21 +0000
parents
children 7d47800ee5ac
comparison
equal deleted inserted replaced
-1:000000000000 0:636eeb4fb9ac
1 #!/usr/bin/env python
2
3 import argparse
4 import os
5
6 import pandas
7 import matplotlib.pyplot as pyplot
8 from Bio import SeqIO
9 from dna_features_viewer import GraphicFeature, GraphicRecord
10
11
12 AMR_COLOR = '#FED976'
13 INC_GROUPS_COLOR = '#0570B0'
14 FEATURE_COLORS = [AMR_COLOR, INC_GROUPS_COLOR]
15 FIGURE_WIDTH = 13
16
17
18 def draw_features(feature_hits_files, contigs, output_dir):
19 ofh = open('process_log', 'w')
20 # Read feature_hits_files.
21 feature_hits = pandas.Series(dtype=object)
22 feature_plots = pandas.Series(dtype=object)
23 for feature_hits_file in feature_hits_files:
24 feature_name = os.path.basename(feature_hits_file)
25 # Make sure the file is not empty.
26 if os.path.isfile(feature_hits_file) and os.path.getsize(feature_hits_file) > 0:
27 best_hits = pandas.read_csv(filepath_or_buffer=feature_hits_file, sep='\t', header=None)
28 ofh.write("\nFeature file %s will be processed\n" % str(feature_hits_file))
29 else:
30 ofh.write("\nEmpty feature file %s will NOT be processed\n" % str(feature_hits_file))
31 best_hits = None
32 feature_hits[feature_name] = best_hits
33
34 # Draw one plot per contig for simplicity.
35 ofh.write("\nProcessing contigs file: %s\n" % str(contigs))
36 for contig in SeqIO.parse(contigs, 'fasta'):
37 ofh.write("Processing contig: %s\n" % str(contig))
38 contig_plot_png = os.path.join(output_dir, '%s.png' % str(contig.id))
39 feature_sets_to_plot = pandas.Series(dtype=object)
40 for feature_number in range(len(feature_hits)):
41 feature_name = feature_hits.index.to_list()[feature_number]
42 ofh.write("Processing feature name: %s\n" % str(feature_name))
43 these_features = feature_hits[feature_name]
44 if these_features is None or these_features.shape[0] == 0:
45 # No features.
46 continue
47 contig_features = these_features.loc[these_features.iloc[:, 0] == contig.id, :]
48 if contig_features is None or contig_features.shape[0] == 0:
49 # No features.
50 continue
51 features_to_plot = []
52 for i in range(contig_features.shape[0]):
53 i = contig_features.iloc[i, :]
54 features_to_plot += [GraphicFeature(start=i[1], end=i[2], label=i[3], strand=1 * i[5], color=FEATURE_COLORS[feature_number])]
55 feature_sets_to_plot[feature_name] = features_to_plot
56 ofh.write("Number of features to plot: %d\n" % len(feature_sets_to_plot))
57 if len(feature_sets_to_plot) == 0:
58 # No features.
59 continue
60 # Determine each plot height for later scaling
61 expected_plot_heights = []
62 for i in range(len(feature_sets_to_plot)):
63 record = GraphicRecord(sequence_length=len(contig), features=feature_sets_to_plot[i])
64 if i == len(feature_sets_to_plot) - 1:
65 with_ruler = True
66 else:
67 with_ruler = False
68 plot, _ = record.plot(figure_width=FIGURE_WIDTH, with_ruler=with_ruler)
69 expected_plot_heights += [plot.figure.get_size_inches()[1]]
70 plot_height_sum = sum(expected_plot_heights)
71 # Make a figure with separate plots for each feature class.
72 plots = pyplot.subplots(nrows=len(feature_sets_to_plot),
73 ncols=1,
74 sharex=True,
75 figsize=(FIGURE_WIDTH, plot_height_sum * .66666),
76 gridspec_kw={"height_ratios": expected_plot_heights})
77 figure = plots[0]
78 plots = plots[1]
79 if len(feature_sets_to_plot) == 1:
80 plots = [plots]
81 # Add each feature class's plot with the pre-determined height.
82 for i in range(len(feature_sets_to_plot)):
83 record = GraphicRecord(sequence_length=len(contig), features=feature_sets_to_plot[i])
84 if i == len(feature_sets_to_plot) - 1:
85 with_ruler = True
86 else:
87 with_ruler = False
88 plot, _ = record.plot(ax=plots[i], with_ruler=with_ruler, figure_width=FIGURE_WIDTH)
89 ymin, ymax = plot.figure.axes[0].get_ylim()
90 if i == 0:
91 plot.text(x=0, y=ymax, s=contig.id)
92 figure.tight_layout()
93 ofh.write("Saving PNG plot file: %s\n" % str(contig_plot_png))
94 figure.savefig(contig_plot_png)
95 feature_plots[contig.id] = contig_plot_png
96 ofh.close()
97
98
99 if __name__ == '__main__':
100 parser = argparse.ArgumentParser()
101
102 parser.add_argument('--feature_hits_dir', action='store', dest='feature_hits_dir', help='Directory of tabular files containing feature hits')
103 parser.add_argument('--contigs', action='store', dest='contigs', help='Fasta file of contigs')
104 parser.add_argument('--output_dir', action='store', dest='output_dir', help='Output directory')
105
106 args = parser.parse_args()
107
108 # Get thge collection of feature hits files. The collection
109 # will be sorted alphabetically and will contain 2 files
110 # named something like AMR_CDS_311_2022_12_20.fasta and
111 # Incompatibility_Groups_2023_01_01.fasta.
112 feature_hits_files = []
113 for file_name in sorted(os.listdir(args.feature_hits_dir)):
114 file_path = os.path.abspath(os.path.join(args.feature_hits_dir, file_name))
115 feature_hits_files.append(file_path)
116
117 draw_features(feature_hits_files, args.contigs, args.output_dir)