Mercurial > repos > mheinzl > fsd_bvsa
comparison fsd_beforevsafter.py @ 2:e8115b71edbd draft
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
author | mheinzl |
---|---|
date | Tue, 15 May 2018 13:50:02 -0400 |
parents | 6716b1cddf3e |
children | 327c40a821ed |
comparison
equal
deleted
inserted
replaced
1:6ed6dca9488f | 2:e8115b71edbd |
---|---|
9 # a TABULAR file with tags before the alignment to the SSCS, a FASTA file with reads that were part of the DCS and | 9 # a TABULAR file with tags before the alignment to the SSCS, a FASTA file with reads that were part of the DCS and |
10 # a FASTA file with tags after trimming as input (optional). | 10 # a FASTA file with tags after trimming as input (optional). |
11 # The program produces a plot which shows the distribution of family sizes of the DCS from the input files and | 11 # The program produces a plot which shows the distribution of family sizes of the DCS from the input files and |
12 # a CSV file with the data of the plot. | 12 # a CSV file with the data of the plot. |
13 | 13 |
14 # USAGE: python FSD before vs after_no_refF1.3_FINAL.py --inputFile_SSCS filenameSSCS --makeDCS filenameMakeDCS --afterTrimming filenameAfterTrimming --alignedTags filenameTagsRefGenome | 14 # USAGE: python FSD before vs after_no_refF1.3_FINAL.py --inputFile_SSCS filenameSSCS --inputName1 filenameSSCS --makeDCS filenameMakeDCS --afterTrimming filenameAfterTrimming --alignedTags filenameTagsRefGenome |
15 # --sep "characterWhichSeparatesCSVFile" --output_csv outputfile_name_csv --output_pdf outputfile_name_pdf | 15 # --sep "characterWhichSeparatesCSVFile" --output_csv outputfile_name_csv --output_pdf outputfile_name_pdf |
16 | 16 |
17 | 17 |
18 import numpy | 18 import numpy |
19 import matplotlib.pyplot as plt | 19 import matplotlib.pyplot as plt |
44 | 44 |
45 def make_argparser(): | 45 def make_argparser(): |
46 parser = argparse.ArgumentParser(description='Analysis of read loss in duplex sequencing data') | 46 parser = argparse.ArgumentParser(description='Analysis of read loss in duplex sequencing data') |
47 parser.add_argument('--inputFile_SSCS', | 47 parser.add_argument('--inputFile_SSCS', |
48 help='Tabular File with three columns: ab or ba, tag and family size.') | 48 help='Tabular File with three columns: ab or ba, tag and family size.') |
49 parser.add_argument('--inputName1') | |
49 parser.add_argument('--makeDCS', | 50 parser.add_argument('--makeDCS', |
50 help='FASTA File with information about tag and family size in the header.') | 51 help='FASTA File with information about tag and family size in the header.') |
51 parser.add_argument('--afterTrimming',default=None, | 52 parser.add_argument('--afterTrimming',default=None, |
52 help='FASTA File with information about tag and family size in the header.') | 53 help='FASTA File with information about tag and family size in the header.') |
53 parser.add_argument('--alignedTags',default=None, | 54 parser.add_argument('--alignedTags',default=None, |
63 def compare_read_families_read_loss(argv): | 64 def compare_read_families_read_loss(argv): |
64 parser = make_argparser() | 65 parser = make_argparser() |
65 args = parser.parse_args(argv[1:]) | 66 args = parser.parse_args(argv[1:]) |
66 | 67 |
67 SSCS_file = args.inputFile_SSCS | 68 SSCS_file = args.inputFile_SSCS |
69 SSCS_file_name = args.inputName1 | |
68 makeConsensus = args.makeDCS | 70 makeConsensus = args.makeDCS |
69 afterTrimming = args.afterTrimming | 71 afterTrimming = args.afterTrimming |
70 ref_genome = args.alignedTags | 72 ref_genome = args.alignedTags |
71 title_file = args.output_csv | 73 title_file = args.output_csv |
72 title_file2 = args.output_pdf | 74 title_file2 = args.output_pdf |
78 | 80 |
79 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf: | 81 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf: |
80 ### PLOT ### | 82 ### PLOT ### |
81 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format | 83 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
82 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color | 84 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
83 plt.rcParams['xtick.labelsize'] = 12 | 85 plt.rcParams['xtick.labelsize'] = 14 |
84 plt.rcParams['ytick.labelsize'] = 12 | 86 plt.rcParams['ytick.labelsize'] = 14 |
85 plt.rcParams['patch.edgecolor'] = "black" | 87 plt.rcParams['patch.edgecolor'] = "black" |
86 fig = plt.figure() | 88 fig = plt.figure() |
87 plt.subplots_adjust(bottom=0.3) | 89 plt.subplots_adjust(bottom=0.3) |
88 | 90 |
89 list1 = [] | 91 list1 = [] |
219 | 221 |
220 legend4 = "* In the plot, the family sizes of ab and ba strands and of both duplex tags were used.\nWhereas the total numbers indicate only the single count of the formed duplex tags." | 222 legend4 = "* In the plot, the family sizes of ab and ba strands and of both duplex tags were used.\nWhereas the total numbers indicate only the single count of the formed duplex tags." |
221 plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure) | 223 plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure) |
222 | 224 |
223 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) | 225 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) |
224 plt.title("Family Size Distribution of Tags from various Steps of the Galaxy Pipeline", fontsize=14) | 226 plt.title("Family size distribution of tags from various steps of the Du Novo pipeline", fontsize=14) |
225 plt.xlabel("No. of Family Members", fontsize=12) | 227 plt.xlabel("Family size", fontsize=14) |
226 plt.ylabel("Absolute Frequency", fontsize=12) | 228 plt.ylabel("Absolute Frequency", fontsize=14) |
227 plt.grid(b=True, which="major", color="#424242", linestyle=":") | 229 plt.grid(b=True, which="major", color="#424242", linestyle=":") |
228 plt.margins(0.01, None) | 230 plt.margins(0.01, None) |
229 | 231 |
230 pdf.savefig(fig, bbox_inch="tight") | 232 pdf.savefig(fig, bbox_inch="tight") |
231 plt.close() | 233 plt.close() |
232 | 234 |
233 # write information about plot into a csv file | 235 # write information about plot into a csv file |
234 output_file.write("Dataset:{}{}\n".format(sep, SSCS_file)) | 236 output_file.write("Dataset:{}{}\n".format(sep, SSCS_file_name)) |
235 if ref_genome != str(None): | 237 if ref_genome != str(None): |
236 output_file.write("{}AB{}BA\n".format(sep, sep)) | 238 output_file.write("{}AB{}BA\n".format(sep, sep)) |
237 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(quant_ab_ref), sep, max(quant_ba_ref))) | 239 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(quant_ab_ref), sep, max(quant_ba_ref))) |
238 output_file.write( | 240 output_file.write( |
239 "absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1])) | 241 "absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1])) |