comparison fsd_beforevsafter.py @ 2:e8115b71edbd draft

planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
author mheinzl
date Tue, 15 May 2018 13:50:02 -0400
parents 6716b1cddf3e
children 327c40a821ed
comparison
equal deleted inserted replaced
1:6ed6dca9488f 2:e8115b71edbd
9 # a TABULAR file with tags before the alignment to the SSCS, a FASTA file with reads that were part of the DCS and 9 # a TABULAR file with tags before the alignment to the SSCS, a FASTA file with reads that were part of the DCS and
10 # a FASTA file with tags after trimming as input (optional). 10 # a FASTA file with tags after trimming as input (optional).
11 # The program produces a plot which shows the distribution of family sizes of the DCS from the input files and 11 # The program produces a plot which shows the distribution of family sizes of the DCS from the input files and
12 # a CSV file with the data of the plot. 12 # a CSV file with the data of the plot.
13 13
14 # USAGE: python FSD before vs after_no_refF1.3_FINAL.py --inputFile_SSCS filenameSSCS --makeDCS filenameMakeDCS --afterTrimming filenameAfterTrimming --alignedTags filenameTagsRefGenome 14 # USAGE: python FSD before vs after_no_refF1.3_FINAL.py --inputFile_SSCS filenameSSCS --inputName1 filenameSSCS --makeDCS filenameMakeDCS --afterTrimming filenameAfterTrimming --alignedTags filenameTagsRefGenome
15 # --sep "characterWhichSeparatesCSVFile" --output_csv outputfile_name_csv --output_pdf outputfile_name_pdf 15 # --sep "characterWhichSeparatesCSVFile" --output_csv outputfile_name_csv --output_pdf outputfile_name_pdf
16 16
17 17
18 import numpy 18 import numpy
19 import matplotlib.pyplot as plt 19 import matplotlib.pyplot as plt
44 44
45 def make_argparser(): 45 def make_argparser():
46 parser = argparse.ArgumentParser(description='Analysis of read loss in duplex sequencing data') 46 parser = argparse.ArgumentParser(description='Analysis of read loss in duplex sequencing data')
47 parser.add_argument('--inputFile_SSCS', 47 parser.add_argument('--inputFile_SSCS',
48 help='Tabular File with three columns: ab or ba, tag and family size.') 48 help='Tabular File with three columns: ab or ba, tag and family size.')
49 parser.add_argument('--inputName1')
49 parser.add_argument('--makeDCS', 50 parser.add_argument('--makeDCS',
50 help='FASTA File with information about tag and family size in the header.') 51 help='FASTA File with information about tag and family size in the header.')
51 parser.add_argument('--afterTrimming',default=None, 52 parser.add_argument('--afterTrimming',default=None,
52 help='FASTA File with information about tag and family size in the header.') 53 help='FASTA File with information about tag and family size in the header.')
53 parser.add_argument('--alignedTags',default=None, 54 parser.add_argument('--alignedTags',default=None,
63 def compare_read_families_read_loss(argv): 64 def compare_read_families_read_loss(argv):
64 parser = make_argparser() 65 parser = make_argparser()
65 args = parser.parse_args(argv[1:]) 66 args = parser.parse_args(argv[1:])
66 67
67 SSCS_file = args.inputFile_SSCS 68 SSCS_file = args.inputFile_SSCS
69 SSCS_file_name = args.inputName1
68 makeConsensus = args.makeDCS 70 makeConsensus = args.makeDCS
69 afterTrimming = args.afterTrimming 71 afterTrimming = args.afterTrimming
70 ref_genome = args.alignedTags 72 ref_genome = args.alignedTags
71 title_file = args.output_csv 73 title_file = args.output_csv
72 title_file2 = args.output_pdf 74 title_file2 = args.output_pdf
78 80
79 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf: 81 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf:
80 ### PLOT ### 82 ### PLOT ###
81 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format 83 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format
82 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color 84 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color
83 plt.rcParams['xtick.labelsize'] = 12 85 plt.rcParams['xtick.labelsize'] = 14
84 plt.rcParams['ytick.labelsize'] = 12 86 plt.rcParams['ytick.labelsize'] = 14
85 plt.rcParams['patch.edgecolor'] = "black" 87 plt.rcParams['patch.edgecolor'] = "black"
86 fig = plt.figure() 88 fig = plt.figure()
87 plt.subplots_adjust(bottom=0.3) 89 plt.subplots_adjust(bottom=0.3)
88 90
89 list1 = [] 91 list1 = []
219 221
220 legend4 = "* In the plot, the family sizes of ab and ba strands and of both duplex tags were used.\nWhereas the total numbers indicate only the single count of the formed duplex tags." 222 legend4 = "* In the plot, the family sizes of ab and ba strands and of both duplex tags were used.\nWhereas the total numbers indicate only the single count of the formed duplex tags."
221 plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure) 223 plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure)
222 224
223 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) 225 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True)
224 plt.title("Family Size Distribution of Tags from various Steps of the Galaxy Pipeline", fontsize=14) 226 plt.title("Family size distribution of tags from various steps of the Du Novo pipeline", fontsize=14)
225 plt.xlabel("No. of Family Members", fontsize=12) 227 plt.xlabel("Family size", fontsize=14)
226 plt.ylabel("Absolute Frequency", fontsize=12) 228 plt.ylabel("Absolute Frequency", fontsize=14)
227 plt.grid(b=True, which="major", color="#424242", linestyle=":") 229 plt.grid(b=True, which="major", color="#424242", linestyle=":")
228 plt.margins(0.01, None) 230 plt.margins(0.01, None)
229 231
230 pdf.savefig(fig, bbox_inch="tight") 232 pdf.savefig(fig, bbox_inch="tight")
231 plt.close() 233 plt.close()
232 234
233 # write information about plot into a csv file 235 # write information about plot into a csv file
234 output_file.write("Dataset:{}{}\n".format(sep, SSCS_file)) 236 output_file.write("Dataset:{}{}\n".format(sep, SSCS_file_name))
235 if ref_genome != str(None): 237 if ref_genome != str(None):
236 output_file.write("{}AB{}BA\n".format(sep, sep)) 238 output_file.write("{}AB{}BA\n".format(sep, sep))
237 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(quant_ab_ref), sep, max(quant_ba_ref))) 239 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(quant_ab_ref), sep, max(quant_ba_ref)))
238 output_file.write( 240 output_file.write(
239 "absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1])) 241 "absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1]))