diff NGSrich_0.5.5/src/_main/NGSrichEvaluate.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NGSrich_0.5.5/src/_main/NGSrichEvaluate.java	Mon Nov 21 08:12:19 2011 -0500
@@ -0,0 +1,227 @@
+package _main;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.sql.Time;
+import java.util.Properties;
+import middlewares.Misc;
+import exceptions.ChromosomeMismatchException;
+import exceptions.FileFormatException;
+
+/**
+ * This is the Main-class of the evaluation part of the software. This class
+ * uses the Enrichment-class to process the phases (the parts) of the pipeline.
+ * 
+ * @author Ali Abdallah
+ */
+
+public class NGSrichEvaluate {
+
+	/**
+	 * An array of arguments containing the following option elements in the 
+	 * following order:
+	 * -r <readsFile> (-a|-g) <annotation> -t <target> [-s <sName>] 
+	 * [-T <tmpDir>][-o <outDir>][-p <poor> -h <high>][-no_details]
+	 * 
+	 * Required:
+	 * <readsFile> 	Path to read alignment file in SAM or BAM format.
+	 * <annotation> UCSC genome version name.
+	 * <target>		Path to target file in BED format.
+	 * 
+	 * Optional:
+	 * <sName> 		Sample name [default: prefix of <readsFile>].
+	 * <tmpDir> 	Temporary directory [default: '/tmp'].
+	 * <outDir> 	Output directory [default: '<pathToReadsFile>/enrichment'].
+	 * <poor> 		Cutoff for poor coverage [default: 2].
+	 * <high> 		Cutoff for high coverage [default: 200].
+	 */
+	String[] args;
+
+	public NGSrichEvaluate(String[] args) {
+		this.args = args;
+	}
+
+	public void evaluate() throws IOException, FileFormatException,
+			InterruptedException {
+		/**
+		 * Ordered List of Parameter (left/right): readFName genomeFName
+		 * targetFName tmpDir outDir
+		 * 
+		 */
+
+		int alen = args.length;
+		String[] params = new String[10];
+
+		String usagestr = 
+				"\nUsage: java NGSrich evaluate -r <readsFile> " 
+				+ "-u <genome-name> -t <target> [(-a|-g) "
+				+ "<annotation>] [-s <sName>] [-T <tmpDir>] "
+				+ "[-o <outDir>] [-p <poor> -h <high>][--no-details>]\n\n\tRequired:\n\t"
+				+ "<readsFile>\tPath to read alignment file in SAM or BAM format."
+				+ "\n\t<genome-name>\tUCSC genome version name.\n\t<target>\tPath "
+				+ "to target file in BED format.\n\n\tOptional:\n\t<sName>\t\t"
+				+ "Sample name [default: prefix of <readsFile>].\n\t<annotation>\t\t"
+				+ "path of the annotation file [default: the genome is " 
+				+ "downloaded based on the genome version name].\n\t<tmpDir>\t"
+				+ "Temporary directory [default: '/tmp'].\n\t<outDir>\tOutput "
+				+ "directory [default: '<pathToReadsFile>/enrichment'].\n\t"
+				+ "<poor>\t\tCutoff for poor coverage [default: 2].\n\t<high>"
+				+ "\t\tCutoff for high coverage [default: 200]. \n\t--no-details\tto repress the computation of the" +
+						" evaluation details\n";
+		
+		if (alen == 0) {
+			System.out.println(usagestr);
+			System.exit(0);
+		}
+
+		boolean t = false, o = false, h = false, po = false, sname = false, u = false, r = false, g = false,
+				a = false, T = false;
+		params[9] = "1";
+		for (int i = 0; i < alen; i = i + 2) {
+			if ((args[i].length() == 2 && args[i].charAt(0) == '-') || args[i].equals("--no-details")) {
+				char flag = (args[i].length()==2)?args[i].charAt(1):args[i].charAt(2);
+				switch (flag) {
+				case 'r': params[0] = args[i + 1];r = true;break;
+				case 'g': params[1] = args[i + 1];g = true;break;
+				case 'a': params[1] = args[i + 1];a = true;break;
+				case 't': params[2] = args[i + 1];t = true;break;
+				case 'T': params[3] = args[i + 1];T = true;break;
+				case 'o': params[4] = args[i + 1];o = true;break;
+				case 'p': params[5] = args[i + 1];po = true;break;
+				case 'h': params[6] = args[i + 1];h = true;break;
+				// Added by PF 2011-07-12
+				case 's': params[7] = args[i + 1];sname = true;break;
+				case 'u': params[8] = args[i+1]; u = true;break;
+				case 'n': params[9]="0";break;
+				}
+			} else {
+				System.out.println(usagestr);
+				System.exit(0);
+			}
+		}
+		
+		boolean required = r && t && u;
+		
+		if(!required){
+			System.out.println("Some required arguments are missing.\n");
+			System.out.println(usagestr);
+			System.exit(0);
+		}
+
+		String default_properties_file = createDefaultPropertiesFile();
+		Properties p = new Properties();
+		BufferedInputStream stream = 
+					new BufferedInputStream(
+						new FileInputStream(default_properties_file));
+
+		p.load(stream);
+		stream.close();
+		if (!T)params[3] = p.getProperty("tmpDir");
+		if (!o) {
+			String oPath = p.getProperty("outDirPath");
+			if (oPath.equals(""))oPath = Misc.path(params[0]);
+			params[4] = oPath + Misc.slash(oPath) + p.getProperty("outDir");
+			new File(params[4]).mkdir();
+		}
+		if (!po)params[5] = p.getProperty("poor");
+		if (!h)params[6] = p.getProperty("high");
+		if (!sname)params[7] = "none";
+		if (!(a||g))params[1] = "none";
+		
+		Enrichment ngs = new Enrichment(params);
+		// Convert BAM to SAM if necessary.
+		String infile = params[0];
+		if (infile.endsWith(".bam")) {
+			System.out.println("======================0======================");
+			System.out.println("\n>>> Found BAM file: converting to SAM\n");
+			infile = ngs.bam2sam();
+		}
+		ngs.readFileName = infile;
+
+		//		File timeReport = 
+		//	new File(params[3] + Misc.slash(params[3])+"TimeReport.txt");
+		//		FileWriter trWriter = new FileWriter(timeReport);
+		
+		// Reduce the files.
+		System.out.println("======================1======================");
+		System.out.println(">>> STEP 1: reducing files\n");
+		try {
+			long start = System.currentTimeMillis();
+			ngs.reduceFiles();
+			long rtime = System.currentTimeMillis() - start;
+			Time time = new Time(rtime);
+			//trWriter.write("Reducing files took: " + time + "\n");
+
+		} catch (ChromosomeMismatchException e) {
+			e.printStackTrace();
+		}
+		
+		// Compute the target coverage files.
+		System.out.println("\n======================2======================");
+		System.out.println(">>> STEP 2: computing target coverage data\n");
+		long start = System.currentTimeMillis();
+		ngs.computeTargetCoverageFiles();
+		long rtime = System.currentTimeMillis() - start;
+		Time time = new Time(rtime);
+		//		trWriter.write("Computing target coverage data took: " + time + "\n");
+		
+		// Evaluate enrichment.
+		System.out.println("\n======================3======================");
+		System.out.println(">>> STEP 3: evaluating enrichment files\n");
+		start = System.currentTimeMillis();
+		ngs.evaluate();
+		rtime = System.currentTimeMillis() - start;
+		time = new Time(rtime);
+		//trWriter.write("Evaluating enrichment files took: " + time + "\n");
+		Thread.sleep(10000);
+		
+		// Compute the target wiggle files.
+		System.out.println("\n======================4======================");
+		System.out.println(">>> STEP 4: computing targets wiggle data\n");
+		start = System.currentTimeMillis();
+		ngs.computeWiggleFile();
+		rtime = System.currentTimeMillis() - start;
+		time = new Time(rtime);
+		//trWriter.write("Computing targets wiggle data took: " + time + "\n");
+
+		// Compute the overall wiggle files.
+		System.out.println("\n======================5======================");
+		System.out.println(">>> STEP 5: computing overall wiggle data\n");
+		ngs.computeOverallWiggleFile();
+		start = System.currentTimeMillis();
+		ngs.computeOverallWiggleFile();
+		rtime = System.currentTimeMillis() - start;
+		time = new Time(rtime);
+		//trWriter.write("Computing targets wiggle data took: " + time + "\n");
+		System.out.println("\n=============================================");
+		//trWriter.close();
+	}
+
+	private String createDefaultPropertiesFile() throws IOException {
+		String default_properties_file = 
+			Misc.binDir()+Misc.slash(Misc.binDir())+"DEFAULT.properties";
+		if(!new File(default_properties_file).exists()){
+			String default_str = 
+				"! Path of the temporary directory.\n" +
+				"tmpDir: /tmp\n" +
+				"! Path of the father directory of the output directory. " +
+				"When empty the output directory is placed in the directory " +
+				"containing the reads alignment file.\n" +
+				"outDirPath:\n" +
+				"! Name of the output directory (not the path).\n" +
+				"outDir: enrichment\n" +
+				"! Define poorly covered genes.\n" +
+				"poor: 2\n" +
+				"! Defines highly covered genes.\n" +
+				"high: 200";
+			FileWriter properties_writer = 
+									new FileWriter(default_properties_file);
+			properties_writer.write(default_str);
+			properties_writer.close();
+		}
+		return default_properties_file;
+	}
+}
\ No newline at end of file