diff NGSrich_0.5.5/src/filters/ReadFilter.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NGSrich_0.5.5/src/filters/ReadFilter.java	Mon Nov 21 08:12:19 2011 -0500
@@ -0,0 +1,177 @@
+package filters;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Scanner;
+import datastructures.ReadLine;
+
+public class ReadFilter extends Filter{
+
+	File input, output;
+	
+	/**
+	* Constructs a SamAdapter object. The output of an adaption is written to the
+	* given file outputFileName.
+	* 
+	* @param inputFileName the name of the read alignment input file.
+	* @param outputFileName the name of the output file containing the reduced 
+	* format of the read alignment file. It must ends with ".red".
+	*/
+	public ReadFilter(String inputFileName, String outputFileName) {
+		super(inputFileName, outputFileName);
+		input = new File(inputFileName);
+		output = new File(outputFileName);
+	}
+	
+	/**
+	 * <P>
+	 * Uses ReadLine zu reduce each line of the read alignment file to following format:<BR>
+	 * &#60;name> &#60;chrom> &#60;start> &#60;end> (tab delimited).
+	 * </P>
+	 * In the following we list the 12 fields of the sam-alignment-file. We mark the fields we are 
+	 * interessted in with (!!):
+	 * <PRE>
+	 * 1. &#60;QNAME>	: Query pair NAME if paired; or Query NAME if unpaired 	(Ex: 6:105:18438:14421)	(!!)
+	 * 2. &#60;FLAG>	: bitwise FLAG a₀a₁a₂a₃a₄a₅a₆a₇a₈a₉a₁₀		(Ex: 0 forward, 16 reverse strand)
+	 *		  a₀ : 	the read is paired in sequencing, (no matter whether it is mapped in a pair)
+	 *		  a₁ : 	the read is mapped in a proper pair	
+	 *		  a₂ : 	the query sequence itself is unmapped
+	 *		  a₃ : 	the mate is unmapped
+	 *		  a₄ : 	strand of the query (0 for forward; 1 for reverse strand)
+	 *		  a₅ : 	strand of the mate
+	 *		  a₆ : 	the read is the first read in a pair
+	 *		  a₇ : 	the read is the second read in a pair
+	 *		  a₈ : 	the alignment is not primary
+	 *		  a₉ : 	the read fails platform/vendor quality checks
+	 *  		  a₁₀: 	the read is either a PCR duplicate or an optical duplicate
+	 * 3. &#60;RNAME>	: Reference sequence NAME					(Ex: chr10)			(!!)
+	 * 4. &#60;POS>		: 1-based leftmost POSition/coordinate of the clipped sequence 	(Ex: 60041)			(!!)
+	 * 5. &#60;MAPQ>	: MAPping Quality 						(Ex: 0)
+	 *		 			  (phred-scaled posterior probability that the mapping position of this read is incorrect)
+	 * 6. &#60;CIGAR>	: extended CIGAR string						(Ex: 150M)
+	 * 7. &#60;MRNM>	: Mate Reference sequence NaMe; “=” if the same as &#60;RNAME>	(Ex:*)
+	 * 8. &#60;MPOS>	: 1-based leftmost Mate POSition of the clipped sequence	(Ex: 0)
+	 * 9. &#60;ISIZE>	: inferred Insert SIZE						(Ex: 0)
+	 * 10. &#60;SEQ>	: query SEQuence; “=” for a match to the reference; n/N/. for ambiguity; cases are not maintained (!!)
+	 *					(Ex:	TGTTGTTGTTATTTCTGAATGACATTTACTTTGCTGCTCTTTATTTTGCG
+	 *							TATTTAAAACTATTAGATCGTGTGATTATATTTGACAGGTCTTAATTGAC
+	 *							GCGCTGTTCAGCCCTTTGAGTTCGGTTGAGTTTTGTGTTGGAGAATTTTC)
+	 * 11. &#60;QUAL>	: query QUALity; ASCII-33 gives the Phred base quality
+	 *					(Ex: 	/.8349-7:95@=8999;1:=;===AABD:=@A;>AD:E:9@==69<;@B3CBC@B8B;B89=8=3;@@@.:->>B?
+	 *							C4CBB8EDGDD8GDEEDEEE8EBA9B???=B;,8:+5;;A??>?#############################)
+	 * 12. [&#60;TAG>:&#60;VTYPE>:&#60;VALUE> [...]]: TAG/Value TYPE/match <VTYPE> (space allowed)
+	 * 			(Ex:	XT:A:R	NM:i:2	X0:i:2	X1:i:0)
+	 * </PRE>
+	 */
+	public void filter() {
+		FileWriter fw = null;
+		Scanner s = null;
+		
+		try {
+			s= new Scanner(input);
+		} catch (FileNotFoundException e) {
+			System.err.println("sam file not found");
+			e.printStackTrace();
+		}
+		
+		try {
+			if(output == null){
+				output = new File(input.getName().
+										substring(0,input.getName().lastIndexOf("."))+".rsam");
+			}
+			
+			fw = new FileWriter(output);
+		
+		} catch (IOException e) {
+			System.err.println("Error generating rsam file");
+			e.printStackTrace();
+		}
+		
+		String rawline;
+		ReadLine line = null;
+		
+		do{
+			rawline = s.nextLine();
+		}while(rawline.startsWith("@"));
+		
+		do{
+			try {
+				line = new ReadLine(rawline);
+				fw.write(line+"\r\n");
+			} catch (IOException e) {
+				System.err.println("Error writing reduced form of:\n"+rawline);
+				e.printStackTrace();
+			}
+			if(s.hasNextLine())
+				rawline = s.nextLine();
+		}while(s.hasNextLine());
+		
+		
+		try {
+			fw.write(line +"\r\n");
+		} catch (IOException e) {
+			System.err.println("Error writing reduced form of:\n"+line);
+			e.printStackTrace();
+		}
+		
+		try {
+			fw.close();
+		} catch (IOException e) {
+			System.err.println("Error closing file");
+			e.printStackTrace();
+		}
+		s.close();
+		
+		System.out.println("READS FILE:");
+		System.out.println(input.getAbsolutePath()+" reduced to "+
+				output.getAbsolutePath());
+		sort();
+	}
+	
+
+	public void sort() {
+		Runtime rt = Runtime.getRuntime();
+		try {
+			String rawOutput = output.getAbsolutePath();
+			String outputName = output.getName();
+			String pathname = output.getParentFile().getAbsolutePath()+"/"+outputName+"Sorted";
+			
+			output = new File(pathname);
+			String tmpD=output.getParentFile().getAbsolutePath();
+
+			if(!output.exists())output.createNewFile();
+			String command = "sort -k2,2 -k3n,3 -T "+tmpD+" "+rawOutput;
+			Process p = rt.exec(command);
+			Scanner ps = new Scanner(p.getInputStream());
+
+			FileWriter fw = new FileWriter(output);
+			while(ps.hasNextLine()){
+				String nextLine = ps.nextLine();
+				fw.write(nextLine+"\n");
+			}
+			fw.close();
+
+			Scanner psStdErr=new Scanner(p.getErrorStream());
+			while(psStdErr.hasNextLine()){
+				String errLine=psStdErr.nextLine();
+				System.out.println(errLine);
+			}
+		
+			new File(rawOutput).delete();
+			new File(pathname).renameTo(new File(rawOutput));
+			System.out.println("Reduced file "+new File(rawOutput).getAbsolutePath()+" sorted\n");
+			
+		} catch (IOException e1) {
+			e1.printStackTrace();
+		}
+	}
+
+	
+	public String toString(){
+		return "ReadFilter";
+	}
+
+	
+}