Mercurial > repos > pfrommolt > ngsrich
view NGSrich_0.5.5/src/filters/ReadFilter.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
line wrap: on
line source
package filters; import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.util.Scanner; import datastructures.ReadLine; public class ReadFilter extends Filter{ File input, output; /** * Constructs a SamAdapter object. The output of an adaption is written to the * given file outputFileName. * * @param inputFileName the name of the read alignment input file. * @param outputFileName the name of the output file containing the reduced * format of the read alignment file. It must ends with ".red". */ public ReadFilter(String inputFileName, String outputFileName) { super(inputFileName, outputFileName); input = new File(inputFileName); output = new File(outputFileName); } /** * <P> * Uses ReadLine zu reduce each line of the read alignment file to following format:<BR> * <name> <chrom> <start> <end> (tab delimited). * </P> * In the following we list the 12 fields of the sam-alignment-file. We mark the fields we are * interessted in with (!!): * <PRE> * 1. <QNAME> : Query pair NAME if paired; or Query NAME if unpaired (Ex: 6:105:18438:14421) (!!) * 2. <FLAG> : bitwise FLAG a₀a₁a₂a₃a₄a₅a₆a₇a₈a₉a₁₀ (Ex: 0 forward, 16 reverse strand) * a₀ : the read is paired in sequencing, (no matter whether it is mapped in a pair) * a₁ : the read is mapped in a proper pair * a₂ : the query sequence itself is unmapped * a₃ : the mate is unmapped * a₄ : strand of the query (0 for forward; 1 for reverse strand) * a₅ : strand of the mate * a₆ : the read is the first read in a pair * a₇ : the read is the second read in a pair * a₈ : the alignment is not primary * a₉ : the read fails platform/vendor quality checks * a₁₀: the read is either a PCR duplicate or an optical duplicate * 3. <RNAME> : Reference sequence NAME (Ex: chr10) (!!) * 4. <POS> : 1-based leftmost POSition/coordinate of the clipped sequence (Ex: 60041) (!!) * 5. <MAPQ> : MAPping Quality (Ex: 0) * (phred-scaled posterior probability that the mapping position of this read is incorrect) * 6. <CIGAR> : extended CIGAR string (Ex: 150M) * 7. <MRNM> : Mate Reference sequence NaMe; “=” if the same as <RNAME> (Ex:*) * 8. <MPOS> : 1-based leftmost Mate POSition of the clipped sequence (Ex: 0) * 9. <ISIZE> : inferred Insert SIZE (Ex: 0) * 10. <SEQ> : query SEQuence; “=” for a match to the reference; n/N/. for ambiguity; cases are not maintained (!!) * (Ex: TGTTGTTGTTATTTCTGAATGACATTTACTTTGCTGCTCTTTATTTTGCG * TATTTAAAACTATTAGATCGTGTGATTATATTTGACAGGTCTTAATTGAC * GCGCTGTTCAGCCCTTTGAGTTCGGTTGAGTTTTGTGTTGGAGAATTTTC) * 11. <QUAL> : query QUALity; ASCII-33 gives the Phred base quality * (Ex: /.8349-7:95@=8999;1:=;===AABD:=@A;>AD:E:9@==69<;@B3CBC@B8B;B89=8=3;@@@.:->>B? * C4CBB8EDGDD8GDEEDEEE8EBA9B???=B;,8:+5;;A??>?#############################) * 12. [<TAG>:<VTYPE>:<VALUE> [...]]: TAG/Value TYPE/match <VTYPE> (space allowed) * (Ex: XT:A:R NM:i:2 X0:i:2 X1:i:0) * </PRE> */ public void filter() { FileWriter fw = null; Scanner s = null; try { s= new Scanner(input); } catch (FileNotFoundException e) { System.err.println("sam file not found"); e.printStackTrace(); } try { if(output == null){ output = new File(input.getName(). substring(0,input.getName().lastIndexOf("."))+".rsam"); } fw = new FileWriter(output); } catch (IOException e) { System.err.println("Error generating rsam file"); e.printStackTrace(); } String rawline; ReadLine line = null; do{ rawline = s.nextLine(); }while(rawline.startsWith("@")); do{ try { line = new ReadLine(rawline); fw.write(line+"\r\n"); } catch (IOException e) { System.err.println("Error writing reduced form of:\n"+rawline); e.printStackTrace(); } if(s.hasNextLine()) rawline = s.nextLine(); }while(s.hasNextLine()); try { fw.write(line +"\r\n"); } catch (IOException e) { System.err.println("Error writing reduced form of:\n"+line); e.printStackTrace(); } try { fw.close(); } catch (IOException e) { System.err.println("Error closing file"); e.printStackTrace(); } s.close(); System.out.println("READS FILE:"); System.out.println(input.getAbsolutePath()+" reduced to "+ output.getAbsolutePath()); sort(); } public void sort() { Runtime rt = Runtime.getRuntime(); try { String rawOutput = output.getAbsolutePath(); String outputName = output.getName(); String pathname = output.getParentFile().getAbsolutePath()+"/"+outputName+"Sorted"; output = new File(pathname); String tmpD=output.getParentFile().getAbsolutePath(); if(!output.exists())output.createNewFile(); String command = "sort -k2,2 -k3n,3 -T "+tmpD+" "+rawOutput; Process p = rt.exec(command); Scanner ps = new Scanner(p.getInputStream()); FileWriter fw = new FileWriter(output); while(ps.hasNextLine()){ String nextLine = ps.nextLine(); fw.write(nextLine+"\n"); } fw.close(); Scanner psStdErr=new Scanner(p.getErrorStream()); while(psStdErr.hasNextLine()){ String errLine=psStdErr.nextLine(); System.out.println(errLine); } new File(rawOutput).delete(); new File(pathname).renameTo(new File(rawOutput)); System.out.println("Reduced file "+new File(rawOutput).getAbsolutePath()+" sorted\n"); } catch (IOException e1) { e1.printStackTrace(); } } public String toString(){ return "ReadFilter"; } }