Mercurial > repos > pfrommolt > ngsrich
diff NGSrich_0.5.5/src/filters/ReadFilter.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/NGSrich_0.5.5/src/filters/ReadFilter.java Mon Nov 21 08:12:19 2011 -0500 @@ -0,0 +1,177 @@ +package filters; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Scanner; +import datastructures.ReadLine; + +public class ReadFilter extends Filter{ + + File input, output; + + /** + * Constructs a SamAdapter object. The output of an adaption is written to the + * given file outputFileName. + * + * @param inputFileName the name of the read alignment input file. + * @param outputFileName the name of the output file containing the reduced + * format of the read alignment file. It must ends with ".red". + */ + public ReadFilter(String inputFileName, String outputFileName) { + super(inputFileName, outputFileName); + input = new File(inputFileName); + output = new File(outputFileName); + } + + /** + * <P> + * Uses ReadLine zu reduce each line of the read alignment file to following format:<BR> + * <name> <chrom> <start> <end> (tab delimited). + * </P> + * In the following we list the 12 fields of the sam-alignment-file. We mark the fields we are + * interessted in with (!!): + * <PRE> + * 1. <QNAME> : Query pair NAME if paired; or Query NAME if unpaired (Ex: 6:105:18438:14421) (!!) + * 2. <FLAG> : bitwise FLAG a₀a₁a₂a₃a₄a₅a₆a₇a₈a₉a₁₀ (Ex: 0 forward, 16 reverse strand) + * a₀ : the read is paired in sequencing, (no matter whether it is mapped in a pair) + * a₁ : the read is mapped in a proper pair + * a₂ : the query sequence itself is unmapped + * a₃ : the mate is unmapped + * a₄ : strand of the query (0 for forward; 1 for reverse strand) + * a₅ : strand of the mate + * a₆ : the read is the first read in a pair + * a₇ : the read is the second read in a pair + * a₈ : the alignment is not primary + * a₉ : the read fails platform/vendor quality checks + * a₁₀: the read is either a PCR duplicate or an optical duplicate + * 3. <RNAME> : Reference sequence NAME (Ex: chr10) (!!) + * 4. <POS> : 1-based leftmost POSition/coordinate of the clipped sequence (Ex: 60041) (!!) + * 5. <MAPQ> : MAPping Quality (Ex: 0) + * (phred-scaled posterior probability that the mapping position of this read is incorrect) + * 6. <CIGAR> : extended CIGAR string (Ex: 150M) + * 7. <MRNM> : Mate Reference sequence NaMe; “=” if the same as <RNAME> (Ex:*) + * 8. <MPOS> : 1-based leftmost Mate POSition of the clipped sequence (Ex: 0) + * 9. <ISIZE> : inferred Insert SIZE (Ex: 0) + * 10. <SEQ> : query SEQuence; “=” for a match to the reference; n/N/. for ambiguity; cases are not maintained (!!) + * (Ex: TGTTGTTGTTATTTCTGAATGACATTTACTTTGCTGCTCTTTATTTTGCG + * TATTTAAAACTATTAGATCGTGTGATTATATTTGACAGGTCTTAATTGAC + * GCGCTGTTCAGCCCTTTGAGTTCGGTTGAGTTTTGTGTTGGAGAATTTTC) + * 11. <QUAL> : query QUALity; ASCII-33 gives the Phred base quality + * (Ex: /.8349-7:95@=8999;1:=;===AABD:=@A;>AD:E:9@==69<;@B3CBC@B8B;B89=8=3;@@@.:->>B? + * C4CBB8EDGDD8GDEEDEEE8EBA9B???=B;,8:+5;;A??>?#############################) + * 12. [<TAG>:<VTYPE>:<VALUE> [...]]: TAG/Value TYPE/match <VTYPE> (space allowed) + * (Ex: XT:A:R NM:i:2 X0:i:2 X1:i:0) + * </PRE> + */ + public void filter() { + FileWriter fw = null; + Scanner s = null; + + try { + s= new Scanner(input); + } catch (FileNotFoundException e) { + System.err.println("sam file not found"); + e.printStackTrace(); + } + + try { + if(output == null){ + output = new File(input.getName(). + substring(0,input.getName().lastIndexOf("."))+".rsam"); + } + + fw = new FileWriter(output); + + } catch (IOException e) { + System.err.println("Error generating rsam file"); + e.printStackTrace(); + } + + String rawline; + ReadLine line = null; + + do{ + rawline = s.nextLine(); + }while(rawline.startsWith("@")); + + do{ + try { + line = new ReadLine(rawline); + fw.write(line+"\r\n"); + } catch (IOException e) { + System.err.println("Error writing reduced form of:\n"+rawline); + e.printStackTrace(); + } + if(s.hasNextLine()) + rawline = s.nextLine(); + }while(s.hasNextLine()); + + + try { + fw.write(line +"\r\n"); + } catch (IOException e) { + System.err.println("Error writing reduced form of:\n"+line); + e.printStackTrace(); + } + + try { + fw.close(); + } catch (IOException e) { + System.err.println("Error closing file"); + e.printStackTrace(); + } + s.close(); + + System.out.println("READS FILE:"); + System.out.println(input.getAbsolutePath()+" reduced to "+ + output.getAbsolutePath()); + sort(); + } + + + public void sort() { + Runtime rt = Runtime.getRuntime(); + try { + String rawOutput = output.getAbsolutePath(); + String outputName = output.getName(); + String pathname = output.getParentFile().getAbsolutePath()+"/"+outputName+"Sorted"; + + output = new File(pathname); + String tmpD=output.getParentFile().getAbsolutePath(); + + if(!output.exists())output.createNewFile(); + String command = "sort -k2,2 -k3n,3 -T "+tmpD+" "+rawOutput; + Process p = rt.exec(command); + Scanner ps = new Scanner(p.getInputStream()); + + FileWriter fw = new FileWriter(output); + while(ps.hasNextLine()){ + String nextLine = ps.nextLine(); + fw.write(nextLine+"\n"); + } + fw.close(); + + Scanner psStdErr=new Scanner(p.getErrorStream()); + while(psStdErr.hasNextLine()){ + String errLine=psStdErr.nextLine(); + System.out.println(errLine); + } + + new File(rawOutput).delete(); + new File(pathname).renameTo(new File(rawOutput)); + System.out.println("Reduced file "+new File(rawOutput).getAbsolutePath()+" sorted\n"); + + } catch (IOException e1) { + e1.printStackTrace(); + } + } + + + public String toString(){ + return "ReadFilter"; + } + + +}