Mercurial > repos > jesse-erdmann > tapdance
comparison lib/util.pl @ 0:1437a2df99c0
Uploaded
| author | jesse-erdmann |
|---|---|
| date | Fri, 09 Dec 2011 11:56:56 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1437a2df99c0 |
|---|---|
| 1 #!/project/bioperl/perl-5.10.1-sles11/bin/perl -w | |
| 2 | |
| 3 my $dos_end = chr(13); | |
| 4 | |
| 5 my %chrom_map = ( | |
| 6 "gi|149288852|ref|NC_000067.5|NC_000067" => "chr1", | |
| 7 "gi|149288869|ref|NC_000076.5|NC_000076" => "chr10", | |
| 8 "gi|149288871|ref|NC_000077.5|NC_000077" => "chr11", | |
| 9 "gi|149292731|ref|NC_000078.5|NC_000078" => "chr12", | |
| 10 "gi|149292733|ref|NC_000079.5|NC_000079" => "chr13", | |
| 11 "gi|149292735|ref|NC_000080.5|NC_000080" => "chr14", | |
| 12 "gi|149301884|ref|NC_000081.5|NC_000081" => "chr15", | |
| 13 "gi|149304713|ref|NC_000082.5|NC_000082" => "chr16", | |
| 14 "gi|149313536|ref|NC_000083.5|NC_000083" => "chr17", | |
| 15 "gi|149321426|ref|NC_000084.5|NC_000084" => "chr18", | |
| 16 "gi|149323268|ref|NC_000085.5|NC_000085" => "chr19", | |
| 17 "gi|149338249|ref|NC_000068.6|NC_000068" => "chr2", | |
| 18 "gi|149352351|ref|NC_000069.5|NC_000069" => "chr3", | |
| 19 "gi|149354223|ref|NC_000070.5|NC_000070" => "chr4", | |
| 20 "gi|149354224|ref|NC_000071.5|NC_000071" => "chr5", | |
| 21 "gi|149361431|ref|NC_000072.5|NC_000072" => "chr6", | |
| 22 "gi|149361432|ref|NC_000073.5|NC_000073" => "chr7", | |
| 23 "gi|149361523|ref|NC_000074.5|NC_000074" => "chr8", | |
| 24 "gi|149361524|ref|NC_000075.5|NC_000075" => "chr9", | |
| 25 "gi|149361525|ref|NC_000086.6|NC_000086" => "chrX", | |
| 26 "gi|149361526|ref|NC_000087.6|NC_000087" => "chrY" | |
| 27 ); | |
| 28 | |
| 29 return 1; | |
| 30 | |
| 31 sub get_chrom_map { | |
| 32 return \%chrom_map; | |
| 33 } | |
| 34 | |
| 35 sub sanitize_project { | |
| 36 my ($project) = @_; | |
| 37 $project =~ s/@/_/g; | |
| 38 $project =~ s/\./_/g; | |
| 39 $project =~ s/\s/_/g; | |
| 40 return $project; | |
| 41 } | |
| 42 | |
| 43 sub process_fasta { | |
| 44 my $fn = shift @_; | |
| 45 my $process = shift @_; | |
| 46 my $curr_seq_id = ""; | |
| 47 my $curr_seq = ""; | |
| 48 my $entries = 0; | |
| 49 push (my @opts, @_); | |
| 50 open (FILE, "<${$fn}") || die "Unable to open ${$fn}, $!\n"; | |
| 51 while (<FILE>) { | |
| 52 chomp; | |
| 53 $_ =~ s/$dos_end//g; | |
| 54 if ($_=~/^>/) { | |
| 55 if (length($curr_seq_id) > 0 && length($curr_seq) > 0) { | |
| 56 #print join("\t", ("Joined:", $curr_seq_id, $curr_seq, @opts, "\n")); | |
| 57 &$process(\$curr_seq_id, \$curr_seq, \@opts); | |
| 58 $entries++; | |
| 59 } | |
| 60 if (($entries % 100000) == 0) { | |
| 61 my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = localtime(time); | |
| 62 print sprintf("%02d-%02d %02d:%02d:%02d Entries Processed: %9s\n", $mon+1, $day, $hour, $min, $sec, $entries); | |
| 63 } | |
| 64 $curr_seq_id = $_; | |
| 65 $curr_seq_id =~ s/>//g; | |
| 66 $curr_seq = ""; | |
| 67 } | |
| 68 elsif (length($_) > 0) { $curr_seq = join("", ($curr_seq, $_)); } | |
| 69 } | |
| 70 if (length($curr_seq_id) > 0 && length($curr_seq) > 0) { | |
| 71 &$process(\$curr_seq_id, \$curr_seq, \@opts); | |
| 72 $entries++; | |
| 73 #$entries = 1; #for performance, returning 1 as true indicating there is at least one entry processed | |
| 74 } | |
| 75 close (FILE); | |
| 76 return $entries; | |
| 77 } | |
| 78 | |
| 79 sub process_fastq { | |
| 80 my $fn = shift @_; | |
| 81 my $process = shift @_; | |
| 82 my $curr_seq_id = ""; | |
| 83 my $curr_str = ""; | |
| 84 my $curr_seq = ""; | |
| 85 my $entries = 0; | |
| 86 push (my @opts, @_); | |
| 87 open (FILE, "<${$fn}") || die "Unable to open ${$fn}, $!\n"; | |
| 88 while (<FILE>) { | |
| 89 chomp; | |
| 90 $_ =~ s/$dos_end//g; | |
| 91 if ($_=~/^@/) { | |
| 92 if (length($curr_seq_id) > 0 && length($curr_seq) > 0 && length($curr_str) > 0) { | |
| 93 #print join("\t", ("Joined:", $curr_seq_id, $curr_seq, @opts, "\n")); | |
| 94 &$process(\$curr_seq_id, \$curr_seq, \$curr_str, \@opts); | |
| 95 $entries++; | |
| 96 } | |
| 97 if (($entries % 100000) == 0) { | |
| 98 my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = localtime(time); | |
| 99 print sprintf("%02d-%02d %02d:%02d:%02d Entries Processed: %9s\n", $mon+1, $day, $hour, $min, $sec, $entries); | |
| 100 } | |
| 101 $curr_seq_id = $_; | |
| 102 $curr_seq_id =~ s/@//g; | |
| 103 $curr_seq = ""; | |
| 104 $curr_str = ""; | |
| 105 } | |
| 106 elsif ($_=~/^\+/) { | |
| 107 $curr_seq = $curr_str; | |
| 108 $curr_str = $_; | |
| 109 $curr_str =~ s/\+//g; | |
| 110 if ($curr_seq_id ne $curr_str) { | |
| 111 warn "Invalid FASTQ file, sequence id and quality score id mismatch: $curr_seq_id, $curr_str\n"; | |
| 112 return -1; | |
| 113 } | |
| 114 $curr_str = ""; | |
| 115 } | |
| 116 elsif (length($_) > 0) { $curr_str = join("", ($curr_str, $_)); } | |
| 117 } | |
| 118 if (length($curr_seq_id) > 0 && length($curr_seq) > 0 && length($curr_str) > 0) { | |
| 119 &$process(\$curr_seq_id, \$curr_seq, \$curr_str, \@opts); | |
| 120 $entries++; | |
| 121 #$entries = 1; #for performance, returning 1 as true indicating there is at least one entry processed | |
| 122 } | |
| 123 close (FILE); | |
| 124 return $entries; | |
| 125 } |
