Mercurial > repos > venice-juanillas > convert_format

--- a/file_conversion/alchemy2matrix.pl	Mon Nov 05 23:01:59 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,172 +0,0 @@
-#!C:\Perl\bin\perl
-use warnings;
-
-###################################################################################
-## Author: Venice Margarette B. Juanillas
-## Date Created: May 10,2011
-## Program Description: This script will read an alchemy-derived text output
-##						and transform it into the matrix form getting only
-##						the ff:
-##							Sample names
-##							SNP ID
-##							AB Calls
-##						This matrix will further be utilized by other formats.
-##
-## Citation: This script was made based from Sir Mauleon's existing script.
-###################################################################################
-
-## declarations
-## File-scoped state shared by main() and pushGenotype().
-my $line_count = 1; ## counts leading non-blank lines; the first 6 are skipped by main()
-my $line; ## the input line currently being processed
-my $index; ## column holding the call to extract (set in main)
-my $hline = 1; ## flag: 1 until the first data line has been handled
-my $prev; ## SNP id of the previous data line
-my $firstSNP = 0; ## flag: 1 while the rows of the first SNP are being read
-my $nt; ## for the new symbol to be used
-my @row = (); ## fields of the current line
-my @string = (); ## output row under construction: SNP id, then one call per sample
-my @Sample_id = (); ## sample ids for the header row; spelled exactly as main() uses it,
-					## so the array is this lexical and not an implicit package global
-my @nullarray = (); ## empty array used to reset @string
-
-check();
-
-sub check{
-	## Validate the command line, then hand over to main().
-	## Expected arguments: input file, output file and (optionally) the column number.
-	my $argc = scalar(@ARGV);
-
-	## anything other than two or three arguments only gets the usage message
-	if(!@ARGV or $argc < 2 or $argc > 3){
-		print "Enter input, output  files and column number respectively.\n";
-		return;
-	}
-
-	## default column parameter: 3 (used when the third argument is missing or false)
-	$ARGV[2] = 3 unless $ARGV[2];
-
-	## the column number is passed along so the matrix is built dynamically
-	main(@ARGV[0 .. 2]);
-}
-
-sub main{
-	## Reads the alchemy output $infile and writes a tab-delimited matrix to
-	## $outfile: one header row of sample ids, then one row per SNP holding the
-	## SNP id followed by the converted call of every sample.
-	##   $infile  - path of the alchemy text output
-	##   $outfile - path of the matrix file to create
-	##   $col     - index into the tab-split row of the call to extract
-	## Rows are grouped by comparing column 0 (SNP id) with the previous row;
-	## column 1 is taken as the sample id. Dies if a file cannot be opened or
-	## the output cannot be written.
-	my ($infile,$outfile,$col) = @_;
-
-	$index = $col; ## column to be used made global (read by pushGenotype)
-
-	## three-argument open: the file names are never parsed for mode characters
-	open(my $in_fh, '<', $infile) or die "Cannot open input file '$infile': $!";
-	open(my $out_fh, '>', $outfile) or die "Cannot open output file '$outfile': $!";
-
-	## writes the header row: fixed label followed by every sample id
-	my $print_header = sub {
-		print {$out_fh} "SNP_Sample_ids";
-		print {$out_fh} "\t$_" foreach @Sample_id;
-		print {$out_fh} "\n";
-	};
-
-	## writes the row collected in @string; every field is followed by a tab
-	my $print_row = sub {
-		print {$out_fh} "$_\t" foreach @string;
-		print {$out_fh} "\n";
-	};
-
-	#read one line at a time
-	while($line = <$in_fh>){
-		next if $line =~ /^\s*$/; # skip blank lines
-		if ($line_count <= 6){ # the first six non-blank lines are not data
-			$line_count++;
-			next;
-		}
-
-		## remove the line terminator first, otherwise a call sitting in the last
-		## column keeps its newline and never matches in pushGenotype; the -1 limit
-		## keeps trailing empty fields so column positions do not shift
-		$line =~ s/\r?\n\z//;
-		@row = split(/\t/,$line,-1);
-
-		if($hline == 1){
-			## very first data line: start the first SNP row
-			push(@Sample_id,$row[1]); # create the stack of Sample id's
-			push(@string,$row[0]); # the SNP id leads the row
-			pushGenotype();
-			$prev = $row[0];
-			$hline = 0; ## flag as not 1st data line anymore
-			$firstSNP = 1;
-		}elsif($row[0] eq $prev){
-			## still within the same SNP; sample ids are only needed until the
-			## header has been written, i.e. while reading the first SNP
-			push(@Sample_id,$row[1]) if $firstSNP == 1;
-			pushGenotype();
-		}else{
-			## a new SNP starts: flush the finished row (and the header on the first pass only)
-			if($firstSNP == 1){
-				$print_header->();
-				$firstSNP = 0;
-			}
-			$print_row->();
-
-			@string = ();
-			push(@string,$row[0]); #push SNP name as 1st array element
-			$prev = $row[0]; #replace prev SNP name with current SNP name
-			pushGenotype();
-		}
-	}
-
-	## an input with a single SNP never reaches the flush above, so the header
-	## row would otherwise be missing from the output
-	if($firstSNP == 1){
-		$print_header->();
-		$firstSNP = 0;
-	}
-
-	## output the row of the last SNP
-	$print_row->();
-
-	#close all files
-	close($in_fh);
-	close($out_fh) or die "Cannot write output file '$outfile': $!";
-}
-
-
-## This subroutine converts the call of the current row (column $index of @row)
-## to the slash-separated notation and appends it to @string.
-## Based on Sir Mau's pushGenotype code.
-sub pushGenotype {
-
-		## raw call => new notation; heterozygous calls are accepted in either order
-		my %notation_for = (
-			"AA" => "A/A",
-			"BB" => "B/B",
-			"AB" => "A/B",
-			"BA" => "A/B",
-			" "  => "-/-",
-			"TT" => "T/T",
-			"CC" => "C/C",
-			"TC" => "T/C",
-			"CT" => "T/C",
-			"GG" => "G/G",
-			"GC" => "C/G",
-			"CG" => "C/G",
-			"GA" => "A/G",
-			"AG" => "A/G",
-			"TA" => "A/T",
-			"AT" => "A/T",
-		);
-
-		my $call = $row[$index];  ## the AB call column being picked up
-
-		## a call without an entry in the table is passed through unchanged
-		$nt = exists $notation_for{$call} ? $notation_for{$call} : $call;
-
-		push (@string, $nt);  ## push the new symbol to the array
-		return;
-
-}
-
-## end of script
-
-
-
-
-
--- a/file_conversion/alchemy2matrix.xml	Mon Nov 05 23:01:59 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-<tool id="alchemy2matrix" name="Alchemy to Matrix">
-  <description>file converter</description>
-  <!-- only the input and output paths are passed to the script; no column argument is given -->
-  <command interpreter="perl">alchemy2matrix.pl $input $output</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Source file"/>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="raw_data.txt"/>
-      <!-- the test output is matched by name against the data output declared above -->
-      <output name="output" file="output.txt"/>
-    </test>
-  </tests>
-
-  <help>
-This tool converts an alchemy-output file format to a SNP matrix format.
-  </help>
-
-</tool>
\ No newline at end of file
--- a/file_conversion/matrix2powermarker.pl	Mon Nov 05 23:01:59 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-#!C:\Perl\bin\perl
-use warnings;
-
-#####################################################
-## Author: Venice Margarette B. Juanillas
-## Date: May 13,2011
-## Program Description: This script will transform a matrix into a Powerformat dataset
-##						this will utilize a matrix and transpose this matrix
-##						The column names will become the rows, the SNP ids will be the new
-##						columns
-#############################################################################################
-
-## declarations
-## scalars
-my $line;				## the input line currently being processed
-my $temp;
-my $line_count = 1;		## 1 while the header line is still to be read
-my $next_line = 0;		## set to 1 once the header has been read
-## arrays
-my @row = ();
-my @array = ();
-my @string = ();
-my @SNP_ids = ();
-my @Sample_names = ();
-
-## exactly two parameters are required: input data and output file
-if(@ARGV == 2){
-	main(@ARGV); ## call to subroutine main
-}else{
-	print "No Parameters specified...Specify 1.) input data 2.) output file\n";
-}
-
-sub main{
-	## Transposes the matrix in $infile and writes it to $outfile: every column
-	## of the input (the label column and one column per sample) becomes one
-	## output line holding the header cell followed by that column's value from
-	## each following row.
-	##   $infile  - path of the tab-delimited matrix
-	##   $outfile - path of the transposed file to create
-	## Dies if a file cannot be opened or the output cannot be written.
-	my ($infile, $outfile) = @_;
-
-	#open input and output files
-	open(my $in_fh, '<', $infile) or die "Cannot open input file '$infile': $!";
-	open(my $out_fh, '>', $outfile) or die "Cannot open output file '$outfile': $!";
-
-	#read line by line
-	while($line = <$in_fh>){
-		next if $line =~ /^\s*$/; # skip blank lines
-		if($line_count == 1){
-			@Sample_names = split(/\t|\r?\n/,$line); ## split the header, which contains the sample names
-			$next_line = 1; ## flag to go to the next line
-			$line_count++;
-			next;
-		}
-		if($next_line == 1){ ## reading the next line
-			@row = split(/\t|\r?\n/,$line); ## splits all elements delimited by tabs and newlines
-			push(@SNP_ids,$row[0]); ## store all ids
-
-			## $#Sample_names is the index of the last column, so it has to be
-			## included; stopping one short dropped the last sample
-			for my $i (0 .. $#Sample_names){
-				## a row shorter than the header contributes an empty cell
-				my $cell = defined $row[$i] ? $row[$i] : "";
-				$Sample_names[$i] = $Sample_names[$i]."\t".$cell; ## append the marker data to its sample
-			}
-		}
-	}
-
-	#this segment is mainly for outputting the transposed matrix into the file
-	#for($i = 0;$i <= $#SNP_ids;$i++){
-	#	print OUT "$SNP_ids[$i]\t";
-	#}
-	#print OUT "\n";
-	for my $i (0 .. $#Sample_names){ ## output to file all contents of the array
-		print {$out_fh} $Sample_names[$i]."\n"; ## basically all that's in the matrix
-	}
-
-	## close files to save modifications
-	close($in_fh);
-	close($out_fh) or die "Cannot write output file '$outfile': $!";
-
-}
-
-## end of script
\ No newline at end of file
--- a/file_conversion/matrix2powermarker.xml	Mon Nov 05 23:01:59 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-<tool id="matrix2powermarker" name="Matrix to Powermarker">
-  <description>file conversion</description>
-  <command interpreter="perl">matrix2powermarker.pl $input $output</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Matrix file"/>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output" label="${input.name} Powermarker Format"/> <!-- the label is derived from the input name to create more informative output names -->
-  </outputs>
-
-  <tests>
-    <test>
-      <output name="output" file="out.txt"/> <!-- NOTE(review): this test sets no value for the "input" param; confirm which test file is intended -->
-    </test>
-  </tests>
-
-  <help>
-This tool converts a SNP matrix file to Powermarker file format.
-  </help>
-
-</tool>
\ No newline at end of file
--- a/file_conversion/matrix2qgene.pl	Mon Nov 05 23:01:59 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,391 +0,0 @@
-#!C:\PERL\bin\perl
-use warnings;
-#use Benchmark; ## for benchmark purposes
-
-#$t0 = Benchmark->new; ## Create new instace of Benchmark variable
-						## begin
-
-
-#########################################################################
-## Author: Venice Margarette B. Juanillas
-## Date Created: May 19, 2011
-## Description: This script transforms a tabular data file into the
-## QGENE data file format
-## outputs: [header]
-##			Genotype Symbols:
-##			A/A = 1
-##			B/B = 2
-##			A/B = 3
-##			C/C = 4
-##			D/D = 5
-##			-/- = 0
-##			[locus]
-##			SNP_id	cM_num	cM_pos	marker_data(123450)
-##			[Trait]
-##				*must still be appended; generated from another
-##				*file.
-##	temporary output: out2.txt
-##	final output: output.txt
-#############################################################################
-
-## declarations
-my $tempf1 = "out.txt"; ## temporary file: check() stores the sorted matrix here
-my $tempf2 = "out2.txt"; ## temporary file: check() stores the sorted map here
-my $tempf3 = "out3.txt"; ## file to temporarily store the matrix built by create_matrix()
-
-my $linecounter = 0; ## incremented by append_header() for every header line it writes
-
-
-check();
-
-## This subroutine checks that a valid number of parameters was given and then
-## drives the conversion: sort the matrix and the map, build the locus matrix
-## in a temporary file and assemble the output from the header section, the
-## matrix and the trait section.
-## The header file ($ARGV[3]) and the trait file ($ARGV[4]) are optional and
-## independent of each other: whichever is missing is replaced by its bare
-## section marker.
-sub check{
-
-	my $params = scalar(@ARGV); ## store the number of parameters
-
-	## check for command line parameters specified
-	## else prompt an error asking for files
-	if($params < 3 or $params > 5){
-		print "Incorrect file parameters...\n";
-		print "ARGV[0] = matrix file\n"; ## user created
-		print "ARGV[1] = output file\n";
-		print "ARGV[2] = map file\n"; ## user created
-		print "ARGV[3] = header file\n";
-		print "ARGV[4] = trait file\n";
-		return;
-	}
-
-	sort_contents($ARGV[0],$tempf1); ## sort the contents of the matrix
-	sort_contents($ARGV[2],$tempf2); ## sort the contents of the map
-	create_matrix($tempf1,$tempf2,$tempf3); ## creates the matrix in another file
-
-	## header section: this step (re)creates the output file in both cases, so
-	## a header file is honoured even when no trait file was given
-	if($ARGV[3]){
-		append_header($ARGV[3],$ARGV[1]);
-	}else{
-		print "No header section\n";
-		## the "+>" prefix tells append_string to start the file afresh
-		append_string("+>".$ARGV[1],"[Header]"."\n"."[Locus]");
-	}
-
-	append_matrix($ARGV[1],$tempf3); ## append the matrix
-
-	## trait section
-	if($ARGV[4]){
-		append_trait($ARGV[1],$ARGV[4]);
-	}else{
-		print "No trait section\n";
-		## the "+>>" prefix tells append_string to append
-		append_string("+>>".$ARGV[1],"[Trait]");
-	}
-}
-
-## This subroutine reads the matrix from the temporary file and appends it to
-## the output file, swapping the first two columns of every row.
-##   $output - path of the file to append to
-##   $matrix - path of the temporary matrix file
-## The file written by create_matrix() has no header line, so every non-blank
-## row is copied; skipping the first one would lose a SNP.
-## Dies if a file cannot be opened or the output cannot be written.
-sub append_matrix{
-	my ($output,$matrix) = @_;
-
-	## open files
-	open(my $in_fh, '<', $matrix) or die "Cannot open $matrix: $!";
-	open(my $out_fh, '>>', $output) or die "Cannot open $output: $!";
-
-	while(my $line = <$in_fh>){
-		next if $line =~ /^\s*$/; ## skip if blank line
-		my @row = split(/\t/,$line); ## split row; the newline stays on the last field
-		($row[0],$row[1]) = ($row[1],$row[0]); ## swap cM num and SNP
-		print {$out_fh} join("\t",@row); ## append row by row
-	}
-
-	## close files
-	close($in_fh);
-	close($out_fh) or die "Cannot write $output: $!";
-}
-
-## This subroutine creates the matrix of the locus section in a temporary file.
-##   $input1 - path of the sorted matrix (its first non-blank line is skipped as header)
-##   $input2 - path of the sorted map
-##   $output - path of the temporary file to write
-## Every row written is: mapping columns (or "-" placeholders when the SNP has
-## no entry in the map) followed by the allele codes of the SNP. The rows are
-## sorted as strings before they are written.
-## Dies if a file cannot be opened or the output cannot be written.
-sub create_matrix{
-	my ($input1,$input2,$output) = @_; ## input1: matrix input2: map output: temp_out
-	my $linecount = 1; ## flag if header
-	my @SNP_rows;
-
-	open(my $matrix_fh, '<', $input1) or die "Cannot open $input1: $!";
-	open(my $out_fh, '>', $output) or die "Cannot open $output: $!";
-
-	while(my $line = <$matrix_fh>){
-		next if $line =~ /^\s*$/; ## we skip the blank lines
-
-		## skip the headers
-		if($linecount == 1){
-			$linecount++;
-			next;
-		}
-
-		my @row = split(/\t/,$line);
-		my $string;
-
-		if(search_SNPid($row[0],$input2) == 1){
-			## the SNP has a cM mapping
-			$string = get_mappings($row[0],$input2); ## get the chromosome mappings
-		}else{
-			## if there exist no mappings, cM & cM_pos = "-"
-			$string = "-"."\t".$row[0]."\t"."-"."\t";
-		}
-
-		my $alleles = get_alleles($row[0],$input1);
-		push(@SNP_rows,$string.$alleles."\n");
-	}
-
-	## sort the SNPs by their chromosomes (the mapping column leads each row)
-	@SNP_rows = sort(@SNP_rows);
-
-	## print all SNPs in the temporary file; the list is written as a whole so
-	## that the last row is not left out
-	print {$out_fh} @SNP_rows;
-
-	close($matrix_fh);
-	close($out_fh) or die "Cannot write $output: $!";
-}
-
-## This forms a string that comprises only the allele codes of a certain SNP id.
-##   $SNP  - SNP id to look for in the first column
-##   $file - path of the matrix file (its first non-blank line is skipped as header)
-## Returns the tab-joined codes of every row whose first column equals $SNP;
-## an empty string when the SNP is not found. Dies if the file cannot be opened.
-## Rows are accepted with or without a tab after the last genotype.
-sub get_alleles{
-	my ($SNP, $file) = @_;
-	my $linecount = 1; ## flag if we are in the header row
-	my @markers; ## store here all marker data
-
-	open(my $fh, '<', $file) or die "Cannot open $file: $!";
-
-	while(my $line = <$fh>){
-		next if $line =~ /^\s*$/;
-		if($linecount == 1){ ## skip the header
-			$linecount++;
-			next;
-		}
-
-		## strip the line terminator so that the last genotype can be matched
-		$line =~ s/\r?\n\z//;
-		my @row = split(/\t/,$line,-1); ## split whole row, keeping empty cells
-		next unless $row[0] eq $SNP;
-
-		## a row that ends in a tab yields one empty trailing field: not a genotype
-		pop(@row) if @row > 1 and $row[-1] eq "";
-
-		## change the genotype symbols to their codes with the shared helper
-		push(@markers, map { change_marker($_) } @row[1 .. $#row]);
-	}
-
-	close($fh);
-	return join("\t",@markers); ## delimit the markers by tabs
-}
-
-## This subroutine gets the mapping columns of a SNP from the map file.
-##   $SNP  - SNP id to look for in the first column
-##   $file - path of the map file (its first non-blank line is skipped as header)
-## Returns "<column 1>\t<column 0>\t<column 2>\t" of the first matching row,
-## or undef when the SNP is not in the map. Dies if the file cannot be opened.
-sub get_mappings{
-	my ($SNP,$file) = @_;
-	my $mappings; ## stays undef when the SNP is not found
-	my $linecount = 1; ## flag if header
-
-	open(my $fh, '<', $file) or die "Cannot open $file: $!";
-
-	while(my $line = <$fh>){
-		next if $line =~ /^\s*$/; ## we skip the blank lines
-		if($linecount == 1){
-			$linecount++;
-			next;
-		}
-
-		## without this the newline would travel inside the third column and
-		## break the row that is built from the returned string
-		$line =~ s/\r?\n\z//;
-		my @row = split(/\t/,$line);
-		if($row[0] eq $SNP){
-			$mappings = $row[1]."\t".$row[0]."\t".$row[2]."\t";
-			last; ## leave through the common exit so the file is closed
-		}
-	}
-
-	close($fh);
-	return $mappings;
-}
-
-## This subroutine translates one genotype symbol into its numeric code.
-##   $marker - genotype symbol such as "A/A"
-## Returns the code as a string; any symbol without a code is returned unchanged.
-## NOTE(review): the file header lists "-/- = 0" while the code here is "6" - confirm
-## which one the target format expects.
-sub change_marker{
-	my ($marker) = @_;
-
-	## genotype symbol => code
-	my %code_for = (
-		"A/A" => "1",
-		"B/B" => "2",
-		"A/B" => "3",
-		"C/C" => "4",
-		"D/D" => "5",
-		"-/-" => "6",
-	);
-
-	return exists $code_for{$marker} ? $code_for{$marker} : $marker;
-}
-
-## This subroutine checks if the SNP is in the cM mapping.
-##   $to_search      - SNP id to look for
-##   $file_to_search - path of the map file (its first non-blank line is skipped as header)
-## Returns 1 when a row whose first column equals the id exists, 0 otherwise.
-## Dies if the file cannot be opened.
-## drawback: the file is read from the start on every call, which is slow for
-## large maps; the scan at least stops at the first hit.
-sub search_SNPid{
-	my ($to_search,$file_to_search) = @_;
-	my $flag = 0; ## mark if the SNP id exists in the map
-	my $linecount = 1;
-
-	open(my $fh, '<', $file_to_search) or die "Cannot open $file_to_search: $!";
-
-	while(my $line = <$fh>){
-		next if $line =~ /^\s*$/;
-		if($linecount == 1){ ## we skip the headers
-			$linecount++;
-			next;
-		}
-
-		my @row = split(/\t|\n/,$line); ## kept local so no global @row is touched
-		if($row[0] eq $to_search){ ## compare with the id in the map
-			$flag = 1;
-			last; ## nothing more to learn from the rest of the file
-		}
-	}
-	close($fh);
-
-	return $flag;
-
-}
-
-## This subroutine writes a sorted copy of a file, for faster searching.
-##   $infile  - path of the file to sort
-##   $outfile - path of the sorted copy (temporary file)
-## The first non-blank line is kept at the top: every reader of the sorted
-## file skips that line as the header, so it must not be sorted in with the
-## data rows. The remaining lines are sorted as strings.
-## Dies if a file cannot be opened or the output cannot be written.
-sub sort_contents{
-	my ($infile,$outfile) = @_;
-
-	open(my $in_fh, '<', $infile) or die "Cannot open $infile: $!";
-	my @lines = <$in_fh>;
-	close($in_fh);
-
-	## an unterminated last line would be glued to its neighbour after sorting
-	$lines[-1] .= "\n" if @lines and $lines[-1] !~ /\n\z/;
-
-	## take the header (first non-blank line) out of the lines to be sorted
-	my @header;
-	for my $i (0 .. $#lines){
-		next if $lines[$i] =~ /^\s*$/;
-		@header = splice(@lines,$i,1);
-		last;
-	}
-
-	## we keep the sorted contents in the temporary file
-	open(my $out_fh, '>', $outfile) or die "Cannot open $outfile: $!";
-	print {$out_fh} @header, sort(@lines); ## header first, then the sorted rows
-
-	## close to save
-	close($out_fh) or die "Cannot write $outfile: $!";
-}
-
-## This writes a given string, followed by a newline, to a given file.
-##   $file   - path of the file, optionally prefixed with an open mode
-##             ("+>" or ">" to start the file afresh, "+>>" or ">>" to append);
-##             without a prefix the string is appended
-##   $string - text to write
-## Dies if the file cannot be opened or written.
-sub append_string{
-	my ($file, $string) = @_;
-
-	## the callers pass the mode glued to the path; take the two apart so the
-	## path itself is never interpreted by open
-	my ($mode,$path) = ('>>',$file);
-	if($file =~ /\A\s*(\+?>>?)\s*(.*)\z/s){
-		($mode,$path) = ($1,$2);
-	}
-
-	open(my $fh, $mode, $path) or die "Cannot open $path: $!"; ## open file
-	print {$fh} $string."\n" or die "Cannot write $path: $!"; ## write/append to the file
-	close($fh) or die "Cannot write $path: $!"; ## close
-}
-
-
-## This subroutine copies the header file into a freshly created output file.
-##   $infile  - path of the header file
-##   $outfile - path of the output file (any existing content is replaced)
-## Blank lines of the header file are left out; one newline is written after
-## the copied lines. $linecounter is increased for every line copied.
-## Dies if a file cannot be opened or the output cannot be written.
-sub append_header{
-	my ($infile,$outfile) = @_;
-
-	## open files
-	open(my $in_fh, '<', $infile) or die "Cannot open header file '$infile': $!";
-	open(my $out_fh, '>', $outfile) or die "Cannot open output file '$outfile': $!";
-
-	## traverse through the input file
-	while(my $line = <$in_fh>){
-		next if $line =~ /^\s*$/;
-		print {$out_fh} $line;
-		$linecounter++;
-	}
-
-	print {$out_fh} "\n";
-
-	## close all files to save modifications
-	close($in_fh);
-	close($out_fh) or die "Cannot write output file '$outfile': $!";
-
-}
-
-## This subroutine appends the Trait Section to the output file.
-##   $file       - path of the output file to append to
-##   $trait_file - path of the trait file, copied line by line
-## The trait file is only read, so it is opened read-only and does not need to
-## be writable. Dies if a file cannot be opened or the output cannot be written.
-sub append_trait{
-	my ($file,$trait_file) = @_;
-
-	## open files for manipulation
-	open(my $out_fh, '>>', $file) or die "Cannot open $file: $!";
-	open(my $trait_fh, '<', $trait_file) or die "Cannot open $trait_file: $!";
-
-	## read contents of trait file and copy to the output file
-	while(my $line = <$trait_fh>){
-		print {$out_fh} $line;
-	}
-
-	## close files to save all modifications
-	close($trait_fh);
-	close($out_fh) or die "Cannot write $file: $!";
-}
-
-#$t1 = Benchmark->new;
-#$td = timediff($t1, $t0);
-#print "the code took:",timestr($td),"\n";
-
-
-
-## end of the script
-## Date Completed: May 23,2011
-## Revised: May 28,2011
-## Remarks: For further testing
-
--- a/file_conversion/matrix2qgene.xml	Mon Nov 05 23:01:59 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="matrix2qgene" name="Matrix to QGene">
-  <description>file format conversion</description>
-  <!-- argument order expected by the script: matrix, output, map, header, trait -->
-  <command interpreter="perl">matrix2qgene.pl $matrix $output $map $header $trait</command>
-  <inputs>
-    <param format="tabular" name="matrix" type="data" label="Matrix file"></param>
-    <param format="tabular" name="map" type="data" label="Physical Map file"></param>
-    <param format="tabular" name="header" type="data" label="Header file"></param>
-    <param format="tabular" name="trait" type="data" label="Trait file"></param>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output" label="${matrix.name} QGene format"/>
-  </outputs>
-
-  <tests>
-    <test>
-      <!-- the test output is matched by name against the data output declared above -->
-      <!-- NOTE(review): no input values are set and the file attribute is a template, not a fixed test file; confirm the intended fixtures -->
-      <output name="output" file="${matrix}.qdf"/>
-    </test>
-  </tests>
-
-  <help>
-This tool converts a SNP matrix file to QGene file format.
-  </help>
-
-</tool>
\ No newline at end of file
--- a/file_conversion/matrix2structure.pl	Mon Nov 05 23:01:59 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,141 +0,0 @@
-#!C:\Perl\bin\perl
-use warnings;
-
-##########################################################################################
-## Author: Venice Margarette B. Juanillas
-## Date Created: May 27,2011
-## Script Description: This script is intended to transform a matrix into its structure
-##						data file format.
-## Disclaimer: This script is not yet complete. It cannot yet be fully used
-##						for Structure transformation.
-##########################################################################################
-
-
-
-my $string;
-my @array;
-my $temp_file; ## temporary file that will store the transposed matrix
-			## before the symbols are changed and written to the output file
-
-
-## exactly two parameters are required: matrix input data and output file
-if(@ARGV == 2){
-	$temp_file = "line.txt";
-	transpose_data($ARGV[0],$temp_file);
-	change_symbol($temp_file,$ARGV[1]);
-}else{
-	print "No Parameters specified...Specify 1.) matrix input data 2.) output file\n";
-}
-
-## Transposes matrix data: every column of the input (the label column and one
-## column per sample) becomes one output line holding the header cell followed
-## by that column's value from each following row.
-##   $infile  - path of the tab-delimited matrix
-##   $outfile - path of the transposed file to create
-## Rows are accepted with or without a tab after the last cell.
-## Dies if a file cannot be opened or the output cannot be written.
-sub transpose_data{
-	my ($infile, $outfile) = @_;
-	my $line_count = 1; ## start from the header
-	my @markers; ## one entry per input column, grown row by row
-
-	## open files
-	open(my $in_fh, '<', $infile) or die "Cannot open $infile: $!";
-	open(my $out_fh, '>', $outfile) or die "Cannot open $outfile: $!";
-
-	## read file per line
-	while(my $line = <$in_fh>){
-		next if $line =~ /^\s*$/; # skip blank lines
-
-		## strip the line terminator so it cannot end up inside the last cell
-		$line =~ s/\r?\n\z//;
-		my @row = split(/\t/,$line,-1); ## split line, keeping empty cells
-
-		## a line that ends in a tab yields one empty trailing field: not a cell
-		pop(@row) if @row > 1 and $row[-1] eq "";
-
-		if($line_count == 1){
-			@markers = @row; ## the header cells start the output lines
-			$line_count++;
-			next;
-		}
-
-		## every column is visited, including the last one
-		for my $i (0 .. $#markers){
-			## a row shorter than the header contributes an empty cell
-			my $cell = defined $row[$i] ? $row[$i] : "";
-			$markers[$i] = $markers[$i]."\t".$cell; ## append the allele to its column
-		}
-	}
-
-	## output to file all contents of the array
-	print {$out_fh} $_."\n" foreach @markers;
-
-	## close files
-	close($in_fh);
-	close($out_fh) or die "Cannot write $outfile: $!";
-}
-
-## Copies $file1 to $file2, replacing the genotype symbols of every line after
-## the first by their shorter symbols.
-##   $file1 - path of the transposed matrix
-##   $file2 - path of the output file to create
-## The first non-blank line is copied unchanged. Every other line is split on
-## whitespace, each field with an entry in the table below is replaced, and the
-## fields are written back joined by tabs.
-## Dies if a file cannot be opened or the output cannot be written.
-sub change_symbol{
-	my ($file1,$file2) = @_;
-	my $linecount = 1;
-
-	## genotype symbol => replacement symbol
-	my %symbol_for = (
-		"-/-" => "N",
-		"A/A" => "A",
-		"C/C" => "C",
-		"A/B" => "B",
-		"B/B" => "B",
-		"C/G" => "G",
-		"G/G" => "G",
-		"A/T" => "AT",
-		"T/T" => "AT",
-		"T/C" => "TC",
-		"B/C" => "BC",
-		"A/G" => "AG",
-	);
-
-	open(my $in_fh, '<', $file1) or die "Cannot open $file1: $!";
-	open(my $out_fh, '>', $file2) or die "Cannot open $file2: $!";
-
-	while(my $line = <$in_fh>){
-		next if $line =~ /^\s*$/; # skip blank lines
-		if ($linecount == 1){
-			print {$out_fh} $line; ## the header line is copied as it is
-			$linecount++;
-			next;
-		}
-
-		my @row = split(/\t|\n|\s/,$line);
-
-		## every field is visited, including the last one; the loop variable
-		## aliases the array element, so the assignment changes @row in place
-		foreach my $field (@row){
-			$field = $symbol_for{$field} if exists $symbol_for{$field};
-		}
-
-		print {$out_fh} join("\t",@row)."\n";
-	}
-
-
-	close($in_fh);
-	close($out_fh) or die "Cannot write $file2: $!";
-}
-
-
-## 1.) This still needs further familiarization of the haploid, diploid, n-row formats
-## 2.) How are alleles distributed per loci?
-
-