diff mlstAddFields.pl @ 13:59e137488c63 draft

Uploaded
author estrain
date Wed, 24 May 2023 17:24:23 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mlstAddFields.pl	Wed May 24 17:24:23 2023 +0000
@@ -0,0 +1,96 @@
+#!/usr/bin/perl
+#
+# mlstAddFields.pl - add scheme metadata to one line of mlst output.
+#
+# Usage: mlstAddFields.pl <mlst_output.tsv>
+#
+# Reads the first line of the tab-separated file named by the first argument.
+# Column 2 is used as the scheme name and column 3 as the sequence type (ST).
+# The ST is looked up in <pubmlst dir>/<scheme>/<scheme>.txt and the line is
+# printed again with an extra fourth column holding every non-empty
+# clonal_complex, CC, Lineage and species value for that ST as comma-separated
+# key=value pairs.  The extra column is left empty when nothing is found.
+
+use strict;
+use warnings;
+
+use List::MoreUtils qw(first_index indexes);
+
+# Output label and header pattern for each optional scheme-table column.
+my @WANTED = (
+    [ 'clonal_complex', qr/clonal_complex/ ],
+    [ 'CC',             qr/CC/ ],
+    [ 'Lineage',        qr/Lineage/ ],
+    [ 'species',        qr/species/ ],
+);
+
+@ARGV or die "Usage: $0 <mlst_output.tsv>\n";
+my $mlst_path = $ARGV[0];
+
+open(my $mlst_fh, '<', $mlst_path) or die "Cannot open '$mlst_path': $!\n";
+my $mlst_line = <$mlst_fh>;
+close($mlst_fh);
+defined $mlst_line or die "No mlst result line found in '$mlst_path'\n";
+
+# Strip the newline first so it cannot end up inside the ST when the line has
+# only three columns; it is written back explicitly after the last field.
+chomp($mlst_line);
+my @mlst_fields = split(/\t/, $mlst_line);
+my ($scheme, $st) = @mlst_fields[1, 2];
+
+my %annotation_for;    # ST => "key=value,key=value" description
+
+if (defined $scheme && length $scheme) {
+    # Assumes mlst command is installed and available in the path.
+    # The database location is taken from between the first pair of single
+    # quotes on the help line that mentions "db/pubmlst".
+    my $help_line = qx{mlst -h | grep "db/pubmlst"};
+    my @quoted = split(/\'/, defined $help_line ? $help_line : '');
+    my $db_dir = defined $quoted[1] ? $quoted[1] : '';
+    $db_dir =~ s{bin/\.\.}{}g;    # drop the "bin/.." segment from the path
+
+    # A missing or unreadable scheme table is deliberately not fatal: the
+    # lookup stays empty and the extra column is printed blank.
+    if (open(my $scheme_fh, '<', "$db_dir/$scheme/$scheme.txt")) {
+        my $header_line = <$scheme_fh>;
+        $header_line = '' unless defined $header_line;
+        chomp($header_line);
+        my @headers = split(/\t/, $header_line);
+
+        # Column 0 is the key of the table, so only indexes above 0 are
+        # used; first_index returns -1 when no header matches.
+        my @columns;
+        for my $wanted (@WANTED) {
+            my ($label, $pattern) = @{$wanted};
+            my $idx = first_index { $_ =~ $pattern } @headers;
+            push(@columns, [ $label, $idx ]) if $idx > 0;
+        }
+
+        while (my $row = <$scheme_fh>) {
+            chomp($row);
+            my @cells = split(/\t/, $row);
+            next unless @cells;
+
+            my @desc;
+            for my $column (@columns) {
+                my ($label, $idx) = @{$column};
+                # Each field is tested on its own cell; split drops trailing
+                # empty cells, so a short row yields undef here.
+                my $value = $cells[$idx];
+                push(@desc, "$label=$value")
+                    if defined $value && length $value;
+            }
+            $annotation_for{ $cells[0] } = join(',', @desc);
+        }
+        close($scheme_fh);
+    }
+}
+
+my @trailing = @mlst_fields > 3 ? @mlst_fields[3 .. $#mlst_fields] : ();
+my @leading = map { defined $_ ? $_ : '' } @mlst_fields[0 .. 2];
+my $annotation = '';
+if (defined $st && exists $annotation_for{$st}) {
+    $annotation = $annotation_for{$st};
+}
+
+print join("\t", @leading, $annotation, @trailing), "\n";