changeset 20:237d0eef2ad3 draft

Uploaded paryml fix
author niels
date Mon, 10 Jun 2019 09:34:27 -0400
parents 0044b989f60f
children f8b2de0d849f
files 0348637001560173633/annovar_yaml/YAML_annovar.yml 0348637001560173633/annovar_yaml/YAML_arguments_annovar.yml 0348637001560173633/annovar_yaml/annovar_yaml.pl 0348637001560173633/annovar_yaml/annovar_yaml.xml annovar_yaml/YAML_annovar.yml annovar_yaml/YAML_arguments_annovar.yml annovar_yaml/annovar_yaml.pl annovar_yaml/annovar_yaml.xml
diffstat 8 files changed, 1335 insertions(+), 1335 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/0348637001560173633/annovar_yaml/YAML_annovar.yml	Mon Jun 10 09:34:27 2019 -0400
@@ -0,0 +1,72 @@
+GENERAL:
+  - APPLICATION: 'annovar'
+    PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/'
+    CODING_ANNOVAR: 'coding_annovar.pl'
+    TABLE_ANNOVAR: 'table_annovar.pl'
+    LOCATION_DATABASE: ' /hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/'
+    DOT2UNDERLINE: 'yes'
+    NASTRING: '.'
+    OTHERINFO: 'yes'
+    POLISH: 'yes'
+    REMOVE: 'yes'
+    THREAD: '8'
+    INPUTFORMAT: 'vcfinput'
+    SPECIES: 'human'
+    BUILD: 'hg19'
+ANALYSIS:
+  DATABASES:
+  - NAME: 'cosmic84'
+    ALIAS: 'N/A'
+    PROTOCOL: 'cosmic'
+    VERSION: '84'
+    COMMENT: '20190221'
+    AVAILABLE: 'yes'
+    REQUIRED: 'yes'
+    OPERATION: 'f' 
+    COLSWANTED: '4'
+  - NAME: 'refgene19'
+    ALIAS: 'N/A'
+    PROTOCOL: 'refgene'
+    VERSION: '19' 
+    AVAILABLE: 'yes'
+    REQUIRED: 'yes'
+    COMMENT: '20190210'
+    OPERATION: 'g'
+    HGVS: 'yes'
+    SPLICING: '6'
+    EXONSPLIC: 'yes'
+  - NAME: 'ncbiRefSeq_UMCU'
+    ALIAS: 'Annotation'
+    PROTOCOL: 'ncbiRefSeq'
+    VERSION: '_UMCU'
+    AVAILABLE: 'yes'
+    REQUIRED: 'yes'
+    COMMENT: "100519"
+    OPERATION: 'g'
+    HGVS: 'yes'
+    SPLICING: '6'
+    EXONSPLIC: 'yes'
+  - NAME: 'avsnp150'
+    ALIAS: 'dbSNP'
+    PROTOCOL: 'avsnp'
+    VERSION: '150'
+    AVAILABLE: 'yes'
+    REQUIRED: 'yes'
+    OPERATION: 'f'
+    COLSWANTED: '1'
+  - NAME: 'clinvar_20180603'
+    ALIAS: 'N/A'
+    PROTOCOL: 'clinvar'
+    VERSION: '_20180603'
+    AVAILABLE: 'yes'
+    REQUIRED: 'yes'
+    OPERATION: 'f'
+    COLSWANTED: '5'
+  - NAME: 'class100519'
+    ALIAS: 'Class'
+    PROTOCOL: 'class'
+    VERSION: '100519'
+    AVAILABLE: 'yes'
+    REQUIRED: 'yes'
+    OPERATION: 'f'
+    COLSWANTED: '1'
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/0348637001560173633/annovar_yaml/YAML_arguments_annovar.yml	Mon Jun 10 09:34:27 2019 -0400
@@ -0,0 +1,117 @@
+GENERAL:
+  - APPLICATION: 'annovar'
+    PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/'
+    CODING_ANNOVAR: 'coding_annovar.pl '
+    TABLE_ANNOVAR: 'table_annovar.pl '
+    LOCATION_DATABASE: ' /hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/'
+    DOT2UNDERLINE:
+      yes: '--dot2underline '
+      no: ''
+    NASTRING: '--nastring . '
+    OTHERINFO:
+      yes: '--otherinfo '
+      no: ''
+    POLISH:
+      yes: '--polish '
+      no: ''
+    REMOVE:
+      yes: '--remove '
+      no: ''
+    THREAD: '--thread 8 '
+    INPUTFORMAT:
+      vcfinput: '--vcfinput '
+    SPECIES:
+      human: 'humandb '
+      mouse: 'mousedb '
+    BUILD: '--buildver hg19 '
+ANALYSIS:
+  DATABASES:
+    - NAME: 'cosmic84'
+      ALIAS: 'N/A'
+      PROTOCOL: 'cosmic'
+      VERSION: '84'
+      COMMENT: '20190221' 
+      AVAILABLE:
+        yes: '1'
+        no: '0'
+      REQUIRED:
+        yes: '1'
+        no: '0'
+      OPERATION: 'f'
+      COLSWANTED: '--colswanted 4 '
+    - NAME: 'refgene19'
+      ALIAS: 'N/A'
+      PROTOCOL: 'refgene'
+      VERSION: '19' 
+      COMMENT: '20190210'  
+      AVAILABLE:
+        yes: '1'
+        no: '0'
+      REQUIRED:
+        yes: '1'
+        no: '0'
+      OPERATION: 'g'
+      HGVS:
+        yes: '--hgvs '
+        no: ''
+      SPLICING: '--splicing 6 '
+      EXONSPLIC:  
+        yes: '--exonicsplicing '
+        no: ''
+    - NAME: 'ncbiRefSeq_UMCU'
+      ALIAS: 'Annotation'
+      PROTOCOL: 'ncbiRefSeq'
+      VERSION: '_UMCU'
+      AVAILABLE:
+        yes: '1'
+        no: '0'
+      REQUIRED:
+        yes: '1'
+        no: '0'
+      COMMENT: "100519"
+      OPERATION: 'g'
+      HGVS:
+        yes: '--hgvs '
+        no: ''
+      SPLICING: '--splicing 6 '
+      EXONSPLIC:
+        yes: '--exonicsplicing '
+        no: ''
+    - NAME: 'avsnp150'
+      ALIAS: 'dbSNP'
+      PROTOCOL: 'avsnp'
+      VERSION: '150'
+      AVAILABLE:
+        yes: '1'
+        no: '0'
+      REQUIRED:
+        yes: '1'
+        no: '0'
+      OPERATION: 'f'
+      COLSWANTED: '--colswanted 1 '
+    - NAME: 'clinvar_20180603'
+      ALIAS: 'N/A'
+      PROTOCOL: 'clinvar'
+      VERSION: '_20180603'
+      COMMENT: 'blah'
+      AVAILABLE:
+        yes: '1'
+        no: '0'
+      REQUIRED:
+        yes: '1'
+        no: '0'
+      OPERATION: 'f'
+      COLSWANTED: '--colswanted 5 '
+    - NAME: 'class100519'
+      ALIAS: 'Class'
+      PROTOCOL: 'class'
+      VERSION: '100519'
+      AVAILABLE:
+        yes: '1'
+        no: '0'
+      REQUIRED:
+        yes: '1'
+        no: '0'
+      OPERATION: 'f'
+      COLSWANTED: '--colswanted 1 '
+      
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/0348637001560173633/annovar_yaml/annovar_yaml.pl	Mon Jun 10 09:34:27 2019 -0400
@@ -0,0 +1,1121 @@
+#/usr/bin/perl
+#This code is using v4 of the Run_Annovar_vx.pl script!
+
+##################
+#!!TEST COMMAND!!#
+##################
+#Following input required, input fixed currently to run limited Annovar settings
+#perl annovar_yaml.pl --inyml YAML_annovar.yml --invcf invcf.vcf --outvcf outvcf.vcf > log.txt
+
+#Future prospects: ability to learn to read YAML files to pick up parameters. Manual definition of command line still required!
+
+#perl modules use
+use YAML;
+use YAML::Tiny;
+use YAML::XS 'LoadFile';
+use Data::Dumper;
+#use Data::YAML::Writer;
+use Getopt::Long;
+use feature qw(postderef postderef_qq);
+use feature qw(refaliasing);
+#no warnings qw(experimental::postderef);
+use strict;
+
+#input required
+#two modes run and edit
+#Basis Yaml file [default required]
+#Yaml out file [required in edit mode]
+#species [default required]
+#build [default required]
+#protocol [default required]
+#protocolversion [default required]
+
+#basic paramters
+#Switch to enable disable Aliasses; enable=1 disable=0
+my $use_alias = "1";
+my $edit = "0";
+my $run = "0";
+my $application = "0";
+#arguments yaml file is fixed!
+#my $paryml = "YAML_arguments_annovar.yml";
+my $paryml;
+my $inyml;
+my $outyml;
+my $invcf;
+my $outvcf;
+my $yml_hash_arguments;
+my %yml_hash_arguments;
+my $yml_hash;
+my %yml_hash;
+my $yml_hash_edit;
+my %yml_hash_edit;
+my $in;
+my $in2;
+my %in;
+my %in2;
+my $writer;
+my $lookup;
+my $data_type;
+
+#parameter applications
+my %annovarparameters_array;
+my %annovarparameters_single;
+my $annovarparameters_array;
+my $annovarparameters_single;
+
+#Strings used as input parameters
+#my $input_general_location_database;
+#my $input_general_dir_scripts;
+#my $input_general_location_scripts_coding_annovar;
+#my $input_general_location_scripts_table_annovar;
+my $input_annovar_build;
+my $input_annovar_species;
+my $inprotocol;
+my $inprotocolversion;
+my @inprotocol;
+my @inprotocolversion;
+
+my $input_general_settings_dot2underline;
+my $input_general_settings_nastring;
+my $input_general_settings_otherinfo;
+my $input_general_settings_polish;
+my $input_general_settings_remove;
+my $input_general_settings_thread;
+my $input_general_settings_inputformat;
+
+my $input_databases_available;
+my $input_databases_required;
+
+my $input_databases_colswanted;
+my $input_databases_exonicsplic;
+my $input_databases_hgvs;
+my $input_databases_operation;
+my $input_databases_splicing;
+my @input_databases_colswanted;
+my @input_databases_exonicsplic;
+my @input_databases_hgvs;
+my @input_databases_operation;
+my @input_databases_splicing;
+
+#other variables
+my $script;
+my $type;
+my $count;
+my $column_build;
+my $column_species;
+my @column_build;
+my @column_species;
+my $pattern;
+my $pattern2;
+my $match;
+my $match_value;
+my @match_value;
+my @readonly;
+my %editinyml;
+my $folder_symlinks = '0';
+my $path_symlinks;
+my @softlink_building_blocks;
+
+#Fixed values for searching hashes
+my $level5 = "DATABASES";
+
+#parameters to build command
+my $protocol;
+my @protocol;
+my $operation;
+my $argument;
+
+GetOptions (
+	#Required input
+	'protocol=s' => \$inprotocol,
+	'protocolversion=s' => \$inprotocolversion,
+	'script=s' => \$script,
+	'invcf=s' => \$invcf,
+	'outvcf=s' => \$outvcf,
+        'paryml=s' => \$paryml,
+	'application=s' => \$application,
+        'inyml=s' => \$inyml,
+        'outyml=s' => \$outyml,
+        'build=s'       => \$input_annovar_build,
+        'species=s'   => \$input_annovar_species,
+	#Optional input
+	'edit' => \$edit,
+	'run' => \$run,
+	#variables in GENERAL
+	'dot2underline=s'   => \$input_general_settings_dot2underline,
+	'nastring=s'   => \$input_general_settings_nastring,
+	'otherinfo=s'   => \$input_general_settings_otherinfo,
+	'polish=s'   => \$input_general_settings_polish,
+	'remove=s'   => \$input_general_settings_remove,
+	'thread=s'   => \$input_general_settings_thread,
+	'inputformat=s'  => \$input_general_settings_inputformat,
+	#variables in DATABASES
+	'colswanted=s'   => \$input_databases_colswanted,
+	'exonicsplic=s'   => \$input_databases_exonicsplic,
+	'hgvs=s'   => \$input_databases_hgvs,
+	'operation=s'   => \$input_databases_operation,
+	'splicing=s'   => \$input_databases_splicing,
+);
+
+chomp $invcf;
+chomp $outvcf;
+
+#inactive options
+#       'drequired=s'   => \$input_databases_required,
+#       'davail=s'   => \$input_databases_available,
+
+#       'gpathdatabase=s'   => \$input_general_location_database,
+#	'gpathscripts=s'   => \$input_general_dir_scripts,
+#       'gpathcodannovar=s'   => \$input_general_location_scripts_coding_annovar,
+#       'gpathtabannovar=s'   => \$input_general_location_scripts_table_annovar,
+
+#Check input parameters
+#Do some checks whether all required arguments are given
+
+#some code
+
+#arrays with all application specific parameters
+#make sure keys are unique to allow for parsing of yml! 
+
+%annovarparameters_array = (
+	"COLSWANTED" => "$input_databases_colswanted",
+	"EXONICSPLIC" => "$input_databases_exonicsplic",
+	"HGVS" => "$input_databases_hgvs",
+	"OPERATION" => "$input_databases_operation",
+	"SPLICING" => "$input_databases_splicing",
+);
+
+%annovarparameters_single = (
+	"DOT2UNDERLINE" => "$input_general_settings_dot2underline",
+	"NASTRING" => "$input_general_settings_nastring",
+	"OTHERINFO" => "$input_general_settings_otherinfo",
+	"POLISH" => "$input_general_settings_polish",
+	"REMOVE" => "$input_general_settings_remove",
+	"THREAD" => "$input_general_settings_thread",
+	"INPUTFORMAT" => "$input_general_settings_inputformat",
+);
+
+#parameter not allowed to be edited in yml
+@readonly = ("hg19","human","mouse","mm10","annovar","VERSION","TABLE_ANNOVAR","refgene","cosmic");
+
+
+#converting input strings into input arrays
+@inprotocol = split (",", $inprotocol);
+@inprotocolversion = split (",", $inprotocolversion);
+@input_databases_colswanted = split (",", $input_databases_colswanted);
+@input_databases_exonicsplic = split (",", $input_databases_exonicsplic);
+@input_databases_hgvs = split (",", $input_databases_hgvs );
+@input_databases_operation = split (",", $input_databases_operation);
+@input_databases_splicing = split (",", $input_databases_splicing);
+
+#$in = $inyml;
+#openyml_read($in);
+#$in = $inyml;
+#load($in);
+
+#check if combination of application,species, build and protocol exists
+#$count = "0";
+#%in = $yml_hash;
+#foreach (@inprotocol) {
+#print "protocol $inprotocol[$count]\n";  
+#$pattern = "$application:ANALYSIS:$input_annovar_species:$input_annovar_build:DATABASES:$inprotocol[$count]$inprotocolversion[$count]:NAME $inprotocol[$count]$inprotocolversion[$count]";
+#@pattern = ("GENERAL:species", "GENERAL:build", "GENERAL:protocol");
+#foreach @pattern {
+#  parse($in, $_);
+#  if ($match =~ /^1$/) { print "Present!\n"; };
+#  $count++;
+#  }
+#}
+
+#$in = $inyml;
+#openyml_read($in);
+#$in = $inyml;
+#load_edit($in);
+
+#Editing section first single values, next arrays
+#print "Starting editing of strings...\n";
+#foreach my $key (keys %annovarparameters_single) {
+#    my $innermatch = "0";
+    #print "Key: $key\n";
+#    foreach my $innerkey (@readonly) {
+#        if ($key =~ /^$innerkey$/) {
+	    #print "Value not allowed to be changed!\n";  
+#            $innermatch++; };
+#        }
+#    if ($innermatch >= 1) {
+#	print "Skipping: $key\n";
+	#Do nothing
+#    } else { 
+#    my $value = $annovarparameters_single{$key};
+#    if ($value !~ /^$/) {
+#        print "key:$key value:$value\n";
+#        $pattern = "^($application:.{1,}:$key)";
+#	print "pattern: $pattern\n";
+#        %in = $yml_hash_edit; 
+#        parse($in, $pattern);
+#        print "editing value...: $match_value\n";
+#        @match_value = split (':', $match_value);
+# 	 my $max = scalar @match_value;
+# 	 $count = "0";
+#        foreach (@match_value) {
+#	   if ($count == $max-1) {
+#               print "$_\n";
+#	       $_ =~ s/^.{1,}$/$value/g;
+#	       print "after: $_ \n";
+#	   }
+#	   $count++;
+#	}
+#	print "max $max\n";
+#        if ( $max == 4 ) {
+#            $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]} = $match_value[3]
+#        }
+#    }
+#    } 
+#}
+
+#print "\n";
+#my $checkkey;
+
+#allow processing of arrays as well
+#print "Starting editing of array values...\n";
+#foreach my $key ( keys %annovarparameters_array) {
+#    $count = "0";
+    #print "COUNT: $count\n";
+    #print "KEY: $key\n";
+#    my @key = split (',', $key);
+#    foreach my $innerkey (@key) {
+#        my $innermatch = "0";
+#        foreach $checkkey (@readonly) {
+#            if ($innerkey =~ /^$checkkey$/) {
+                #Do nothing
+#                $innermatch++; };
+#            }
+#        if ($innermatch >= 1) {
+#            print "innerkey: $innerkey skipped\n";
+
+#       } else {
+#        my $value = $annovarparameters_array{$key};
+#	if ($value !~ /^$/ ) {
+#	    print "Value: $value\n";
+#	    my @value = split (',', $value);
+#	    my $innercount = "0";
+#            foreach my $innervalue (@value) {
+#		if ( $innervalue !~ /^$/ ) {
+#	            print "\nkey: $key innerkey: $innerkey check: $checkkey value: $innervalue\n";
+#	            print "innercount: $innercount\n";
+#	            $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]$inprotocolversion[$innercount]:$key)";
+#                    print "pattern: $pattern\n";
+#	            %in = $yml_hash_edit;
+#	            parse($in, $pattern);
+#		    undef %in;
+#	            print "editing value...: $match_value\n";
+#	                if ($match =~ /^1$/) {
+#                            @match_value = split (':', $match_value);
+#                            my $max = scalar @match_value;
+#                            #$count = "0";
+#                            foreach (@match_value) { 
+#				print "count: $count\n";
+#		                if ($count == $max-1) {
+#		                    print "empty_check: $_ \n";
+#			            $_ =~ s/^.{1,}$/$value[$innercount]/g;
+#			            print "empty_check_after: $_ \n";
+#		                }                                   
+#		                print "Max: $max\n";
+#                                $count++;
+#		            }
+#			    print "change: $value[$innercount]\n";
+#			    print "Max: $max\n";
+#                            if ( $max == 9 ) {
+#                                $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$match_value[7]} = $match_value[8]; 
+#                           }
+#	                } else {
+#
+#	                     if ($match =~ /^0$/) {   
+#	                         $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]:$inprotocolversion[$innercount])";
+#                                 %in = $yml_hash_edit;
+#				 parse($in, $pattern);          
+#				 undef %in;
+#	                         print "editing value...: $match_value\n";
+#	                         if ($match > 1) {
+#                                     @match_value = split (':', $match_value); 
+#	                             my $max = scalar @match_value;
+#				     $count = "0";
+#                                     print "Max: $max\n";
+#                                     if ( $max == 9 ) {
+#                                         $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$innerkey} = "$innervalue";
+#                                     }
+# 	                         }    
+#	                     }
+#	                }          
+#		} else {
+#                #Do nothing no arugment supplied  
+#		}
+#	    $innercount++;
+#	    }
+#        } else {
+#	#Do nothing no argument supplied
+#        }
+#	}
+#    }
+#$count++;
+#print "\n";
+#}
+
+#print "\nFinished editing...\n";
+
+#Create second yml file with change values originating from cli
+#$in = $outyml;
+#openyml_write($in);
+#%in = $yml_hash_edit; 
+#writeyml_edit($in);
+#$in = $outyml;
+#load($outyml);
+#print "Finished loading second yml...\n";
+
+#Load ymlhash with arguments
+#$in = $paryml;
+#openyml_read($in);
+#$in = $paryml;
+#load_arguments($in);
+#print "Finished loading arguments...\n";
+
+
+#Fill hashes with input yaml files as defined in input and fixed arguments yaml
+openyml_read ($paryml);
+openyml_read ($inyml);
+load ($inyml, %yml_hash, $yml_hash);
+load_arguments ($paryml, %yml_hash_arguments, $yml_hash_arguments);
+
+##Check input yaml files format
+##Pickup arguments from YAML to run command
+
+#########################
+#!!General parser test!!#
+#########################
+
+#Parsing both input YML files to check format, deviations results in killing the process
+print "\nParsing Input YAML File\n";
+parse ($yml_hash, $data_type); 
+print "\nParsing Argument YAML File\n";
+parse ($yml_hash_arguments, $data_type); 
+
+#Manually defined values to fill Annovar command
+#Possible to extract from YAML using parser
+my $application = 'annovar';
+
+#Possible to extract from YAML using parser
+my $ncbiRefSeq = 'ncbiRefSeq';
+my $cosmic = 'cosmic';
+my $dbsnp = 'avsnp';
+my $clinvar = 'clinvar';
+my $class = 'class';
+
+#Possible to extract from YAML using parser
+my $ncbiRefSeq_version = '_UMCU';
+my $cosmic_version = '84';
+my $dbsnp_version = '150';
+my $clinvar_version = '_20180603';
+my $class_version = '100519';
+
+#possible to extract from YAML using parser
+#species input fixed
+my $input_annovar_species = "human";
+
+#manual definition required! --> parameter in plugin galaxy / script hardcoded??
+my $language = 'perl ';
+
+#Possible to extract from YAML using parser
+my $parse1 = 'parse1';
+my $parse2 = 'parse2';
+
+#Possible to extract from YAML
+my $application_path = "$application,APPLICATION,GENERAL";
+
+my $ncbiRefSeq_path = "$ncbiRefSeq$ncbiRefSeq_version,NAME,ANALYSIS,DATABASES";
+my $cosmic_path = "$cosmic$cosmic_version,NAME,ANALYSIS,DATABASES";
+my $dbsnp_path = "$dbsnp$dbsnp_version,NAME,ANALYSIS,DATABASES";
+my $clinvar_path = "$clinvar$clinvar_version,NAME,ANALYSIS,DATABASES";
+my $class_path = "$class$class_version,NAME,ANALYSIS,DATABASES";
+
+##Code to create folder for database softlinks
+
+my $path_folder_symlinks;
+my @symlinkfolder_building_blocks;
+my $symlinks_folder_created;
+
+if ( $use_alias =~ /^1$/) {
+
+@symlinkfolder_building_blocks = ("$parse1,LOCATION_DATABASE,$application_path","$parse1,SPECIES,$application_path");
+}
+
+#Setup variables for processing parse_look function
+my $lookup;
+my %input;
+my $input;
+my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5);
+
+$symlinks_folder_created = '0';
+
+
+#Code reading species en database location from YAML
+if ( $use_alias =~ /^1$/) {
+
+  foreach (@symlinkfolder_building_blocks) {
+    $in = $inyml;
+    $in2 = $paryml;
+    #print "loop1:$_\n";
+    my @values = split /\,/, $_;
+    foreach (@values) {
+      if ( $_ !~ /^parse[12]$/ ) { 
+        print "Error while generating soflink folder [$_]...\n";
+        $path_folder_symlinks .= "$_";
+        last;
+      }
+      $lookup = "";
+      parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
+      chomp $lookup;
+      $path_folder_symlinks .= "$lookup";
+      $lookup = '';
+      %input = '';
+      $input = '';
+      last;
+    }
+  }
+
+  #Generate base name folder, numbered 0 up to endless to allow specific folder for parrallel analysis
+
+  $symlinks_folder_created = '0';
+
+  $path_folder_symlinks =~ s/^\s+|\s+$//g;
+  $folder_symlinks =~ s/^\s+|\s+$//g;
+
+  print "\n";
+  print "Folder symlinks: $path_folder_symlinks/$folder_symlinks\n";
+
+  while ($symlinks_folder_created =~ /^0$/ ) {
+    if ( -d "$path_folder_symlinks/$folder_symlinks" and -e "$path_folder_symlinks/$folder_symlinks" ) { 
+      print "Folder $path_folder_symlinks/$folder_symlinks is in use!\n";
+      $folder_symlinks++;
+    } else { 
+      print "Creating $path_folder_symlinks/$folder_symlinks...\n"; 
+      system ( "mkdir $path_folder_symlinks/$folder_symlinks" );
+      $path_symlinks = "$path_folder_symlinks/$folder_symlinks/";    
+      print "Created $path_symlinks\n";
+      $symlinks_folder_created++;
+    }
+  }
+}
+
+##Building annovar command:
+	
+#Manual definition required
+#blocks to build command
+my @command_building_blocks = ( 
+"$language",
+"$parse1,PATHSCRIPTS,$application_path",
+"$parse1,TABLE_ANNOVAR,$application_path",
+"$invcf",
+"$parse1,LOCATION_DATABASE,$application_path",
+"$parse1,SPECIES,$application_path",
+"$parse1,BUILD,$application_path",
+"$parse1,REMOVE,$application_path",
+#Manually define arguments using ALIASES or regular NAME argument based on turning ALIAS on or OFF
+#Define this in sections scanning the YAML file
+"--protocol \'",
+"$parse2,ALIAS,$ncbiRefSeq_path",
+"\,",
+"$parse2,NAME,$cosmic_path",
+"\,",
+"$parse2,ALIAS,$dbsnp_path",
+"\,",
+"$parse2,NAME,$clinvar_path",
+"\,",
+"$parse2,ALIAS,$class_path",
+"\' ",
+"--operation \'",
+"$parse2,OPERATION,$ncbiRefSeq_path",
+"\'",
+"\,",
+"\'",
+"$parse2,OPERATION,$cosmic_path",
+#"\'\,\'",
+"\'",
+"\,",
+"\'",
+"$parse2,OPERATION,$dbsnp_path",
+#"\'\,\'",
+"\'",
+"\,",
+"\'",
+"$parse2,OPERATION,$clinvar_path",
+#"\'\,\'",
+"\'",
+"\,",
+"\'",
+"$parse2,OPERATION,$class_path",
+"\' ",
+"$parse1,DOT2UNDERLINE,$application_path",
+"$parse1,OTHERINFO,$application_path",
+"$parse1,NASTRING,$application_path",
+"$parse1,INPUTFORMAT,$application_path",
+"--arg \'",
+"$parse2,HGVS,$ncbiRefSeq_path",
+"$parse2,SPLICING,$ncbiRefSeq_path",
+"$parse2,EXONSPLIC,$ncbiRefSeq_path",
+#"\'\,\'",
+"\'",
+"\,",
+"\'",
+"$parse2,COLSWANTED,$cosmic_path",
+#"\'\,\'",
+"\'",
+"\,",
+"\'",
+"$parse2,COLSWANTED,$dbsnp_path",
+#"\'\,\'",
+"\'",
+"\,",
+"\'",
+"$parse2,COLSWANTED,$clinvar_path",
+#"\'\,\'",
+"\'",
+"\,",
+"\'",
+"$parse2,COLSWANTED,$class_path",
+"\' ",
+"$parse1,THREAD,$application_path",
+"$parse1,POLISH,$application_path",
+"-outfile $outvcf "); 
+
+print "\n";
+
+
+#Create Empty command
+my $test_command = "";
+
+#Start building command to run
+foreach (@command_building_blocks) {
+  $in = $inyml;
+  $in2 = $paryml;
+  #print "loop1:$_\n";
+  if ( $_ =~ /^,$/ ) {
+    print "[NOT YAML] $_ is separator and does not require matching!\n";
+    $test_command .= "$_";
+    next;
+  }
+  my @values = split /\,/, $_;
+  foreach (@values) {
+    if ( $_ !~ /^parse[12]$/ ) { 
+      print "[NOT YAML] $_ does not require matching!\n";
+      $test_command .= "$_";
+      last;
+    }
+    $lookup = "";
+    parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
+
+    #If alias is enable determine which number is used for this analysis to pickup the correct database files
+    if ( $use_alias =~ /^1$/ ) {
+      #add code to add folder number for analysis
+      if ($values[1]=~/^SPECIES/ )   {
+        $lookup =~ s/^\s+|\s+$//g;
+        $folder_symlinks;
+        $lookup .= "/$folder_symlinks ";
+      }
+    }
+    print "[YAML]: $lookup\n";
+    $test_command .= "$lookup";
+    $lookup = '';
+    $input = '';
+    %input = '';
+    last;
+  }
+}
+
+print "\nalias:$use_alias\n";
+
+#Created softlinks voor database files
+
+my @database_info;
+
+if ( $use_alias =~ /^1$/ ) {
+  print "Entering Alias stuff..\n";
+  #Define databases in use
+  @softlink_building_blocks = (
+  "$parse2,NAME,$ncbiRefSeq_path",
+  "$parse2,NAME,$cosmic_path",
+  "$parse2,NAME,$dbsnp_path",
+  "$parse2,NAME,$clinvar_path",
+  "$parse2,NAME,$class_path" );
+
+  print "\nCreating softlinks to Annovar database files:\n";
+
+  #Use parse_lookup to find argument and create symlinks in analysis database folder
+  foreach (@softlink_building_blocks) {
+    $in = $inyml;
+    $in2 = $paryml;
+    my @values = split /\,/, $_;
+    foreach (@values) {
+      if ( $_ !~ /^parse[12]$/ ) { 
+        print "[NOT YAML] $_ does not require matching!\n";
+        last;
+      }
+      $lookup = "";
+      parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
+      print "database=$lookup\n"; 
+      #add code to add folder number for analysis
+      #print "Path_destination=$path_symlinks\n";
+      #print "Path_origin=$path_folder_symlinks\n";
+      my $database_detail = $lookup;
+
+      $lookup = "";
+      parse_lookup ("parse1", "BUILD", "$application","APPLICATION","GENERAL", %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
+      print "Species: $lookup\n";
+      my @build = split /\s/, $lookup;
+      #print "Species=$build[1]\n";
+      my $species = $build[1];
+
+      $lookup = ''; 
+      print "$values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5]\n";
+      parse_lookup ($values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
+      #print "Alias=$lookup\n";
+      my $alias = $lookup;
+
+      #Based on database and presence of either index file or not run softlink commands
+      if ( $alias =~ /^N\/A$/ ) {
+        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$database_detail.txt";
+        print "command: $command\n";
+        system ( $command );
+      } elsif ( $alias !~ /^N\/A$/ ) {
+        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$alias.txt";
+        print "command2: $command\n";
+        system ( $command );
+      } 
+      if ( $alias =~ /^dbSNP$/ ) {
+        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$alias.txt.idx";
+        print "command3: $command\n";
+        system ( $command );
+      }
+      if ( $alias =~ /^Annotation$/ ) {
+        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail\Mrna.fa $path_symlinks$species\_$alias\Mrna.fa";
+        print "command4: $command\n";
+        system ( $command );
+      }
+      if ( $database_detail =~ /^clinvar/ ) {
+        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$database_detail.txt.idx";
+        print "command3: $command\n";
+        system ( $command );
+      }
+
+      #Push all softlinks used in array
+      if ( $alias !~ /^N\/A$/ ) {
+        push @database_info, "$alias=$species\_$database_detail";
+      }
+
+
+      $lookup = '';
+      $input = '';
+      %input = '';
+      last;
+    }
+  }
+  print "Done preparing databases!\n";
+}
+
+#Run Generated annovar command
+
+print "\nResulting in following command: $test_command\n";
+#################################################
+#!!IMPORTANT# DECOMMENT FOR ACTIVE USE!!#
+system ($test_command);
+#################################################
+
+my $addition_vcf;
+
+if ( $use_alias =~ /^1$/ ) {
+
+  #Add translation for common databases to specific version 
+  $addition_vcf;
+  my $database_details = $path_symlinks.'database_details.txt';
+  $addition_vcf .= "##";
+  foreach (@database_info) {
+    $addition_vcf .= ":$_";
+  }  	
+
+  #Add information to vcf output file
+  open(OUTVCF_EDIT, '>', "$outvcf\_edit") or die "Could not open file '$outvcf' $!";
+  print "\nDone open edit output vcf file...\n";
+  open(OUTVCF, '<', "$outvcf") or die "Could not open file '$outvcf' $!";
+  print "Done reading vcf output vcf file...\n\n";
+
+
+  while ( my $line = <OUTVCF> ) {
+    my @row = split ( "\t", $line );
+    if ( $row[0] =~ /^#CHROM$/ ) {
+      print OUTVCF_EDIT "$addition_vcf\n";
+      print OUTVCF_EDIT "$line";
+    } else {
+      print OUTVCF_EDIT "$line";
+    }
+  }
+
+  close OUTVCF_EDIT;
+  close OUTVCF;
+
+  #Replace orginal vcf file with edited vcf file with additional information
+  my $mv_command = "mv $outvcf\_edit $outvcf";
+  system ($mv_command);
+
+  print "\nAdding line to vcf file:\n";
+  print "$addition_vcf\n\n";
+
+  #Remove folder containing softlinks
+  print "Removing temporary directory:\n";
+  print "rm -r $path_symlinks\n\n";
+  system ( "rm -r $path_symlinks" ); 
+}
+
+print "Job done program stopping.\n";
+
+exit;
+
+#####################
+#!!!END OF SCRIPT!!!#
+####################
+
+#############
+#SUBROUTINES#
+#############
+
+#############
+#Subroutine to obtain datatype variable
+#############
+sub test_type {
+
+my $input = $_[0];
+$data_type = ref($input);
+return $data_type;
+
+}
+
+
+#############
+#Subroutine to parse and check yaml for correct format
+#############
+sub parse {
+
+#@inyml = @{$yml_hash->{$pattern4}{$pattern5}};
+#@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}};
+
+my $input_hash = $_[0];
+
+my ($key_a, $key_a2, $key_b, $key_b2, $key_c, $key_c2);
+my ($value_a, $value_a2, $value_b, $value_b2, $value_c, $value_c2);
+my (@hash, @hash2, $hash, $hash2);
+
+my $count_a = '0';
+my $count_a2 = '0';
+my $count_a3 = '0';
+my $count_a4 = '0';
+my $count_b = '0';
+my $count_b2 = '0';
+my $count_b3 = '0';
+my $count_b4 = '0';
+my $count_c = '0';
+my $count_c2 = '0';
+my $count_c3 = '0';
+my $count_c4 = '0';
+
+for $key_a (keys %$input_hash) { 
+  $count_a++;
+  for $value_a (values %$input_hash) {
+    $count_a2++;
+    if ( $count_a =~ /^$count_a2$/ ) {
+      test_type($value_a);
+      if ($data_type =~ /^HASH$/) {
+        for $key_a2 ( keys %{$input_hash->{$key_a}} ) {
+          $count_a3++;
+	  for $value_a2 ( values  %{$input_hash->{$key_a}} ) {
+            $count_a4++;
+            if ($count_a3 =~ /^$count_a4$/) {
+	      test_type($value_a2);
+              if ($data_type =~ /^HASH$/) {
+		print "[ERROR1] Found unexpected yml file format [ key: $key_a2 ] , please check whether your YAML file meets requirements!\n";
+		exit;
+	      } elsif ($data_type =~ /^ARRAY$/) {
+                @hash = @{$input_hash->{$key_a}{$key_a2}};
+		foreach $hash ( @hash ) {
+		  for $key_b ( keys %$hash ) { 
+		    $count_b++;
+		    for $value_b (values %$hash ) {
+                      $count_b2++;
+                      if ($count_b =~ /^$count_b2$/) {
+                        test_type($value_b);
+                        if ( $data_type =~ /^HASH$/ ) {
+			  for $key_b2 ( keys %{$hash->{$key_b}} ) { 
+			    $count_b3++;
+			    for $value_b2 (values %{$hash->{$key_b}} ) {  
+		              $count_b4++;
+			      if ($count_b3 =~ /^$count_b4/) {
+                                test_type($value_b2);
+      			        if ( $data_type =~ /^HASH$/ ) {
+			        print "[ERROR2] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n";
+				exit;
+		                } elsif ( $data_type =~ /^ARRAY$/ ) {  
+			        print "[ERROR3] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n";
+				exit;
+			        } else {
+				#Assume value level 4
+				print "value_b2: $key_a:$key_a2:$key_b:$key_b2:$value_b2\n";
+			        }
+		              }    
+			    }
+                            $count_b4 = "0";
+                          }
+			  $count_b3 = "0";
+		        } 
+			elsif ( $data_type =~ /^ARRAY$/ ) {
+			  print "[ERROR4] Found unexpected yml file format [ key: $key_b ] , please check whether your YAML file meets requirements!\n";
+			  exit;
+                        } else {     
+			  #assume value level 3
+			  print "value_b: $key_a:$key_a2:$key_b:$value_b\n";
+		        }	
+                      }
+            	    }
+                  $count_b2 = '0';
+                  }
+	        $count_b = '0';
+	        }    
+              } else {	
+	        #Assume value level 2
+		print "Value_a2: $key_a:$value_a2\n";
+              }   
+            }
+          }
+        $count_a4 = '0';
+        }
+      $count_a3 = '0';
+      } 
+      elsif ($data_type =~ /^ARRAY/) {
+        @hash2 = @{$input_hash->{$key_a}}; 
+	foreach $hash2 ( @hash2 ) {
+	  for my $key_c ( keys %$hash2 ) {
+	    $count_c++;
+	    for $value_c ( values %$hash2 ) {
+              $count_c2++;
+	      if ($count_c =~ /^$count_c2$/) {
+                test_type($value_c);
+                if ( $data_type =~ /^HASH$/ ) {
+		  for $key_c2 ( keys %{$hash2->{$key_c}} ) {
+		    $count_c3++;
+		    for $value_c2 ( values %{$hash2->{$key_c}} ) {
+		      $count_c4++;
+		      if ( $count_c3 =~ /^$count_c4$/ ) {
+                        test_type($value_c2);
+                        if ( $data_type =~ /^HASH$/ ) {
+                          print "[ERROR5] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n";
+		          exit;
+		        } elsif ( $data_type =~ /^ARRAY$/ ) {
+                          print "[ERROR6] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n";
+		          exit;
+		        } else { 
+			  #Assume level 4
+			  print "value_c2: $key_a:$key_a2:$key_c:$key_c2:$value_c2\n";
+			}
+		      }
+		    }
+                  $count_c4 = '0';
+	          }
+                $count_c3 = '0';	  
+		} 
+		elsif ( $data_type =~ /^ARRAY$/ ) {
+		  print "[ERROR7] Found unexpected yml file format [ key: $key_c ] , please check whether your YAML file meets requirements!\n";
+		  exit;
+		} else {
+		  #assume value level 3
+                  print "value_c: $key_a:$key_c:$value_c\n";
+         	}
+              }
+	    } 
+          $count_c2 = '0';
+          }
+        $count_c = '0'; 
+        }
+      } else { 
+        #Assume value level 1
+	print "value_a: $key_a:$value_a\n";
+      }
+    }   
+  }
+$count_a2 = '0';
+}
+$count_a = '0';
+
+$data_type = "";
+
+}
+
+#########################################
+#Subroutine to fetch arguments from YAML#
+#########################################
+
+sub parse_lookup {
+
+my $hash;
+my %hash;
+my $hash_a;
+my %hash_a;
+my %hash_a2;
+my $hash_a2;
+my @inyml;
+my @inarguments;
+my $count;
+my $match;
+my $match2;
+my $argument;
+my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5);
+
+if ( $_[0] =~ /^parse1$/ ) {
+$pattern1 = $_[1];
+$pattern2 = $_[2];
+$pattern3 = $_[3];
+$pattern4 = $_[4];
+@inyml = @{$yml_hash->{$pattern4}};
+@inarguments = @{$yml_hash_arguments->{$pattern4}};
+} elsif ( $_[0] =~ /^parse2$/) {
+$pattern1 = $_[1];
+$pattern2 = $_[2];
+$pattern3 = $_[3];
+$pattern4 = $_[4];
+$pattern5 = $_[5];
+@inyml = @{$yml_hash->{$pattern4}{$pattern5}};
+@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}};
+}
+
+$match = "0";
+
+foreach my $hash ( @inyml ) {
+  $hash = \%$hash;
+  if ($hash->{$pattern3} =~ /^$pattern2$/ ) { 
+    my $value = $hash->{$pattern1};
+    if ($value eq "" ) {    
+      print "Error unknown argument \"$pattern1\" for annotation database \"$pattern2\" in input YAML!\n";
+      exit;
+    } 
+    foreach my $hash_a (@inarguments) {
+      my $hash_a2 = $hash_a->{$pattern1};
+      if ( $hash_a->{$pattern3} =~ /^$pattern2$/ ) {
+        $match2++;
+        if ($hash_a2 =~ /^HASH/ ) {
+          $argument = $hash_a2->{$value};
+        } else {    
+        $argument = $hash_a2;
+        }      
+      if ($argument eq "" ) {
+        print "Unknown given option \"$value\" for argument \"$pattern1\" in YAML arguments file or input YAML for database \"$pattern2\"!\n";
+	print "Please check your input and start the plugin again.\n";
+	exit;
+        }	
+      }
+      if ($match2 =~ /^1$/) { 
+        #Do nothing 
+      } elsif ( $match2 > 1 ) {
+        print "Error Multiple matches for database \"$pattern2\" database in YAML argumentsfile.\n";
+	print "Please check your input and start the plugin again.\n";
+	exit;
+      } elsif ( $match2 =~ /^0$/ ) {
+        print "Error no matches for  database \"$pattern2\" in YAML argumentsfile.\n";
+	print "Please check your input and start the plugin again.\n";
+	exit;
+      } 
+    }
+    $match++;  
+    $lookup = $argument;    
+  }
+}
+if ($match =~ /^1$/) {
+  #Do nothing 
+} else {
+  print "match: $match \n";
+  print "Error in yaml file multiple instances matching input yaml, please check input!\n";
+  exit;
+}
+
+return $lookup;
+
+}
+
+#############
+#Subroutine for opening out / edited yaml file
+#############
+sub openyml_write {
+my $outyml = $in; 
+#Create second yml file with change values originating from cli
+open(OUT, '>', "$outyml") or die "Could not open file '$outyml' $!";
+print "Done creating output file...\n";
+}
+
+#############
+#Subroutine to check whether yaml file exists
+#############
+sub openyml_read { 
+$in = $_[0];
+chomp $in;
+my $inyml = $in;
+open(FILE, '<', "$inyml") or die "Could not open file '$inyml' $!";
+close FILE;
+print "Done reading\n";
+}
+
+#############
+#Subroutine for reading argument yaml file in hash
+##############
+sub load_arguments {
+$in = $_[0];
+$yml_hash_arguments = LoadFile($in);
+return($yml_hash_arguments, %yml_hash_arguments);
+}
+
+#############
+#Subroutine for reading input yaml file in hash
+##############
+sub load {
+$in = $_[0];
+$yml_hash = LoadFile($in);
+return($yml_hash, %yml_hash);
+}
+
+#############
+#Subroutine for reading edited input yml file in hash
+##############
+sub load_edit {
+$in = $_[0];
+$yml_hash_edit = LoadFile($in);
+}
+
+#############
+#Subroutine for editing content yaml hash
+##############
+sub writeyml {
+my $yml_hash = $in;
+$writer = sub {
+    my $line = shift;
+    print FILE "$line\n";
+};
+my $file = Data::YAML::Writer->new;
+$file->write( $yml_hash, $writer );
+}
+
+#############
+#Subroutine for editing content edited yaml hash
+##############
+sub writeyml_edit {
+my $yml_hash = $in;
+$writer = sub {
+    my $line = shift;
+    print OUT "$line\n";
+};
+my $file = Data::YAML::Writer->new;
+$file->write( $yml_hash_edit, $writer );
+close OUT;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/0348637001560173633/annovar_yaml/annovar_yaml.xml	Mon Jun 10 09:34:27 2019 -0400
@@ -0,0 +1,25 @@
+<tool id="annovar_yaml" name="Annovar YAML" version="1.0">
+    <requirements>
+        <requirement type="package" version="1.29">perl-yaml</requirement>
+        <requirement type="package" version="1.73">perl-yaml-tiny</requirement>
+        <requirement type="package" version="0.74">perl-yaml-xs</requirement>
+        <requirement type="package" version="2.173">perl-data-dumper</requirement>
+        <requirement type="package" version="2.50">perl-getopt-long</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        perl $__tool_directory__/annovar_yaml.pl --paryml /data/annovar/YAML_arguments_annovar.yml --invcf $vcf --inyml $yaml --outvcf $output
+    ]]></command>
+
+    <inputs>
+
+        <param type="data" name="yaml" format="txt" />
+        <param type="data" name="vcf" format="vcf" />
+
+    </inputs>
+    <outputs>
+        <data name="output" format="vcf"/>
+    </outputs>
+    <help><![CDATA[
+        TODO: Fill in help for Annovar YAML tool. Added paryml
+    ]]></help>
+</tool>
--- a/annovar_yaml/YAML_annovar.yml	Mon Jun 10 09:21:03 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-GENERAL:
-  - APPLICATION: 'annovar'
-    PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/'
-    CODING_ANNOVAR: 'coding_annovar.pl'
-    TABLE_ANNOVAR: 'table_annovar.pl'
-    LOCATION_DATABASE: ' /hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/'
-    DOT2UNDERLINE: 'yes'
-    NASTRING: '.'
-    OTHERINFO: 'yes'
-    POLISH: 'yes'
-    REMOVE: 'yes'
-    THREAD: '8'
-    INPUTFORMAT: 'vcfinput'
-    SPECIES: 'human'
-    BUILD: 'hg19'
-ANALYSIS:
-  DATABASES:
-  - NAME: 'cosmic84'
-    ALIAS: 'N/A'
-    PROTOCOL: 'cosmic'
-    VERSION: '84'
-    COMMENT: '20190221'
-    AVAILABLE: 'yes'
-    REQUIRED: 'yes'
-    OPERATION: 'f' 
-    COLSWANTED: '4'
-  - NAME: 'refgene19'
-    ALIAS: 'N/A'
-    PROTOCOL: 'refgene'
-    VERSION: '19' 
-    AVAILABLE: 'yes'
-    REQUIRED: 'yes'
-    COMMENT: '20190210'
-    OPERATION: 'g'
-    HGVS: 'yes'
-    SPLICING: '6'
-    EXONSPLIC: 'yes'
-  - NAME: 'ncbiRefSeq_UMCU'
-    ALIAS: 'Annotation'
-    PROTOCOL: 'ncbiRefSeq'
-    VERSION: '_UMCU'
-    AVAILABLE: 'yes'
-    REQUIRED: 'yes'
-    COMMENT: "100519"
-    OPERATION: 'g'
-    HGVS: 'yes'
-    SPLICING: '6'
-    EXONSPLIC: 'yes'
-  - NAME: 'avsnp150'
-    ALIAS: 'dbSNP'
-    PROTOCOL: 'avsnp'
-    VERSION: '150'
-    AVAILABLE: 'yes'
-    REQUIRED: 'yes'
-    OPERATION: 'f'
-    COLSWANTED: '1'
-  - NAME: 'clinvar_20180603'
-    ALIAS: 'N/A'
-    PROTOCOL: 'clinvar'
-    VERSION: '_20180603'
-    AVAILABLE: 'yes'
-    REQUIRED: 'yes'
-    OPERATION: 'f'
-    COLSWANTED: '5'
-  - NAME: 'class100519'
-    ALIAS: 'Class'
-    PROTOCOL: 'class'
-    VERSION: '100519'
-    AVAILABLE: 'yes'
-    REQUIRED: 'yes'
-    OPERATION: 'f'
-    COLSWANTED: '1'
--- a/annovar_yaml/YAML_arguments_annovar.yml	Mon Jun 10 09:21:03 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-GENERAL:
-  - APPLICATION: 'annovar'
-    PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/'
-    CODING_ANNOVAR: 'coding_annovar.pl '
-    TABLE_ANNOVAR: 'table_annovar.pl '
-    LOCATION_DATABASE: ' /hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/'
-    DOT2UNDERLINE:
-      yes: '--dot2underline '
-      no: ''
-    NASTRING: '--nastring . '
-    OTHERINFO:
-      yes: '--otherinfo '
-      no: ''
-    POLISH:
-      yes: '--polish '
-      no: ''
-    REMOVE:
-      yes: '--remove '
-      no: ''
-    THREAD: '--thread 8 '
-    INPUTFORMAT:
-      vcfinput: '--vcfinput '
-    SPECIES:
-      human: 'humandb '
-      mouse: 'mousedb '
-    BUILD: '--buildver hg19 '
-ANALYSIS:
-  DATABASES:
-    - NAME: 'cosmic84'
-      ALIAS: 'N/A'
-      PROTOCOL: 'cosmic'
-      VERSION: '84'
-      COMMENT: '20190221' 
-      AVAILABLE:
-        yes: '1'
-        no: '0'
-      REQUIRED:
-        yes: '1'
-        no: '0'
-      OPERATION: 'f'
-      COLSWANTED: '--colswanted 4 '
-    - NAME: 'refgene19'
-      ALIAS: 'N/A'
-      PROTOCOL: 'refgene'
-      VERSION: '19' 
-      COMMENT: '20190210'  
-      AVAILABLE:
-        yes: '1'
-        no: '0'
-      REQUIRED:
-        yes: '1'
-        no: '0'
-      OPERATION: 'g'
-      HGVS:
-        yes: '--hgvs '
-        no: ''
-      SPLICING: '--splicing 6 '
-      EXONSPLIC:  
-        yes: '--exonicsplicing '
-        no: ''
-    - NAME: 'ncbiRefSeq_UMCU'
-      ALIAS: 'Annotation'
-      PROTOCOL: 'ncbiRefSeq'
-      VERSION: '_UMCU'
-      AVAILABLE:
-        yes: '1'
-        no: '0'
-      REQUIRED:
-        yes: '1'
-        no: '0'
-      COMMENT: "100519"
-      OPERATION: 'g'
-      HGVS:
-        yes: '--hgvs '
-        no: ''
-      SPLICING: '--splicing 6 '
-      EXONSPLIC:
-        yes: '--exonicsplicing '
-        no: ''
-    - NAME: 'avsnp150'
-      ALIAS: 'dbSNP'
-      PROTOCOL: 'avsnp'
-      VERSION: '150'
-      AVAILABLE:
-        yes: '1'
-        no: '0'
-      REQUIRED:
-        yes: '1'
-        no: '0'
-      OPERATION: 'f'
-      COLSWANTED: '--colswanted 1 '
-    - NAME: 'clinvar_20180603'
-      ALIAS: 'N/A'
-      PROTOCOL: 'clinvar'
-      VERSION: '_20180603'
-      COMMENT: 'blah'
-      AVAILABLE:
-        yes: '1'
-        no: '0'
-      REQUIRED:
-        yes: '1'
-        no: '0'
-      OPERATION: 'f'
-      COLSWANTED: '--colswanted 5 '
-    - NAME: 'class100519'
-      ALIAS: 'Class'
-      PROTOCOL: 'class'
-      VERSION: '100519'
-      AVAILABLE:
-        yes: '1'
-        no: '0'
-      REQUIRED:
-        yes: '1'
-        no: '0'
-      OPERATION: 'f'
-      COLSWANTED: '--colswanted 1 '
-      
--- a/annovar_yaml/annovar_yaml.pl	Mon Jun 10 09:21:03 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1121 +0,0 @@
-#/usr/bin/perl
-#This code is using v4 of the Run_Annovar_vx.pl script!
-
-##################
-#!!TEST COMMAND!!#
-##################
-#Following input required, input fixed currently to run limited Annovar settings
-#perl annovar_yaml.pl --inyml YAML_annovar.yml --invcf invcf.vcf --outvcf outvcf.vcf > log.txt
-
-#Future prospects: ability to learn to read YAML files to pick up parameters. Manual definition of command line still required!
-
-#perl modules use
-use YAML;
-use YAML::Tiny;
-use YAML::XS 'LoadFile';
-use Data::Dumper;
-#use Data::YAML::Writer;
-use Getopt::Long;
-use feature qw(postderef postderef_qq);
-use feature qw(refaliasing);
-#no warnings qw(experimental::postderef);
-use strict;
-
-#input required
-#two modes run and edit
-#Basis Yaml file [default required]
-#Yaml out file [required in edit mode]
-#species [default required]
-#build [default required]
-#protocol [default required]
-#protocolversion [default required]
-
-#basic paramters
-#Switch to enable disable Aliasses; enable=1 disable=0
-my $use_alias = "1";
-my $edit = "0";
-my $run = "0";
-my $application = "0";
-#arguments yaml file is fixed!
-#my $paryml = "YAML_arguments_annovar.yml";
-my $paryml;
-my $inyml;
-my $outyml;
-my $invcf;
-my $outvcf;
-my $yml_hash_arguments;
-my %yml_hash_arguments;
-my $yml_hash;
-my %yml_hash;
-my $yml_hash_edit;
-my %yml_hash_edit;
-my $in;
-my $in2;
-my %in;
-my %in2;
-my $writer;
-my $lookup;
-my $data_type;
-
-#parameter applications
-my %annovarparameters_array;
-my %annovarparameters_single;
-my $annovarparameters_array;
-my $annovarparameters_single;
-
-#Strings used as input parameters
-#my $input_general_location_database;
-#my $input_general_dir_scripts;
-#my $input_general_location_scripts_coding_annovar;
-#my $input_general_location_scripts_table_annovar;
-my $input_annovar_build;
-my $input_annovar_species;
-my $inprotocol;
-my $inprotocolversion;
-my @inprotocol;
-my @inprotocolversion;
-
-my $input_general_settings_dot2underline;
-my $input_general_settings_nastring;
-my $input_general_settings_otherinfo;
-my $input_general_settings_polish;
-my $input_general_settings_remove;
-my $input_general_settings_thread;
-my $input_general_settings_inputformat;
-
-my $input_databases_available;
-my $input_databases_required;
-
-my $input_databases_colswanted;
-my $input_databases_exonicsplic;
-my $input_databases_hgvs;
-my $input_databases_operation;
-my $input_databases_splicing;
-my @input_databases_colswanted;
-my @input_databases_exonicsplic;
-my @input_databases_hgvs;
-my @input_databases_operation;
-my @input_databases_splicing;
-
-#other variables
-my $script;
-my $type;
-my $count;
-my $column_build;
-my $column_species;
-my @column_build;
-my @column_species;
-my $pattern;
-my $pattern2;
-my $match;
-my $match_value;
-my @match_value;
-my @readonly;
-my %editinyml;
-my $folder_symlinks = '0';
-my $path_symlinks;
-my @softlink_building_blocks;
-
-#Fixed values for searching hashes
-my $level5 = "DATABASES";
-
-#parameters to build command
-my $protocol;
-my @protocol;
-my $operation;
-my $argument;
-
-GetOptions (
-	#Required input
-	'protocol=s' => \$inprotocol,
-	'protocolversion=s' => \$inprotocolversion,
-	'script=s' => \$script,
-	'invcf=s' => \$invcf,
-	'outvcf=s' => \$outvcf,
-        'paryml=s' => \$paryml,
-	'application=s' => \$application,
-        'inyml=s' => \$inyml,
-        'outyml=s' => \$outyml,
-        'build=s'       => \$input_annovar_build,
-        'species=s'   => \$input_annovar_species,
-	#Optional input
-	'edit' => \$edit,
-	'run' => \$run,
-	#variables in GENERAL
-	'dot2underline=s'   => \$input_general_settings_dot2underline,
-	'nastring=s'   => \$input_general_settings_nastring,
-	'otherinfo=s'   => \$input_general_settings_otherinfo,
-	'polish=s'   => \$input_general_settings_polish,
-	'remove=s'   => \$input_general_settings_remove,
-	'thread=s'   => \$input_general_settings_thread,
-	'inputformat=s'  => \$input_general_settings_inputformat,
-	#variables in DATABASES
-	'colswanted=s'   => \$input_databases_colswanted,
-	'exonicsplic=s'   => \$input_databases_exonicsplic,
-	'hgvs=s'   => \$input_databases_hgvs,
-	'operation=s'   => \$input_databases_operation,
-	'splicing=s'   => \$input_databases_splicing,
-);
-
-chomp $invcf;
-chomp $outvcf;
-
-#inactive options
-#       'drequired=s'   => \$input_databases_required,
-#       'davail=s'   => \$input_databases_available,
-
-#       'gpathdatabase=s'   => \$input_general_location_database,
-#	'gpathscripts=s'   => \$input_general_dir_scripts,
-#       'gpathcodannovar=s'   => \$input_general_location_scripts_coding_annovar,
-#       'gpathtabannovar=s'   => \$input_general_location_scripts_table_annovar,
-
-#Check input parameters
-#Do some checks whether all required arguments are given
-
-#some code
-
-#arrays with all application specific parameters
-#make sure keys are unique to allow for parsing of yml! 
-
-%annovarparameters_array = (
-	"COLSWANTED" => "$input_databases_colswanted",
-	"EXONICSPLIC" => "$input_databases_exonicsplic",
-	"HGVS" => "$input_databases_hgvs",
-	"OPERATION" => "$input_databases_operation",
-	"SPLICING" => "$input_databases_splicing",
-);
-
-%annovarparameters_single = (
-	"DOT2UNDERLINE" => "$input_general_settings_dot2underline",
-	"NASTRING" => "$input_general_settings_nastring",
-	"OTHERINFO" => "$input_general_settings_otherinfo",
-	"POLISH" => "$input_general_settings_polish",
-	"REMOVE" => "$input_general_settings_remove",
-	"THREAD" => "$input_general_settings_thread",
-	"INPUTFORMAT" => "$input_general_settings_inputformat",
-);
-
-#parameter not allowed to be edited in yml
-@readonly = ("hg19","human","mouse","mm10","annovar","VERSION","TABLE_ANNOVAR","refgene","cosmic");
-
-
-#converting input strings into input arrays
-@inprotocol = split (",", $inprotocol);
-@inprotocolversion = split (",", $inprotocolversion);
-@input_databases_colswanted = split (",", $input_databases_colswanted);
-@input_databases_exonicsplic = split (",", $input_databases_exonicsplic);
-@input_databases_hgvs = split (",", $input_databases_hgvs );
-@input_databases_operation = split (",", $input_databases_operation);
-@input_databases_splicing = split (",", $input_databases_splicing);
-
-#$in = $inyml;
-#openyml_read($in);
-#$in = $inyml;
-#load($in);
-
-#check if combination of application,species, build and protocol exists
-#$count = "0";
-#%in = $yml_hash;
-#foreach (@inprotocol) {
-#print "protocol $inprotocol[$count]\n";  
-#$pattern = "$application:ANALYSIS:$input_annovar_species:$input_annovar_build:DATABASES:$inprotocol[$count]$inprotocolversion[$count]:NAME $inprotocol[$count]$inprotocolversion[$count]";
-#@pattern = ("GENERAL:species", "GENERAL:build", "GENERAL:protocol");
-#foreach @pattern {
-#  parse($in, $_);
-#  if ($match =~ /^1$/) { print "Present!\n"; };
-#  $count++;
-#  }
-#}
-
-#$in = $inyml;
-#openyml_read($in);
-#$in = $inyml;
-#load_edit($in);
-
-#Editing section first single values, next arrays
-#print "Starting editing of strings...\n";
-#foreach my $key (keys %annovarparameters_single) {
-#    my $innermatch = "0";
-    #print "Key: $key\n";
-#    foreach my $innerkey (@readonly) {
-#        if ($key =~ /^$innerkey$/) {
-	    #print "Value not allowed to be changed!\n";  
-#            $innermatch++; };
-#        }
-#    if ($innermatch >= 1) {
-#	print "Skipping: $key\n";
-	#Do nothing
-#    } else { 
-#    my $value = $annovarparameters_single{$key};
-#    if ($value !~ /^$/) {
-#        print "key:$key value:$value\n";
-#        $pattern = "^($application:.{1,}:$key)";
-#	print "pattern: $pattern\n";
-#        %in = $yml_hash_edit; 
-#        parse($in, $pattern);
-#        print "editing value...: $match_value\n";
-#        @match_value = split (':', $match_value);
-# 	 my $max = scalar @match_value;
-# 	 $count = "0";
-#        foreach (@match_value) {
-#	   if ($count == $max-1) {
-#               print "$_\n";
-#	       $_ =~ s/^.{1,}$/$value/g;
-#	       print "after: $_ \n";
-#	   }
-#	   $count++;
-#	}
-#	print "max $max\n";
-#        if ( $max == 4 ) {
-#            $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]} = $match_value[3]
-#        }
-#    }
-#    } 
-#}
-
-#print "\n";
-#my $checkkey;
-
-#allow processing of arrays as well
-#print "Starting editing of array values...\n";
-#foreach my $key ( keys %annovarparameters_array) {
-#    $count = "0";
-    #print "COUNT: $count\n";
-    #print "KEY: $key\n";
-#    my @key = split (',', $key);
-#    foreach my $innerkey (@key) {
-#        my $innermatch = "0";
-#        foreach $checkkey (@readonly) {
-#            if ($innerkey =~ /^$checkkey$/) {
-                #Do nothing
-#                $innermatch++; };
-#            }
-#        if ($innermatch >= 1) {
-#            print "innerkey: $innerkey skipped\n";
-
-#       } else {
-#        my $value = $annovarparameters_array{$key};
-#	if ($value !~ /^$/ ) {
-#	    print "Value: $value\n";
-#	    my @value = split (',', $value);
-#	    my $innercount = "0";
-#            foreach my $innervalue (@value) {
-#		if ( $innervalue !~ /^$/ ) {
-#	            print "\nkey: $key innerkey: $innerkey check: $checkkey value: $innervalue\n";
-#	            print "innercount: $innercount\n";
-#	            $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]$inprotocolversion[$innercount]:$key)";
-#                    print "pattern: $pattern\n";
-#	            %in = $yml_hash_edit;
-#	            parse($in, $pattern);
-#		    undef %in;
-#	            print "editing value...: $match_value\n";
-#	                if ($match =~ /^1$/) {
-#                            @match_value = split (':', $match_value);
-#                            my $max = scalar @match_value;
-#                            #$count = "0";
-#                            foreach (@match_value) { 
-#				print "count: $count\n";
-#		                if ($count == $max-1) {
-#		                    print "empty_check: $_ \n";
-#			            $_ =~ s/^.{1,}$/$value[$innercount]/g;
-#			            print "empty_check_after: $_ \n";
-#		                }                                   
-#		                print "Max: $max\n";
-#                                $count++;
-#		            }
-#			    print "change: $value[$innercount]\n";
-#			    print "Max: $max\n";
-#                            if ( $max == 9 ) {
-#                                $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$match_value[7]} = $match_value[8]; 
-#                           }
-#	                } else {
-#
-#	                     if ($match =~ /^0$/) {   
-#	                         $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]:$inprotocolversion[$innercount])";
-#                                 %in = $yml_hash_edit;
-#				 parse($in, $pattern);          
-#				 undef %in;
-#	                         print "editing value...: $match_value\n";
-#	                         if ($match > 1) {
-#                                     @match_value = split (':', $match_value); 
-#	                             my $max = scalar @match_value;
-#				     $count = "0";
-#                                     print "Max: $max\n";
-#                                     if ( $max == 9 ) {
-#                                         $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$innerkey} = "$innervalue";
-#                                     }
-# 	                         }    
-#	                     }
-#	                }          
-#		} else {
-#                #Do nothing no arugment supplied  
-#		}
-#	    $innercount++;
-#	    }
-#        } else {
-#	#Do nothing no argument supplied
-#        }
-#	}
-#    }
-#$count++;
-#print "\n";
-#}
-
-#print "\nFinished editing...\n";
-
-#Create second yml file with change values originating from cli
-#$in = $outyml;
-#openyml_write($in);
-#%in = $yml_hash_edit; 
-#writeyml_edit($in);
-#$in = $outyml;
-#load($outyml);
-#print "Finished loading second yml...\n";
-
-#Load ymlhash with arguments
-#$in = $paryml;
-#openyml_read($in);
-#$in = $paryml;
-#load_arguments($in);
-#print "Finished loading arguments...\n";
-
-
-#Fill hashes with input yaml files as defined in input and fixed arguments yaml
-openyml_read ($paryml);
-openyml_read ($inyml);
-load ($inyml, %yml_hash, $yml_hash);
-load_arguments ($paryml, %yml_hash_arguments, $yml_hash_arguments);
-
-##Check input yaml files format
-##Pickup arguments from YAML to run command
-
-#########################
-#!!General parser test!!#
-#########################
-
-#Parsing both input YML files to check format, deviations results in killing the process
-print "\nParsing Input YAML File\n";
-parse ($yml_hash, $data_type); 
-print "\nParsing Argument YAML File\n";
-parse ($yml_hash_arguments, $data_type); 
-
-#Manually defined values to fill Annovar command
-#Possible to extract from YAML using parser
-my $application = 'annovar';
-
-#Possible to extract from YAML using parser
-my $ncbiRefSeq = 'ncbiRefSeq';
-my $cosmic = 'cosmic';
-my $dbsnp = 'avsnp';
-my $clinvar = 'clinvar';
-my $class = 'class';
-
-#Possible to extract from YAML using parser
-my $ncbiRefSeq_version = '_UMCU';
-my $cosmic_version = '84';
-my $dbsnp_version = '150';
-my $clinvar_version = '_20180603';
-my $class_version = '100519';
-
-#possible to extract from YAML using parser
-#species input fixed
-my $input_annovar_species = "human";
-
-#manual definition required! --> parameter in plugin galaxy / script hardcoded??
-my $language = 'perl ';
-
-#Possible to extract from YAML using parser
-my $parse1 = 'parse1';
-my $parse2 = 'parse2';
-
-#Possible to extract from YAML
-my $application_path = "$application,APPLICATION,GENERAL";
-
-my $ncbiRefSeq_path = "$ncbiRefSeq$ncbiRefSeq_version,NAME,ANALYSIS,DATABASES";
-my $cosmic_path = "$cosmic$cosmic_version,NAME,ANALYSIS,DATABASES";
-my $dbsnp_path = "$dbsnp$dbsnp_version,NAME,ANALYSIS,DATABASES";
-my $clinvar_path = "$clinvar$clinvar_version,NAME,ANALYSIS,DATABASES";
-my $class_path = "$class$class_version,NAME,ANALYSIS,DATABASES";
-
-##Code to create folder for database softlinks
-
-my $path_folder_symlinks;
-my @symlinkfolder_building_blocks;
-my $symlinks_folder_created;
-
-if ( $use_alias =~ /^1$/) {
-
-@symlinkfolder_building_blocks = ("$parse1,LOCATION_DATABASE,$application_path","$parse1,SPECIES,$application_path");
-}
-
-#Setup variables for processing parse_look function
-my $lookup;
-my %input;
-my $input;
-my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5);
-
-$symlinks_folder_created = '0';
-
-
-#Code reading species en database location from YAML
-if ( $use_alias =~ /^1$/) {
-
-  foreach (@symlinkfolder_building_blocks) {
-    $in = $inyml;
-    $in2 = $paryml;
-    #print "loop1:$_\n";
-    my @values = split /\,/, $_;
-    foreach (@values) {
-      if ( $_ !~ /^parse[12]$/ ) { 
-        print "Error while generating soflink folder [$_]...\n";
-        $path_folder_symlinks .= "$_";
-        last;
-      }
-      $lookup = "";
-      parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
-      chomp $lookup;
-      $path_folder_symlinks .= "$lookup";
-      $lookup = '';
-      %input = '';
-      $input = '';
-      last;
-    }
-  }
-
-  #Generate base name folder, numbered 0 up to endless to allow specific folder for parrallel analysis
-
-  $symlinks_folder_created = '0';
-
-  $path_folder_symlinks =~ s/^\s+|\s+$//g;
-  $folder_symlinks =~ s/^\s+|\s+$//g;
-
-  print "\n";
-  print "Folder symlinks: $path_folder_symlinks/$folder_symlinks\n";
-
-  while ($symlinks_folder_created =~ /^0$/ ) {
-    if ( -d "$path_folder_symlinks/$folder_symlinks" and -e "$path_folder_symlinks/$folder_symlinks" ) { 
-      print "Folder $path_folder_symlinks/$folder_symlinks is in use!\n";
-      $folder_symlinks++;
-    } else { 
-      print "Creating $path_folder_symlinks/$folder_symlinks...\n"; 
-      system ( "mkdir $path_folder_symlinks/$folder_symlinks" );
-      $path_symlinks = "$path_folder_symlinks/$folder_symlinks/";    
-      print "Created $path_symlinks\n";
-      $symlinks_folder_created++;
-    }
-  }
-}
-
-##Building annovar command:
-	
-#Manual definition required
-#blocks to build command
-my @command_building_blocks = ( 
-"$language",
-"$parse1,PATHSCRIPTS,$application_path",
-"$parse1,TABLE_ANNOVAR,$application_path",
-"$invcf",
-"$parse1,LOCATION_DATABASE,$application_path",
-"$parse1,SPECIES,$application_path",
-"$parse1,BUILD,$application_path",
-"$parse1,REMOVE,$application_path",
-#Manually define arguments using ALIASES or regular NAME argument based on turning ALIAS on or OFF
-#Define this in sections scanning the YAML file
-"--protocol \'",
-"$parse2,ALIAS,$ncbiRefSeq_path",
-"\,",
-"$parse2,NAME,$cosmic_path",
-"\,",
-"$parse2,ALIAS,$dbsnp_path",
-"\,",
-"$parse2,NAME,$clinvar_path",
-"\,",
-"$parse2,ALIAS,$class_path",
-"\' ",
-"--operation \'",
-"$parse2,OPERATION,$ncbiRefSeq_path",
-"\'",
-"\,",
-"\'",
-"$parse2,OPERATION,$cosmic_path",
-#"\'\,\'",
-"\'",
-"\,",
-"\'",
-"$parse2,OPERATION,$dbsnp_path",
-#"\'\,\'",
-"\'",
-"\,",
-"\'",
-"$parse2,OPERATION,$clinvar_path",
-#"\'\,\'",
-"\'",
-"\,",
-"\'",
-"$parse2,OPERATION,$class_path",
-"\' ",
-"$parse1,DOT2UNDERLINE,$application_path",
-"$parse1,OTHERINFO,$application_path",
-"$parse1,NASTRING,$application_path",
-"$parse1,INPUTFORMAT,$application_path",
-"--arg \'",
-"$parse2,HGVS,$ncbiRefSeq_path",
-"$parse2,SPLICING,$ncbiRefSeq_path",
-"$parse2,EXONSPLIC,$ncbiRefSeq_path",
-#"\'\,\'",
-"\'",
-"\,",
-"\'",
-"$parse2,COLSWANTED,$cosmic_path",
-#"\'\,\'",
-"\'",
-"\,",
-"\'",
-"$parse2,COLSWANTED,$dbsnp_path",
-#"\'\,\'",
-"\'",
-"\,",
-"\'",
-"$parse2,COLSWANTED,$clinvar_path",
-#"\'\,\'",
-"\'",
-"\,",
-"\'",
-"$parse2,COLSWANTED,$class_path",
-"\' ",
-"$parse1,THREAD,$application_path",
-"$parse1,POLISH,$application_path",
-"-outfile $outvcf "); 
-
-print "\n";
-
-
-#Create Empty command
-my $test_command = "";
-
-#Start building command to run
-foreach (@command_building_blocks) {
-  $in = $inyml;
-  $in2 = $paryml;
-  #print "loop1:$_\n";
-  if ( $_ =~ /^,$/ ) {
-    print "[NOT YAML] $_ is separator and does not require matching!\n";
-    $test_command .= "$_";
-    next;
-  }
-  my @values = split /\,/, $_;
-  foreach (@values) {
-    if ( $_ !~ /^parse[12]$/ ) { 
-      print "[NOT YAML] $_ does not require matching!\n";
-      $test_command .= "$_";
-      last;
-    }
-    $lookup = "";
-    parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
-
-    #If alias is enable determine which number is used for this analysis to pickup the correct database files
-    if ( $use_alias =~ /^1$/ ) {
-      #add code to add folder number for analysis
-      if ($values[1]=~/^SPECIES/ )   {
-        $lookup =~ s/^\s+|\s+$//g;
-        $folder_symlinks;
-        $lookup .= "/$folder_symlinks ";
-      }
-    }
-    print "[YAML]: $lookup\n";
-    $test_command .= "$lookup";
-    $lookup = '';
-    $input = '';
-    %input = '';
-    last;
-  }
-}
-
-print "\nalias:$use_alias\n";
-
-#Created softlinks voor database files
-
-my @database_info;
-
-if ( $use_alias =~ /^1$/ ) {
-  print "Entering Alias stuff..\n";
-  #Define databases in use
-  @softlink_building_blocks = (
-  "$parse2,NAME,$ncbiRefSeq_path",
-  "$parse2,NAME,$cosmic_path",
-  "$parse2,NAME,$dbsnp_path",
-  "$parse2,NAME,$clinvar_path",
-  "$parse2,NAME,$class_path" );
-
-  print "\nCreating softlinks to Annovar database files:\n";
-
-  #Use parse_lookup to find argument and create symlinks in analysis database folder
-  foreach (@softlink_building_blocks) {
-    $in = $inyml;
-    $in2 = $paryml;
-    my @values = split /\,/, $_;
-    foreach (@values) {
-      if ( $_ !~ /^parse[12]$/ ) { 
-        print "[NOT YAML] $_ does not require matching!\n";
-        last;
-      }
-      $lookup = "";
-      parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
-      print "database=$lookup\n"; 
-      #add code to add folder number for analysis
-      #print "Path_destination=$path_symlinks\n";
-      #print "Path_origin=$path_folder_symlinks\n";
-      my $database_detail = $lookup;
-
-      $lookup = "";
-      parse_lookup ("parse1", "BUILD", "$application","APPLICATION","GENERAL", %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
-      print "Species: $lookup\n";
-      my @build = split /\s/, $lookup;
-      #print "Species=$build[1]\n";
-      my $species = $build[1];
-
-      $lookup = ''; 
-      print "$values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5]\n";
-      parse_lookup ($values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
-      #print "Alias=$lookup\n";
-      my $alias = $lookup;
-
-      #Based on database and presence of either index file or not run softlink commands
-      if ( $alias =~ /^N\/A$/ ) {
-        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$database_detail.txt";
-        print "command: $command\n";
-        system ( $command );
-      } elsif ( $alias !~ /^N\/A$/ ) {
-        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$alias.txt";
-        print "command2: $command\n";
-        system ( $command );
-      } 
-      if ( $alias =~ /^dbSNP$/ ) {
-        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$alias.txt.idx";
-        print "command3: $command\n";
-        system ( $command );
-      }
-      if ( $alias =~ /^Annotation$/ ) {
-        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail\Mrna.fa $path_symlinks$species\_$alias\Mrna.fa";
-        print "command4: $command\n";
-        system ( $command );
-      }
-      if ( $database_detail =~ /^clinvar/ ) {
-        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$database_detail.txt.idx";
-        print "command3: $command\n";
-        system ( $command );
-      }
-
-      #Push all softlinks used in array
-      if ( $alias !~ /^N\/A$/ ) {
-        push @database_info, "$alias=$species\_$database_detail";
-      }
-
-
-      $lookup = '';
-      $input = '';
-      %input = '';
-      last;
-    }
-  }
-  print "Done preparing databases!\n";
-}
-
-#Run Generated annovar command
-
-print "\nResulting in following command: $test_command\n";
-#################################################
-#!!IMPORTANT# DECOMMENT FOR ACTIVE USE!!#
-system ($test_command);
-#################################################
-
-my $addition_vcf;
-
-if ( $use_alias =~ /^1$/ ) {
-
-  #Add translation for common databases to specific version 
-  $addition_vcf;
-  my $database_details = $path_symlinks.'database_details.txt';
-  $addition_vcf .= "##";
-  foreach (@database_info) {
-    $addition_vcf .= ":$_";
-  }  	
-
-  #Add information to vcf output file
-  open(OUTVCF_EDIT, '>', "$outvcf\_edit") or die "Could not open file '$outvcf' $!";
-  print "\nDone open edit output vcf file...\n";
-  open(OUTVCF, '<', "$outvcf") or die "Could not open file '$outvcf' $!";
-  print "Done reading vcf output vcf file...\n\n";
-
-
-  while ( my $line = <OUTVCF> ) {
-    my @row = split ( "\t", $line );
-    if ( $row[0] =~ /^#CHROM$/ ) {
-      print OUTVCF_EDIT "$addition_vcf\n";
-      print OUTVCF_EDIT "$line";
-    } else {
-      print OUTVCF_EDIT "$line";
-    }
-  }
-
-  close OUTVCF_EDIT;
-  close OUTVCF;
-
-  #Replace orginal vcf file with edited vcf file with additional information
-  my $mv_command = "mv $outvcf\_edit $outvcf";
-  system ($mv_command);
-
-  print "\nAdding line to vcf file:\n";
-  print "$addition_vcf\n\n";
-
-  #Remove folder containing softlinks
-  print "Removing temporary directory:\n";
-  print "rm -r $path_symlinks\n\n";
-  system ( "rm -r $path_symlinks" ); 
-}
-
-print "Job done program stopping.\n";
-
-exit;
-
-#####################
-#!!!END OF SCRIPT!!!#
-####################
-
-#############
-#SUBROUTINES#
-#############
-
-#############
-#Subroutine to obtain datatype variable
-#############
-sub test_type {
-
-my $input = $_[0];
-$data_type = ref($input);
-return $data_type;
-
-}
-
-
-#############
-#Subroutine to parse and check yaml for correct format
-#############
-sub parse {
-
-#@inyml = @{$yml_hash->{$pattern4}{$pattern5}};
-#@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}};
-
-my $input_hash = $_[0];
-
-my ($key_a, $key_a2, $key_b, $key_b2, $key_c, $key_c2);
-my ($value_a, $value_a2, $value_b, $value_b2, $value_c, $value_c2);
-my (@hash, @hash2, $hash, $hash2);
-
-my $count_a = '0';
-my $count_a2 = '0';
-my $count_a3 = '0';
-my $count_a4 = '0';
-my $count_b = '0';
-my $count_b2 = '0';
-my $count_b3 = '0';
-my $count_b4 = '0';
-my $count_c = '0';
-my $count_c2 = '0';
-my $count_c3 = '0';
-my $count_c4 = '0';
-
-for $key_a (keys %$input_hash) { 
-  $count_a++;
-  for $value_a (values %$input_hash) {
-    $count_a2++;
-    if ( $count_a =~ /^$count_a2$/ ) {
-      test_type($value_a);
-      if ($data_type =~ /^HASH$/) {
-        for $key_a2 ( keys %{$input_hash->{$key_a}} ) {
-          $count_a3++;
-	  for $value_a2 ( values  %{$input_hash->{$key_a}} ) {
-            $count_a4++;
-            if ($count_a3 =~ /^$count_a4$/) {
-	      test_type($value_a2);
-              if ($data_type =~ /^HASH$/) {
-		print "[ERROR1] Found unexpected yml file format [ key: $key_a2 ] , please check whether your YAML file meets requirements!\n";
-		exit;
-	      } elsif ($data_type =~ /^ARRAY$/) {
-                @hash = @{$input_hash->{$key_a}{$key_a2}};
-		foreach $hash ( @hash ) {
-		  for $key_b ( keys %$hash ) { 
-		    $count_b++;
-		    for $value_b (values %$hash ) {
-                      $count_b2++;
-                      if ($count_b =~ /^$count_b2$/) {
-                        test_type($value_b);
-                        if ( $data_type =~ /^HASH$/ ) {
-			  for $key_b2 ( keys %{$hash->{$key_b}} ) { 
-			    $count_b3++;
-			    for $value_b2 (values %{$hash->{$key_b}} ) {  
-		              $count_b4++;
-			      if ($count_b3 =~ /^$count_b4/) {
-                                test_type($value_b2);
-      			        if ( $data_type =~ /^HASH$/ ) {
-			        print "[ERROR2] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n";
-				exit;
-		                } elsif ( $data_type =~ /^ARRAY$/ ) {  
-			        print "[ERROR3] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n";
-				exit;
-			        } else {
-				#Assume value level 4
-				print "value_b2: $key_a:$key_a2:$key_b:$key_b2:$value_b2\n";
-			        }
-		              }    
-			    }
-                            $count_b4 = "0";
-                          }
-			  $count_b3 = "0";
-		        } 
-			elsif ( $data_type =~ /^ARRAY$/ ) {
-			  print "[ERROR4] Found unexpected yml file format [ key: $key_b ] , please check whether your YAML file meets requirements!\n";
-			  exit;
-                        } else {     
-			  #assume value level 3
-			  print "value_b: $key_a:$key_a2:$key_b:$value_b\n";
-		        }	
-                      }
-            	    }
-                  $count_b2 = '0';
-                  }
-	        $count_b = '0';
-	        }    
-              } else {	
-	        #Assume value level 2
-		print "Value_a2: $key_a:$value_a2\n";
-              }   
-            }
-          }
-        $count_a4 = '0';
-        }
-      $count_a3 = '0';
-      } 
-      elsif ($data_type =~ /^ARRAY/) {
-        @hash2 = @{$input_hash->{$key_a}}; 
-	foreach $hash2 ( @hash2 ) {
-	  for my $key_c ( keys %$hash2 ) {
-	    $count_c++;
-	    for $value_c ( values %$hash2 ) {
-              $count_c2++;
-	      if ($count_c =~ /^$count_c2$/) {
-                test_type($value_c);
-                if ( $data_type =~ /^HASH$/ ) {
-		  for $key_c2 ( keys %{$hash2->{$key_c}} ) {
-		    $count_c3++;
-		    for $value_c2 ( values %{$hash2->{$key_c}} ) {
-		      $count_c4++;
-		      if ( $count_c3 =~ /^$count_c4$/ ) {
-                        test_type($value_c2);
-                        if ( $data_type =~ /^HASH$/ ) {
-                          print "[ERROR5] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n";
-		          exit;
-		        } elsif ( $data_type =~ /^ARRAY$/ ) {
-                          print "[ERROR6] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n";
-		          exit;
-		        } else { 
-			  #Assume level 4
-			  print "value_c2: $key_a:$key_a2:$key_c:$key_c2:$value_c2\n";
-			}
-		      }
-		    }
-                  $count_c4 = '0';
-	          }
-                $count_c3 = '0';	  
-		} 
-		elsif ( $data_type =~ /^ARRAY$/ ) {
-		  print "[ERROR7] Found unexpected yml file format [ key: $key_c ] , please check whether your YAML file meets requirements!\n";
-		  exit;
-		} else {
-		  #assume value level 3
-                  print "value_c: $key_a:$key_c:$value_c\n";
-         	}
-              }
-	    } 
-          $count_c2 = '0';
-          }
-        $count_c = '0'; 
-        }
-      } else { 
-        #Assume value level 1
-	print "value_a: $key_a:$value_a\n";
-      }
-    }   
-  }
-$count_a2 = '0';
-}
-$count_a = '0';
-
-$data_type = "";
-
-}
-
-#########################################
-#Subroutine to fetch arguments from YAML#
-#########################################
-
-sub parse_lookup {
-
-my $hash;
-my %hash;
-my $hash_a;
-my %hash_a;
-my %hash_a2;
-my $hash_a2;
-my @inyml;
-my @inarguments;
-my $count;
-my $match;
-my $match2;
-my $argument;
-my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5);
-
-if ( $_[0] =~ /^parse1$/ ) {
-$pattern1 = $_[1];
-$pattern2 = $_[2];
-$pattern3 = $_[3];
-$pattern4 = $_[4];
-@inyml = @{$yml_hash->{$pattern4}};
-@inarguments = @{$yml_hash_arguments->{$pattern4}};
-} elsif ( $_[0] =~ /^parse2$/) {
-$pattern1 = $_[1];
-$pattern2 = $_[2];
-$pattern3 = $_[3];
-$pattern4 = $_[4];
-$pattern5 = $_[5];
-@inyml = @{$yml_hash->{$pattern4}{$pattern5}};
-@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}};
-}
-
-$match = "0";
-
-foreach my $hash ( @inyml ) {
-  $hash = \%$hash;
-  if ($hash->{$pattern3} =~ /^$pattern2$/ ) { 
-    my $value = $hash->{$pattern1};
-    if ($value eq "" ) {    
-      print "Error unknown argument \"$pattern1\" for annotation database \"$pattern2\" in input YAML!\n";
-      exit;
-    } 
-    foreach my $hash_a (@inarguments) {
-      my $hash_a2 = $hash_a->{$pattern1};
-      if ( $hash_a->{$pattern3} =~ /^$pattern2$/ ) {
-        $match2++;
-        if ($hash_a2 =~ /^HASH/ ) {
-          $argument = $hash_a2->{$value};
-        } else {    
-        $argument = $hash_a2;
-        }      
-      if ($argument eq "" ) {
-        print "Unknown given option \"$value\" for argument \"$pattern1\" in YAML arguments file or input YAML for database \"$pattern2\"!\n";
-	print "Please check your input and start the plugin again.\n";
-	exit;
-        }	
-      }
-      if ($match2 =~ /^1$/) { 
-        #Do nothing 
-      } elsif ( $match2 > 1 ) {
-        print "Error Multiple matches for database \"$pattern2\" database in YAML argumentsfile.\n";
-	print "Please check your input and start the plugin again.\n";
-	exit;
-      } elsif ( $match2 =~ /^0$/ ) {
-        print "Error no matches for  database \"$pattern2\" in YAML argumentsfile.\n";
-	print "Please check your input and start the plugin again.\n";
-	exit;
-      } 
-    }
-    $match++;  
-    $lookup = $argument;    
-  }
-}
-if ($match =~ /^1$/) {
-  #Do nothing 
-} else {
-  print "match: $match \n";
-  print "Error in yaml file multiple instances matching input yaml, please check input!\n";
-  exit;
-}
-
-return $lookup;
-
-}
-
-#############
-#Subroutine for opening out / edited yaml file
-#############
-sub openyml_write {
-my $outyml = $in; 
-#Create second yml file with change values originating from cli
-open(OUT, '>', "$outyml") or die "Could not open file '$outyml' $!";
-print "Done creating output file...\n";
-}
-
-#############
-#Subroutine to check whether yaml file exists
-#############
-sub openyml_read { 
-$in = $_[0];
-chomp $in;
-my $inyml = $in;
-open(FILE, '<', "$inyml") or die "Could not open file '$inyml' $!";
-close FILE;
-print "Done reading\n";
-}
-
-#############
-#Subroutine for reading argument yaml file in hash
-##############
-sub load_arguments {
-$in = $_[0];
-$yml_hash_arguments = LoadFile($in);
-return($yml_hash_arguments, %yml_hash_arguments);
-}
-
-#############
-#Subroutine for reading input yaml file in hash
-##############
-sub load {
-$in = $_[0];
-$yml_hash = LoadFile($in);
-return($yml_hash, %yml_hash);
-}
-
-#############
-#Subroutine for reading edited input yml file in hash
-##############
-sub load_edit {
-$in = $_[0];
-$yml_hash_edit = LoadFile($in);
-}
-
-#############
-#Subroutine for editing content yaml hash
-##############
-sub writeyml {
-my $yml_hash = $in;
-$writer = sub {
-    my $line = shift;
-    print FILE "$line\n";
-};
-my $file = Data::YAML::Writer->new;
-$file->write( $yml_hash, $writer );
-}
-
-#############
-#Subroutine for editing content edited yaml hash
-##############
-sub writeyml_edit {
-my $yml_hash = $in;
-$writer = sub {
-    my $line = shift;
-    print OUT "$line\n";
-};
-my $file = Data::YAML::Writer->new;
-$file->write( $yml_hash_edit, $writer );
-close OUT;
-}
--- a/annovar_yaml/annovar_yaml.xml	Mon Jun 10 09:21:03 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-<tool id="annovar_yaml" name="Annovar YAML" version="1.0">
-    <requirements>
-        <requirement type="package" version="1.29">perl-yaml</requirement>
-        <requirement type="package" version="1.73">perl-yaml-tiny</requirement>
-        <requirement type="package" version="0.74">perl-yaml-xs</requirement>
-        <requirement type="package" version="2.173">perl-data-dumper</requirement>
-        <requirement type="package" version="2.50">perl-getopt-long</requirement>
-    </requirements>
-    <command detect_errors="exit_code"><![CDATA[
-        perl $__tool_directory__/annovar_yaml.pl --invcf $vcf --inyml $yaml --outvcf $output
-    ]]></command>
-
-    <inputs>
-
-        <param type="data" name="yaml" format="txt" />
-        <param type="data" name="vcf" format="vcf" />
-
-    </inputs>
-    <outputs>
-        <data name="output" format="vcf"/>
-    </outputs>
-    <help><![CDATA[
-        TODO: Fill in help for Annovar YAML tool. 
-    ]]></help>
-</tool>