# HG changeset patch # User niels # Date 1560173667 14400 # Node ID 237d0eef2ad3dd4fad0b98e94e2009657ac347f5 # Parent 0044b989f60f38518d8e51d5d110cb7ffcecb825 Uploaded paryml fix diff -r 0044b989f60f -r 237d0eef2ad3 0348637001560173633/annovar_yaml/YAML_annovar.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0348637001560173633/annovar_yaml/YAML_annovar.yml Mon Jun 10 09:34:27 2019 -0400 @@ -0,0 +1,72 @@ +GENERAL: + - APPLICATION: 'annovar' + PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' + CODING_ANNOVAR: 'coding_annovar.pl' + TABLE_ANNOVAR: 'table_annovar.pl' + LOCATION_DATABASE: ' /hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' + DOT2UNDERLINE: 'yes' + NASTRING: '.' + OTHERINFO: 'yes' + POLISH: 'yes' + REMOVE: 'yes' + THREAD: '8' + INPUTFORMAT: 'vcfinput' + SPECIES: 'human' + BUILD: 'hg19' +ANALYSIS: + DATABASES: + - NAME: 'cosmic84' + ALIAS: 'N/A' + PROTOCOL: 'cosmic' + VERSION: '84' + COMMENT: '20190221' + AVAILABLE: 'yes' + REQUIRED: 'yes' + OPERATION: 'f' + COLSWANTED: '4' + - NAME: 'refgene19' + ALIAS: 'N/A' + PROTOCOL: 'refgene' + VERSION: '19' + AVAILABLE: 'yes' + REQUIRED: 'yes' + COMMENT: '20190210' + OPERATION: 'g' + HGVS: 'yes' + SPLICING: '6' + EXONSPLIC: 'yes' + - NAME: 'ncbiRefSeq_UMCU' + ALIAS: 'Annotation' + PROTOCOL: 'ncbiRefSeq' + VERSION: '_UMCU' + AVAILABLE: 'yes' + REQUIRED: 'yes' + COMMENT: "100519" + OPERATION: 'g' + HGVS: 'yes' + SPLICING: '6' + EXONSPLIC: 'yes' + - NAME: 'avsnp150' + ALIAS: 'dbSNP' + PROTOCOL: 'avsnp' + VERSION: '150' + AVAILABLE: 'yes' + REQUIRED: 'yes' + OPERATION: 'f' + COLSWANTED: '1' + - NAME: 'clinvar_20180603' + ALIAS: 'N/A' + PROTOCOL: 'clinvar' + VERSION: '_20180603' + AVAILABLE: 'yes' + REQUIRED: 'yes' + OPERATION: 'f' + COLSWANTED: '5' + - NAME: 'class100519' + ALIAS: 'Class' + PROTOCOL: 'class' + VERSION: '100519' + AVAILABLE: 'yes' + REQUIRED: 'yes' + OPERATION: 'f' + COLSWANTED: '1' diff -r 0044b989f60f -r 237d0eef2ad3 0348637001560173633/annovar_yaml/YAML_arguments_annovar.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0348637001560173633/annovar_yaml/YAML_arguments_annovar.yml Mon Jun 10 09:34:27 2019 -0400 @@ -0,0 +1,117 @@ +GENERAL: + - APPLICATION: 'annovar' + PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' + CODING_ANNOVAR: 'coding_annovar.pl ' + TABLE_ANNOVAR: 'table_annovar.pl ' + LOCATION_DATABASE: ' /hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' + DOT2UNDERLINE: + yes: '--dot2underline ' + no: '' + NASTRING: '--nastring . ' + OTHERINFO: + yes: '--otherinfo ' + no: '' + POLISH: + yes: '--polish ' + no: '' + REMOVE: + yes: '--remove ' + no: '' + THREAD: '--thread 8 ' + INPUTFORMAT: + vcfinput: '--vcfinput ' + SPECIES: + human: 'humandb ' + mouse: 'mousedb ' + BUILD: '--buildver hg19 ' +ANALYSIS: + DATABASES: + - NAME: 'cosmic84' + ALIAS: 'N/A' + PROTOCOL: 'cosmic' + VERSION: '84' + COMMENT: '20190221' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'f' + COLSWANTED: '--colswanted 4 ' + - NAME: 'refgene19' + ALIAS: 'N/A' + PROTOCOL: 'refgene' + VERSION: '19' + COMMENT: '20190210' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'g' + HGVS: + yes: '--hgvs ' + no: '' + SPLICING: '--splicing 6 ' + EXONSPLIC: + yes: '--exonicsplicing ' + no: '' + - NAME: 'ncbiRefSeq_UMCU' + ALIAS: 'Annotation' + PROTOCOL: 'ncbiRefSeq' + VERSION: '_UMCU' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + COMMENT: "100519" + OPERATION: 'g' + HGVS: + yes: '--hgvs ' + no: '' + SPLICING: '--splicing 6 ' + EXONSPLIC: + yes: '--exonicsplicing ' + no: '' + - NAME: 'avsnp150' + ALIAS: 'dbSNP' + PROTOCOL: 'avsnp' + VERSION: '150' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'f' + COLSWANTED: '--colswanted 1 ' + - NAME: 'clinvar_20180603' + ALIAS: 'N/A' + PROTOCOL: 'clinvar' + VERSION: '_20180603' + COMMENT: 'blah' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'f' + COLSWANTED: '--colswanted 5 ' + - NAME: 'class100519' + ALIAS: 'Class' + PROTOCOL: 'class' + VERSION: '100519' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'f' + COLSWANTED: '--colswanted 1 ' + diff -r 0044b989f60f -r 237d0eef2ad3 0348637001560173633/annovar_yaml/annovar_yaml.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0348637001560173633/annovar_yaml/annovar_yaml.pl Mon Jun 10 09:34:27 2019 -0400 @@ -0,0 +1,1121 @@ +#/usr/bin/perl +#This code is using v4 of the Run_Annovar_vx.pl script! + +################## +#!!TEST COMMAND!!# +################## +#Following input required, input fixed currently to run limited Annovar settings +#perl annovar_yaml.pl --inyml YAML_annovar.yml --invcf invcf.vcf --outvcf outvcf.vcf > log.txt + +#Future prospects: ability to learn to read YAML files to pick up parameters. Manual definition of command line still required! + +#perl modules use +use YAML; +use YAML::Tiny; +use YAML::XS 'LoadFile'; +use Data::Dumper; +#use Data::YAML::Writer; +use Getopt::Long; +use feature qw(postderef postderef_qq); +use feature qw(refaliasing); +#no warnings qw(experimental::postderef); +use strict; + +#input required +#two modes run and edit +#Basis Yaml file [default required] +#Yaml out file [required in edit mode] +#species [default required] +#build [default required] +#protocol [default required] +#protocolversion [default required] + +#basic paramters +#Switch to enable disable Aliasses; enable=1 disable=0 +my $use_alias = "1"; +my $edit = "0"; +my $run = "0"; +my $application = "0"; +#arguments yaml file is fixed! +#my $paryml = "YAML_arguments_annovar.yml"; +my $paryml; +my $inyml; +my $outyml; +my $invcf; +my $outvcf; +my $yml_hash_arguments; +my %yml_hash_arguments; +my $yml_hash; +my %yml_hash; +my $yml_hash_edit; +my %yml_hash_edit; +my $in; +my $in2; +my %in; +my %in2; +my $writer; +my $lookup; +my $data_type; + +#parameter applications +my %annovarparameters_array; +my %annovarparameters_single; +my $annovarparameters_array; +my $annovarparameters_single; + +#Strings used as input parameters +#my $input_general_location_database; +#my $input_general_dir_scripts; +#my $input_general_location_scripts_coding_annovar; +#my $input_general_location_scripts_table_annovar; +my $input_annovar_build; +my $input_annovar_species; +my $inprotocol; +my $inprotocolversion; +my @inprotocol; +my @inprotocolversion; + +my $input_general_settings_dot2underline; +my $input_general_settings_nastring; +my $input_general_settings_otherinfo; +my $input_general_settings_polish; +my $input_general_settings_remove; +my $input_general_settings_thread; +my $input_general_settings_inputformat; + +my $input_databases_available; +my $input_databases_required; + +my $input_databases_colswanted; +my $input_databases_exonicsplic; +my $input_databases_hgvs; +my $input_databases_operation; +my $input_databases_splicing; +my @input_databases_colswanted; +my @input_databases_exonicsplic; +my @input_databases_hgvs; +my @input_databases_operation; +my @input_databases_splicing; + +#other variables +my $script; +my $type; +my $count; +my $column_build; +my $column_species; +my @column_build; +my @column_species; +my $pattern; +my $pattern2; +my $match; +my $match_value; +my @match_value; +my @readonly; +my %editinyml; +my $folder_symlinks = '0'; +my $path_symlinks; +my @softlink_building_blocks; + +#Fixed values for searching hashes +my $level5 = "DATABASES"; + +#parameters to build command +my $protocol; +my @protocol; +my $operation; +my $argument; + +GetOptions ( + #Required input + 'protocol=s' => \$inprotocol, + 'protocolversion=s' => \$inprotocolversion, + 'script=s' => \$script, + 'invcf=s' => \$invcf, + 'outvcf=s' => \$outvcf, + 'paryml=s' => \$paryml, + 'application=s' => \$application, + 'inyml=s' => \$inyml, + 'outyml=s' => \$outyml, + 'build=s' => \$input_annovar_build, + 'species=s' => \$input_annovar_species, + #Optional input + 'edit' => \$edit, + 'run' => \$run, + #variables in GENERAL + 'dot2underline=s' => \$input_general_settings_dot2underline, + 'nastring=s' => \$input_general_settings_nastring, + 'otherinfo=s' => \$input_general_settings_otherinfo, + 'polish=s' => \$input_general_settings_polish, + 'remove=s' => \$input_general_settings_remove, + 'thread=s' => \$input_general_settings_thread, + 'inputformat=s' => \$input_general_settings_inputformat, + #variables in DATABASES + 'colswanted=s' => \$input_databases_colswanted, + 'exonicsplic=s' => \$input_databases_exonicsplic, + 'hgvs=s' => \$input_databases_hgvs, + 'operation=s' => \$input_databases_operation, + 'splicing=s' => \$input_databases_splicing, +); + +chomp $invcf; +chomp $outvcf; + +#inactive options +# 'drequired=s' => \$input_databases_required, +# 'davail=s' => \$input_databases_available, + +# 'gpathdatabase=s' => \$input_general_location_database, +# 'gpathscripts=s' => \$input_general_dir_scripts, +# 'gpathcodannovar=s' => \$input_general_location_scripts_coding_annovar, +# 'gpathtabannovar=s' => \$input_general_location_scripts_table_annovar, + +#Check input parameters +#Do some checks whether all required arguments are given + +#some code + +#arrays with all application specific parameters +#make sure keys are unique to allow for parsing of yml! + +%annovarparameters_array = ( + "COLSWANTED" => "$input_databases_colswanted", + "EXONICSPLIC" => "$input_databases_exonicsplic", + "HGVS" => "$input_databases_hgvs", + "OPERATION" => "$input_databases_operation", + "SPLICING" => "$input_databases_splicing", +); + +%annovarparameters_single = ( + "DOT2UNDERLINE" => "$input_general_settings_dot2underline", + "NASTRING" => "$input_general_settings_nastring", + "OTHERINFO" => "$input_general_settings_otherinfo", + "POLISH" => "$input_general_settings_polish", + "REMOVE" => "$input_general_settings_remove", + "THREAD" => "$input_general_settings_thread", + "INPUTFORMAT" => "$input_general_settings_inputformat", +); + +#parameter not allowed to be edited in yml +@readonly = ("hg19","human","mouse","mm10","annovar","VERSION","TABLE_ANNOVAR","refgene","cosmic"); + + +#converting input strings into input arrays +@inprotocol = split (",", $inprotocol); +@inprotocolversion = split (",", $inprotocolversion); +@input_databases_colswanted = split (",", $input_databases_colswanted); +@input_databases_exonicsplic = split (",", $input_databases_exonicsplic); +@input_databases_hgvs = split (",", $input_databases_hgvs ); +@input_databases_operation = split (",", $input_databases_operation); +@input_databases_splicing = split (",", $input_databases_splicing); + +#$in = $inyml; +#openyml_read($in); +#$in = $inyml; +#load($in); + +#check if combination of application,species, build and protocol exists +#$count = "0"; +#%in = $yml_hash; +#foreach (@inprotocol) { +#print "protocol $inprotocol[$count]\n"; +#$pattern = "$application:ANALYSIS:$input_annovar_species:$input_annovar_build:DATABASES:$inprotocol[$count]$inprotocolversion[$count]:NAME $inprotocol[$count]$inprotocolversion[$count]"; +#@pattern = ("GENERAL:species", "GENERAL:build", "GENERAL:protocol"); +#foreach @pattern { +# parse($in, $_); +# if ($match =~ /^1$/) { print "Present!\n"; }; +# $count++; +# } +#} + +#$in = $inyml; +#openyml_read($in); +#$in = $inyml; +#load_edit($in); + +#Editing section first single values, next arrays +#print "Starting editing of strings...\n"; +#foreach my $key (keys %annovarparameters_single) { +# my $innermatch = "0"; + #print "Key: $key\n"; +# foreach my $innerkey (@readonly) { +# if ($key =~ /^$innerkey$/) { + #print "Value not allowed to be changed!\n"; +# $innermatch++; }; +# } +# if ($innermatch >= 1) { +# print "Skipping: $key\n"; + #Do nothing +# } else { +# my $value = $annovarparameters_single{$key}; +# if ($value !~ /^$/) { +# print "key:$key value:$value\n"; +# $pattern = "^($application:.{1,}:$key)"; +# print "pattern: $pattern\n"; +# %in = $yml_hash_edit; +# parse($in, $pattern); +# print "editing value...: $match_value\n"; +# @match_value = split (':', $match_value); +# my $max = scalar @match_value; +# $count = "0"; +# foreach (@match_value) { +# if ($count == $max-1) { +# print "$_\n"; +# $_ =~ s/^.{1,}$/$value/g; +# print "after: $_ \n"; +# } +# $count++; +# } +# print "max $max\n"; +# if ( $max == 4 ) { +# $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]} = $match_value[3] +# } +# } +# } +#} + +#print "\n"; +#my $checkkey; + +#allow processing of arrays as well +#print "Starting editing of array values...\n"; +#foreach my $key ( keys %annovarparameters_array) { +# $count = "0"; + #print "COUNT: $count\n"; + #print "KEY: $key\n"; +# my @key = split (',', $key); +# foreach my $innerkey (@key) { +# my $innermatch = "0"; +# foreach $checkkey (@readonly) { +# if ($innerkey =~ /^$checkkey$/) { + #Do nothing +# $innermatch++; }; +# } +# if ($innermatch >= 1) { +# print "innerkey: $innerkey skipped\n"; + +# } else { +# my $value = $annovarparameters_array{$key}; +# if ($value !~ /^$/ ) { +# print "Value: $value\n"; +# my @value = split (',', $value); +# my $innercount = "0"; +# foreach my $innervalue (@value) { +# if ( $innervalue !~ /^$/ ) { +# print "\nkey: $key innerkey: $innerkey check: $checkkey value: $innervalue\n"; +# print "innercount: $innercount\n"; +# $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]$inprotocolversion[$innercount]:$key)"; +# print "pattern: $pattern\n"; +# %in = $yml_hash_edit; +# parse($in, $pattern); +# undef %in; +# print "editing value...: $match_value\n"; +# if ($match =~ /^1$/) { +# @match_value = split (':', $match_value); +# my $max = scalar @match_value; +# #$count = "0"; +# foreach (@match_value) { +# print "count: $count\n"; +# if ($count == $max-1) { +# print "empty_check: $_ \n"; +# $_ =~ s/^.{1,}$/$value[$innercount]/g; +# print "empty_check_after: $_ \n"; +# } +# print "Max: $max\n"; +# $count++; +# } +# print "change: $value[$innercount]\n"; +# print "Max: $max\n"; +# if ( $max == 9 ) { +# $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$match_value[7]} = $match_value[8]; +# } +# } else { +# +# if ($match =~ /^0$/) { +# $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]:$inprotocolversion[$innercount])"; +# %in = $yml_hash_edit; +# parse($in, $pattern); +# undef %in; +# print "editing value...: $match_value\n"; +# if ($match > 1) { +# @match_value = split (':', $match_value); +# my $max = scalar @match_value; +# $count = "0"; +# print "Max: $max\n"; +# if ( $max == 9 ) { +# $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$innerkey} = "$innervalue"; +# } +# } +# } +# } +# } else { +# #Do nothing no arugment supplied +# } +# $innercount++; +# } +# } else { +# #Do nothing no argument supplied +# } +# } +# } +#$count++; +#print "\n"; +#} + +#print "\nFinished editing...\n"; + +#Create second yml file with change values originating from cli +#$in = $outyml; +#openyml_write($in); +#%in = $yml_hash_edit; +#writeyml_edit($in); +#$in = $outyml; +#load($outyml); +#print "Finished loading second yml...\n"; + +#Load ymlhash with arguments +#$in = $paryml; +#openyml_read($in); +#$in = $paryml; +#load_arguments($in); +#print "Finished loading arguments...\n"; + + +#Fill hashes with input yaml files as defined in input and fixed arguments yaml +openyml_read ($paryml); +openyml_read ($inyml); +load ($inyml, %yml_hash, $yml_hash); +load_arguments ($paryml, %yml_hash_arguments, $yml_hash_arguments); + +##Check input yaml files format +##Pickup arguments from YAML to run command + +######################### +#!!General parser test!!# +######################### + +#Parsing both input YML files to check format, deviations results in killing the process +print "\nParsing Input YAML File\n"; +parse ($yml_hash, $data_type); +print "\nParsing Argument YAML File\n"; +parse ($yml_hash_arguments, $data_type); + +#Manually defined values to fill Annovar command +#Possible to extract from YAML using parser +my $application = 'annovar'; + +#Possible to extract from YAML using parser +my $ncbiRefSeq = 'ncbiRefSeq'; +my $cosmic = 'cosmic'; +my $dbsnp = 'avsnp'; +my $clinvar = 'clinvar'; +my $class = 'class'; + +#Possible to extract from YAML using parser +my $ncbiRefSeq_version = '_UMCU'; +my $cosmic_version = '84'; +my $dbsnp_version = '150'; +my $clinvar_version = '_20180603'; +my $class_version = '100519'; + +#possible to extract from YAML using parser +#species input fixed +my $input_annovar_species = "human"; + +#manual definition required! --> parameter in plugin galaxy / script hardcoded?? +my $language = 'perl '; + +#Possible to extract from YAML using parser +my $parse1 = 'parse1'; +my $parse2 = 'parse2'; + +#Possible to extract from YAML +my $application_path = "$application,APPLICATION,GENERAL"; + +my $ncbiRefSeq_path = "$ncbiRefSeq$ncbiRefSeq_version,NAME,ANALYSIS,DATABASES"; +my $cosmic_path = "$cosmic$cosmic_version,NAME,ANALYSIS,DATABASES"; +my $dbsnp_path = "$dbsnp$dbsnp_version,NAME,ANALYSIS,DATABASES"; +my $clinvar_path = "$clinvar$clinvar_version,NAME,ANALYSIS,DATABASES"; +my $class_path = "$class$class_version,NAME,ANALYSIS,DATABASES"; + +##Code to create folder for database softlinks + +my $path_folder_symlinks; +my @symlinkfolder_building_blocks; +my $symlinks_folder_created; + +if ( $use_alias =~ /^1$/) { + +@symlinkfolder_building_blocks = ("$parse1,LOCATION_DATABASE,$application_path","$parse1,SPECIES,$application_path"); +} + +#Setup variables for processing parse_look function +my $lookup; +my %input; +my $input; +my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5); + +$symlinks_folder_created = '0'; + + +#Code reading species en database location from YAML +if ( $use_alias =~ /^1$/) { + + foreach (@symlinkfolder_building_blocks) { + $in = $inyml; + $in2 = $paryml; + #print "loop1:$_\n"; + my @values = split /\,/, $_; + foreach (@values) { + if ( $_ !~ /^parse[12]$/ ) { + print "Error while generating soflink folder [$_]...\n"; + $path_folder_symlinks .= "$_"; + last; + } + $lookup = ""; + parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); + chomp $lookup; + $path_folder_symlinks .= "$lookup"; + $lookup = ''; + %input = ''; + $input = ''; + last; + } + } + + #Generate base name folder, numbered 0 up to endless to allow specific folder for parrallel analysis + + $symlinks_folder_created = '0'; + + $path_folder_symlinks =~ s/^\s+|\s+$//g; + $folder_symlinks =~ s/^\s+|\s+$//g; + + print "\n"; + print "Folder symlinks: $path_folder_symlinks/$folder_symlinks\n"; + + while ($symlinks_folder_created =~ /^0$/ ) { + if ( -d "$path_folder_symlinks/$folder_symlinks" and -e "$path_folder_symlinks/$folder_symlinks" ) { + print "Folder $path_folder_symlinks/$folder_symlinks is in use!\n"; + $folder_symlinks++; + } else { + print "Creating $path_folder_symlinks/$folder_symlinks...\n"; + system ( "mkdir $path_folder_symlinks/$folder_symlinks" ); + $path_symlinks = "$path_folder_symlinks/$folder_symlinks/"; + print "Created $path_symlinks\n"; + $symlinks_folder_created++; + } + } +} + +##Building annovar command: + +#Manual definition required +#blocks to build command +my @command_building_blocks = ( +"$language", +"$parse1,PATHSCRIPTS,$application_path", +"$parse1,TABLE_ANNOVAR,$application_path", +"$invcf", +"$parse1,LOCATION_DATABASE,$application_path", +"$parse1,SPECIES,$application_path", +"$parse1,BUILD,$application_path", +"$parse1,REMOVE,$application_path", +#Manually define arguments using ALIASES or regular NAME argument based on turning ALIAS on or OFF +#Define this in sections scanning the YAML file +"--protocol \'", +"$parse2,ALIAS,$ncbiRefSeq_path", +"\,", +"$parse2,NAME,$cosmic_path", +"\,", +"$parse2,ALIAS,$dbsnp_path", +"\,", +"$parse2,NAME,$clinvar_path", +"\,", +"$parse2,ALIAS,$class_path", +"\' ", +"--operation \'", +"$parse2,OPERATION,$ncbiRefSeq_path", +"\'", +"\,", +"\'", +"$parse2,OPERATION,$cosmic_path", +#"\'\,\'", +"\'", +"\,", +"\'", +"$parse2,OPERATION,$dbsnp_path", +#"\'\,\'", +"\'", +"\,", +"\'", +"$parse2,OPERATION,$clinvar_path", +#"\'\,\'", +"\'", +"\,", +"\'", +"$parse2,OPERATION,$class_path", +"\' ", +"$parse1,DOT2UNDERLINE,$application_path", +"$parse1,OTHERINFO,$application_path", +"$parse1,NASTRING,$application_path", +"$parse1,INPUTFORMAT,$application_path", +"--arg \'", +"$parse2,HGVS,$ncbiRefSeq_path", +"$parse2,SPLICING,$ncbiRefSeq_path", +"$parse2,EXONSPLIC,$ncbiRefSeq_path", +#"\'\,\'", +"\'", +"\,", +"\'", +"$parse2,COLSWANTED,$cosmic_path", +#"\'\,\'", +"\'", +"\,", +"\'", +"$parse2,COLSWANTED,$dbsnp_path", +#"\'\,\'", +"\'", +"\,", +"\'", +"$parse2,COLSWANTED,$clinvar_path", +#"\'\,\'", +"\'", +"\,", +"\'", +"$parse2,COLSWANTED,$class_path", +"\' ", +"$parse1,THREAD,$application_path", +"$parse1,POLISH,$application_path", +"-outfile $outvcf "); + +print "\n"; + + +#Create Empty command +my $test_command = ""; + +#Start building command to run +foreach (@command_building_blocks) { + $in = $inyml; + $in2 = $paryml; + #print "loop1:$_\n"; + if ( $_ =~ /^,$/ ) { + print "[NOT YAML] $_ is separator and does not require matching!\n"; + $test_command .= "$_"; + next; + } + my @values = split /\,/, $_; + foreach (@values) { + if ( $_ !~ /^parse[12]$/ ) { + print "[NOT YAML] $_ does not require matching!\n"; + $test_command .= "$_"; + last; + } + $lookup = ""; + parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); + + #If alias is enable determine which number is used for this analysis to pickup the correct database files + if ( $use_alias =~ /^1$/ ) { + #add code to add folder number for analysis + if ($values[1]=~/^SPECIES/ ) { + $lookup =~ s/^\s+|\s+$//g; + $folder_symlinks; + $lookup .= "/$folder_symlinks "; + } + } + print "[YAML]: $lookup\n"; + $test_command .= "$lookup"; + $lookup = ''; + $input = ''; + %input = ''; + last; + } +} + +print "\nalias:$use_alias\n"; + +#Created softlinks voor database files + +my @database_info; + +if ( $use_alias =~ /^1$/ ) { + print "Entering Alias stuff..\n"; + #Define databases in use + @softlink_building_blocks = ( + "$parse2,NAME,$ncbiRefSeq_path", + "$parse2,NAME,$cosmic_path", + "$parse2,NAME,$dbsnp_path", + "$parse2,NAME,$clinvar_path", + "$parse2,NAME,$class_path" ); + + print "\nCreating softlinks to Annovar database files:\n"; + + #Use parse_lookup to find argument and create symlinks in analysis database folder + foreach (@softlink_building_blocks) { + $in = $inyml; + $in2 = $paryml; + my @values = split /\,/, $_; + foreach (@values) { + if ( $_ !~ /^parse[12]$/ ) { + print "[NOT YAML] $_ does not require matching!\n"; + last; + } + $lookup = ""; + parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); + print "database=$lookup\n"; + #add code to add folder number for analysis + #print "Path_destination=$path_symlinks\n"; + #print "Path_origin=$path_folder_symlinks\n"; + my $database_detail = $lookup; + + $lookup = ""; + parse_lookup ("parse1", "BUILD", "$application","APPLICATION","GENERAL", %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); + print "Species: $lookup\n"; + my @build = split /\s/, $lookup; + #print "Species=$build[1]\n"; + my $species = $build[1]; + + $lookup = ''; + print "$values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5]\n"; + parse_lookup ($values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); + #print "Alias=$lookup\n"; + my $alias = $lookup; + + #Based on database and presence of either index file or not run softlink commands + if ( $alias =~ /^N\/A$/ ) { + my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$database_detail.txt"; + print "command: $command\n"; + system ( $command ); + } elsif ( $alias !~ /^N\/A$/ ) { + my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$alias.txt"; + print "command2: $command\n"; + system ( $command ); + } + if ( $alias =~ /^dbSNP$/ ) { + my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$alias.txt.idx"; + print "command3: $command\n"; + system ( $command ); + } + if ( $alias =~ /^Annotation$/ ) { + my $command = "ln -s $path_folder_symlinks/$species\_$database_detail\Mrna.fa $path_symlinks$species\_$alias\Mrna.fa"; + print "command4: $command\n"; + system ( $command ); + } + if ( $database_detail =~ /^clinvar/ ) { + my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$database_detail.txt.idx"; + print "command3: $command\n"; + system ( $command ); + } + + #Push all softlinks used in array + if ( $alias !~ /^N\/A$/ ) { + push @database_info, "$alias=$species\_$database_detail"; + } + + + $lookup = ''; + $input = ''; + %input = ''; + last; + } + } + print "Done preparing databases!\n"; +} + +#Run Generated annovar command + +print "\nResulting in following command: $test_command\n"; +################################################# +#!!IMPORTANT# DECOMMENT FOR ACTIVE USE!!# +system ($test_command); +################################################# + +my $addition_vcf; + +if ( $use_alias =~ /^1$/ ) { + + #Add translation for common databases to specific version + $addition_vcf; + my $database_details = $path_symlinks.'database_details.txt'; + $addition_vcf .= "##"; + foreach (@database_info) { + $addition_vcf .= ":$_"; + } + + #Add information to vcf output file + open(OUTVCF_EDIT, '>', "$outvcf\_edit") or die "Could not open file '$outvcf' $!"; + print "\nDone open edit output vcf file...\n"; + open(OUTVCF, '<', "$outvcf") or die "Could not open file '$outvcf' $!"; + print "Done reading vcf output vcf file...\n\n"; + + + while ( my $line = ) { + my @row = split ( "\t", $line ); + if ( $row[0] =~ /^#CHROM$/ ) { + print OUTVCF_EDIT "$addition_vcf\n"; + print OUTVCF_EDIT "$line"; + } else { + print OUTVCF_EDIT "$line"; + } + } + + close OUTVCF_EDIT; + close OUTVCF; + + #Replace orginal vcf file with edited vcf file with additional information + my $mv_command = "mv $outvcf\_edit $outvcf"; + system ($mv_command); + + print "\nAdding line to vcf file:\n"; + print "$addition_vcf\n\n"; + + #Remove folder containing softlinks + print "Removing temporary directory:\n"; + print "rm -r $path_symlinks\n\n"; + system ( "rm -r $path_symlinks" ); +} + +print "Job done program stopping.\n"; + +exit; + +##################### +#!!!END OF SCRIPT!!!# +#################### + +############# +#SUBROUTINES# +############# + +############# +#Subroutine to obtain datatype variable +############# +sub test_type { + +my $input = $_[0]; +$data_type = ref($input); +return $data_type; + +} + + +############# +#Subroutine to parse and check yaml for correct format +############# +sub parse { + +#@inyml = @{$yml_hash->{$pattern4}{$pattern5}}; +#@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}}; + +my $input_hash = $_[0]; + +my ($key_a, $key_a2, $key_b, $key_b2, $key_c, $key_c2); +my ($value_a, $value_a2, $value_b, $value_b2, $value_c, $value_c2); +my (@hash, @hash2, $hash, $hash2); + +my $count_a = '0'; +my $count_a2 = '0'; +my $count_a3 = '0'; +my $count_a4 = '0'; +my $count_b = '0'; +my $count_b2 = '0'; +my $count_b3 = '0'; +my $count_b4 = '0'; +my $count_c = '0'; +my $count_c2 = '0'; +my $count_c3 = '0'; +my $count_c4 = '0'; + +for $key_a (keys %$input_hash) { + $count_a++; + for $value_a (values %$input_hash) { + $count_a2++; + if ( $count_a =~ /^$count_a2$/ ) { + test_type($value_a); + if ($data_type =~ /^HASH$/) { + for $key_a2 ( keys %{$input_hash->{$key_a}} ) { + $count_a3++; + for $value_a2 ( values %{$input_hash->{$key_a}} ) { + $count_a4++; + if ($count_a3 =~ /^$count_a4$/) { + test_type($value_a2); + if ($data_type =~ /^HASH$/) { + print "[ERROR1] Found unexpected yml file format [ key: $key_a2 ] , please check whether your YAML file meets requirements!\n"; + exit; + } elsif ($data_type =~ /^ARRAY$/) { + @hash = @{$input_hash->{$key_a}{$key_a2}}; + foreach $hash ( @hash ) { + for $key_b ( keys %$hash ) { + $count_b++; + for $value_b (values %$hash ) { + $count_b2++; + if ($count_b =~ /^$count_b2$/) { + test_type($value_b); + if ( $data_type =~ /^HASH$/ ) { + for $key_b2 ( keys %{$hash->{$key_b}} ) { + $count_b3++; + for $value_b2 (values %{$hash->{$key_b}} ) { + $count_b4++; + if ($count_b3 =~ /^$count_b4/) { + test_type($value_b2); + if ( $data_type =~ /^HASH$/ ) { + print "[ERROR2] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n"; + exit; + } elsif ( $data_type =~ /^ARRAY$/ ) { + print "[ERROR3] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n"; + exit; + } else { + #Assume value level 4 + print "value_b2: $key_a:$key_a2:$key_b:$key_b2:$value_b2\n"; + } + } + } + $count_b4 = "0"; + } + $count_b3 = "0"; + } + elsif ( $data_type =~ /^ARRAY$/ ) { + print "[ERROR4] Found unexpected yml file format [ key: $key_b ] , please check whether your YAML file meets requirements!\n"; + exit; + } else { + #assume value level 3 + print "value_b: $key_a:$key_a2:$key_b:$value_b\n"; + } + } + } + $count_b2 = '0'; + } + $count_b = '0'; + } + } else { + #Assume value level 2 + print "Value_a2: $key_a:$value_a2\n"; + } + } + } + $count_a4 = '0'; + } + $count_a3 = '0'; + } + elsif ($data_type =~ /^ARRAY/) { + @hash2 = @{$input_hash->{$key_a}}; + foreach $hash2 ( @hash2 ) { + for my $key_c ( keys %$hash2 ) { + $count_c++; + for $value_c ( values %$hash2 ) { + $count_c2++; + if ($count_c =~ /^$count_c2$/) { + test_type($value_c); + if ( $data_type =~ /^HASH$/ ) { + for $key_c2 ( keys %{$hash2->{$key_c}} ) { + $count_c3++; + for $value_c2 ( values %{$hash2->{$key_c}} ) { + $count_c4++; + if ( $count_c3 =~ /^$count_c4$/ ) { + test_type($value_c2); + if ( $data_type =~ /^HASH$/ ) { + print "[ERROR5] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n"; + exit; + } elsif ( $data_type =~ /^ARRAY$/ ) { + print "[ERROR6] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n"; + exit; + } else { + #Assume level 4 + print "value_c2: $key_a:$key_a2:$key_c:$key_c2:$value_c2\n"; + } + } + } + $count_c4 = '0'; + } + $count_c3 = '0'; + } + elsif ( $data_type =~ /^ARRAY$/ ) { + print "[ERROR7] Found unexpected yml file format [ key: $key_c ] , please check whether your YAML file meets requirements!\n"; + exit; + } else { + #assume value level 3 + print "value_c: $key_a:$key_c:$value_c\n"; + } + } + } + $count_c2 = '0'; + } + $count_c = '0'; + } + } else { + #Assume value level 1 + print "value_a: $key_a:$value_a\n"; + } + } + } +$count_a2 = '0'; +} +$count_a = '0'; + +$data_type = ""; + +} + +######################################### +#Subroutine to fetch arguments from YAML# +######################################### + +sub parse_lookup { + +my $hash; +my %hash; +my $hash_a; +my %hash_a; +my %hash_a2; +my $hash_a2; +my @inyml; +my @inarguments; +my $count; +my $match; +my $match2; +my $argument; +my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5); + +if ( $_[0] =~ /^parse1$/ ) { +$pattern1 = $_[1]; +$pattern2 = $_[2]; +$pattern3 = $_[3]; +$pattern4 = $_[4]; +@inyml = @{$yml_hash->{$pattern4}}; +@inarguments = @{$yml_hash_arguments->{$pattern4}}; +} elsif ( $_[0] =~ /^parse2$/) { +$pattern1 = $_[1]; +$pattern2 = $_[2]; +$pattern3 = $_[3]; +$pattern4 = $_[4]; +$pattern5 = $_[5]; +@inyml = @{$yml_hash->{$pattern4}{$pattern5}}; +@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}}; +} + +$match = "0"; + +foreach my $hash ( @inyml ) { + $hash = \%$hash; + if ($hash->{$pattern3} =~ /^$pattern2$/ ) { + my $value = $hash->{$pattern1}; + if ($value eq "" ) { + print "Error unknown argument \"$pattern1\" for annotation database \"$pattern2\" in input YAML!\n"; + exit; + } + foreach my $hash_a (@inarguments) { + my $hash_a2 = $hash_a->{$pattern1}; + if ( $hash_a->{$pattern3} =~ /^$pattern2$/ ) { + $match2++; + if ($hash_a2 =~ /^HASH/ ) { + $argument = $hash_a2->{$value}; + } else { + $argument = $hash_a2; + } + if ($argument eq "" ) { + print "Unknown given option \"$value\" for argument \"$pattern1\" in YAML arguments file or input YAML for database \"$pattern2\"!\n"; + print "Please check your input and start the plugin again.\n"; + exit; + } + } + if ($match2 =~ /^1$/) { + #Do nothing + } elsif ( $match2 > 1 ) { + print "Error Multiple matches for database \"$pattern2\" database in YAML argumentsfile.\n"; + print "Please check your input and start the plugin again.\n"; + exit; + } elsif ( $match2 =~ /^0$/ ) { + print "Error no matches for database \"$pattern2\" in YAML argumentsfile.\n"; + print "Please check your input and start the plugin again.\n"; + exit; + } + } + $match++; + $lookup = $argument; + } +} +if ($match =~ /^1$/) { + #Do nothing +} else { + print "match: $match \n"; + print "Error in yaml file multiple instances matching input yaml, please check input!\n"; + exit; +} + +return $lookup; + +} + +############# +#Subroutine for opening out / edited yaml file +############# +sub openyml_write { +my $outyml = $in; +#Create second yml file with change values originating from cli +open(OUT, '>', "$outyml") or die "Could not open file '$outyml' $!"; +print "Done creating output file...\n"; +} + +############# +#Subroutine to check whether yaml file exists +############# +sub openyml_read { +$in = $_[0]; +chomp $in; +my $inyml = $in; +open(FILE, '<', "$inyml") or die "Could not open file '$inyml' $!"; +close FILE; +print "Done reading\n"; +} + +############# +#Subroutine for reading argument yaml file in hash +############## +sub load_arguments { +$in = $_[0]; +$yml_hash_arguments = LoadFile($in); +return($yml_hash_arguments, %yml_hash_arguments); +} + +############# +#Subroutine for reading input yaml file in hash +############## +sub load { +$in = $_[0]; +$yml_hash = LoadFile($in); +return($yml_hash, %yml_hash); +} + +############# +#Subroutine for reading edited input yml file in hash +############## +sub load_edit { +$in = $_[0]; +$yml_hash_edit = LoadFile($in); +} + +############# +#Subroutine for editing content yaml hash +############## +sub writeyml { +my $yml_hash = $in; +$writer = sub { + my $line = shift; + print FILE "$line\n"; +}; +my $file = Data::YAML::Writer->new; +$file->write( $yml_hash, $writer ); +} + +############# +#Subroutine for editing content edited yaml hash +############## +sub writeyml_edit { +my $yml_hash = $in; +$writer = sub { + my $line = shift; + print OUT "$line\n"; +}; +my $file = Data::YAML::Writer->new; +$file->write( $yml_hash_edit, $writer ); +close OUT; +} diff -r 0044b989f60f -r 237d0eef2ad3 0348637001560173633/annovar_yaml/annovar_yaml.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0348637001560173633/annovar_yaml/annovar_yaml.xml Mon Jun 10 09:34:27 2019 -0400 @@ -0,0 +1,25 @@ + + + perl-yaml + perl-yaml-tiny + perl-yaml-xs + perl-data-dumper + perl-getopt-long + + + + + + + + + + + + + + diff -r 0044b989f60f -r 237d0eef2ad3 annovar_yaml/YAML_annovar.yml --- a/annovar_yaml/YAML_annovar.yml Mon Jun 10 09:21:03 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -GENERAL: - - APPLICATION: 'annovar' - PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' - CODING_ANNOVAR: 'coding_annovar.pl' - TABLE_ANNOVAR: 'table_annovar.pl' - LOCATION_DATABASE: ' /hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' - DOT2UNDERLINE: 'yes' - NASTRING: '.' - OTHERINFO: 'yes' - POLISH: 'yes' - REMOVE: 'yes' - THREAD: '8' - INPUTFORMAT: 'vcfinput' - SPECIES: 'human' - BUILD: 'hg19' -ANALYSIS: - DATABASES: - - NAME: 'cosmic84' - ALIAS: 'N/A' - PROTOCOL: 'cosmic' - VERSION: '84' - COMMENT: '20190221' - AVAILABLE: 'yes' - REQUIRED: 'yes' - OPERATION: 'f' - COLSWANTED: '4' - - NAME: 'refgene19' - ALIAS: 'N/A' - PROTOCOL: 'refgene' - VERSION: '19' - AVAILABLE: 'yes' - REQUIRED: 'yes' - COMMENT: '20190210' - OPERATION: 'g' - HGVS: 'yes' - SPLICING: '6' - EXONSPLIC: 'yes' - - NAME: 'ncbiRefSeq_UMCU' - ALIAS: 'Annotation' - PROTOCOL: 'ncbiRefSeq' - VERSION: '_UMCU' - AVAILABLE: 'yes' - REQUIRED: 'yes' - COMMENT: "100519" - OPERATION: 'g' - HGVS: 'yes' - SPLICING: '6' - EXONSPLIC: 'yes' - - NAME: 'avsnp150' - ALIAS: 'dbSNP' - PROTOCOL: 'avsnp' - VERSION: '150' - AVAILABLE: 'yes' - REQUIRED: 'yes' - OPERATION: 'f' - COLSWANTED: '1' - - NAME: 'clinvar_20180603' - ALIAS: 'N/A' - PROTOCOL: 'clinvar' - VERSION: '_20180603' - AVAILABLE: 'yes' - REQUIRED: 'yes' - OPERATION: 'f' - COLSWANTED: '5' - - NAME: 'class100519' - ALIAS: 'Class' - PROTOCOL: 'class' - VERSION: '100519' - AVAILABLE: 'yes' - REQUIRED: 'yes' - OPERATION: 'f' - COLSWANTED: '1' diff -r 0044b989f60f -r 237d0eef2ad3 annovar_yaml/YAML_arguments_annovar.yml --- a/annovar_yaml/YAML_arguments_annovar.yml Mon Jun 10 09:21:03 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,117 +0,0 @@ -GENERAL: - - APPLICATION: 'annovar' - PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' - CODING_ANNOVAR: 'coding_annovar.pl ' - TABLE_ANNOVAR: 'table_annovar.pl ' - LOCATION_DATABASE: ' /hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' - DOT2UNDERLINE: - yes: '--dot2underline ' - no: '' - NASTRING: '--nastring . ' - OTHERINFO: - yes: '--otherinfo ' - no: '' - POLISH: - yes: '--polish ' - no: '' - REMOVE: - yes: '--remove ' - no: '' - THREAD: '--thread 8 ' - INPUTFORMAT: - vcfinput: '--vcfinput ' - SPECIES: - human: 'humandb ' - mouse: 'mousedb ' - BUILD: '--buildver hg19 ' -ANALYSIS: - DATABASES: - - NAME: 'cosmic84' - ALIAS: 'N/A' - PROTOCOL: 'cosmic' - VERSION: '84' - COMMENT: '20190221' - AVAILABLE: - yes: '1' - no: '0' - REQUIRED: - yes: '1' - no: '0' - OPERATION: 'f' - COLSWANTED: '--colswanted 4 ' - - NAME: 'refgene19' - ALIAS: 'N/A' - PROTOCOL: 'refgene' - VERSION: '19' - COMMENT: '20190210' - AVAILABLE: - yes: '1' - no: '0' - REQUIRED: - yes: '1' - no: '0' - OPERATION: 'g' - HGVS: - yes: '--hgvs ' - no: '' - SPLICING: '--splicing 6 ' - EXONSPLIC: - yes: '--exonicsplicing ' - no: '' - - NAME: 'ncbiRefSeq_UMCU' - ALIAS: 'Annotation' - PROTOCOL: 'ncbiRefSeq' - VERSION: '_UMCU' - AVAILABLE: - yes: '1' - no: '0' - REQUIRED: - yes: '1' - no: '0' - COMMENT: "100519" - OPERATION: 'g' - HGVS: - yes: '--hgvs ' - no: '' - SPLICING: '--splicing 6 ' - EXONSPLIC: - yes: '--exonicsplicing ' - no: '' - - NAME: 'avsnp150' - ALIAS: 'dbSNP' - PROTOCOL: 'avsnp' - VERSION: '150' - AVAILABLE: - yes: '1' - no: '0' - REQUIRED: - yes: '1' - no: '0' - OPERATION: 'f' - COLSWANTED: '--colswanted 1 ' - - NAME: 'clinvar_20180603' - ALIAS: 'N/A' - PROTOCOL: 'clinvar' - VERSION: '_20180603' - COMMENT: 'blah' - AVAILABLE: - yes: '1' - no: '0' - REQUIRED: - yes: '1' - no: '0' - OPERATION: 'f' - COLSWANTED: '--colswanted 5 ' - - NAME: 'class100519' - ALIAS: 'Class' - PROTOCOL: 'class' - VERSION: '100519' - AVAILABLE: - yes: '1' - no: '0' - REQUIRED: - yes: '1' - no: '0' - OPERATION: 'f' - COLSWANTED: '--colswanted 1 ' - diff -r 0044b989f60f -r 237d0eef2ad3 annovar_yaml/annovar_yaml.pl --- a/annovar_yaml/annovar_yaml.pl Mon Jun 10 09:21:03 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1121 +0,0 @@ -#/usr/bin/perl -#This code is using v4 of the Run_Annovar_vx.pl script! - -################## -#!!TEST COMMAND!!# -################## -#Following input required, input fixed currently to run limited Annovar settings -#perl annovar_yaml.pl --inyml YAML_annovar.yml --invcf invcf.vcf --outvcf outvcf.vcf > log.txt - -#Future prospects: ability to learn to read YAML files to pick up parameters. Manual definition of command line still required! - -#perl modules use -use YAML; -use YAML::Tiny; -use YAML::XS 'LoadFile'; -use Data::Dumper; -#use Data::YAML::Writer; -use Getopt::Long; -use feature qw(postderef postderef_qq); -use feature qw(refaliasing); -#no warnings qw(experimental::postderef); -use strict; - -#input required -#two modes run and edit -#Basis Yaml file [default required] -#Yaml out file [required in edit mode] -#species [default required] -#build [default required] -#protocol [default required] -#protocolversion [default required] - -#basic paramters -#Switch to enable disable Aliasses; enable=1 disable=0 -my $use_alias = "1"; -my $edit = "0"; -my $run = "0"; -my $application = "0"; -#arguments yaml file is fixed! -#my $paryml = "YAML_arguments_annovar.yml"; -my $paryml; -my $inyml; -my $outyml; -my $invcf; -my $outvcf; -my $yml_hash_arguments; -my %yml_hash_arguments; -my $yml_hash; -my %yml_hash; -my $yml_hash_edit; -my %yml_hash_edit; -my $in; -my $in2; -my %in; -my %in2; -my $writer; -my $lookup; -my $data_type; - -#parameter applications -my %annovarparameters_array; -my %annovarparameters_single; -my $annovarparameters_array; -my $annovarparameters_single; - -#Strings used as input parameters -#my $input_general_location_database; -#my $input_general_dir_scripts; -#my $input_general_location_scripts_coding_annovar; -#my $input_general_location_scripts_table_annovar; -my $input_annovar_build; -my $input_annovar_species; -my $inprotocol; -my $inprotocolversion; -my @inprotocol; -my @inprotocolversion; - -my $input_general_settings_dot2underline; -my $input_general_settings_nastring; -my $input_general_settings_otherinfo; -my $input_general_settings_polish; -my $input_general_settings_remove; -my $input_general_settings_thread; -my $input_general_settings_inputformat; - -my $input_databases_available; -my $input_databases_required; - -my $input_databases_colswanted; -my $input_databases_exonicsplic; -my $input_databases_hgvs; -my $input_databases_operation; -my $input_databases_splicing; -my @input_databases_colswanted; -my @input_databases_exonicsplic; -my @input_databases_hgvs; -my @input_databases_operation; -my @input_databases_splicing; - -#other variables -my $script; -my $type; -my $count; -my $column_build; -my $column_species; -my @column_build; -my @column_species; -my $pattern; -my $pattern2; -my $match; -my $match_value; -my @match_value; -my @readonly; -my %editinyml; -my $folder_symlinks = '0'; -my $path_symlinks; -my @softlink_building_blocks; - -#Fixed values for searching hashes -my $level5 = "DATABASES"; - -#parameters to build command -my $protocol; -my @protocol; -my $operation; -my $argument; - -GetOptions ( - #Required input - 'protocol=s' => \$inprotocol, - 'protocolversion=s' => \$inprotocolversion, - 'script=s' => \$script, - 'invcf=s' => \$invcf, - 'outvcf=s' => \$outvcf, - 'paryml=s' => \$paryml, - 'application=s' => \$application, - 'inyml=s' => \$inyml, - 'outyml=s' => \$outyml, - 'build=s' => \$input_annovar_build, - 'species=s' => \$input_annovar_species, - #Optional input - 'edit' => \$edit, - 'run' => \$run, - #variables in GENERAL - 'dot2underline=s' => \$input_general_settings_dot2underline, - 'nastring=s' => \$input_general_settings_nastring, - 'otherinfo=s' => \$input_general_settings_otherinfo, - 'polish=s' => \$input_general_settings_polish, - 'remove=s' => \$input_general_settings_remove, - 'thread=s' => \$input_general_settings_thread, - 'inputformat=s' => \$input_general_settings_inputformat, - #variables in DATABASES - 'colswanted=s' => \$input_databases_colswanted, - 'exonicsplic=s' => \$input_databases_exonicsplic, - 'hgvs=s' => \$input_databases_hgvs, - 'operation=s' => \$input_databases_operation, - 'splicing=s' => \$input_databases_splicing, -); - -chomp $invcf; -chomp $outvcf; - -#inactive options -# 'drequired=s' => \$input_databases_required, -# 'davail=s' => \$input_databases_available, - -# 'gpathdatabase=s' => \$input_general_location_database, -# 'gpathscripts=s' => \$input_general_dir_scripts, -# 'gpathcodannovar=s' => \$input_general_location_scripts_coding_annovar, -# 'gpathtabannovar=s' => \$input_general_location_scripts_table_annovar, - -#Check input parameters -#Do some checks whether all required arguments are given - -#some code - -#arrays with all application specific parameters -#make sure keys are unique to allow for parsing of yml! - -%annovarparameters_array = ( - "COLSWANTED" => "$input_databases_colswanted", - "EXONICSPLIC" => "$input_databases_exonicsplic", - "HGVS" => "$input_databases_hgvs", - "OPERATION" => "$input_databases_operation", - "SPLICING" => "$input_databases_splicing", -); - -%annovarparameters_single = ( - "DOT2UNDERLINE" => "$input_general_settings_dot2underline", - "NASTRING" => "$input_general_settings_nastring", - "OTHERINFO" => "$input_general_settings_otherinfo", - "POLISH" => "$input_general_settings_polish", - "REMOVE" => "$input_general_settings_remove", - "THREAD" => "$input_general_settings_thread", - "INPUTFORMAT" => "$input_general_settings_inputformat", -); - -#parameter not allowed to be edited in yml -@readonly = ("hg19","human","mouse","mm10","annovar","VERSION","TABLE_ANNOVAR","refgene","cosmic"); - - -#converting input strings into input arrays -@inprotocol = split (",", $inprotocol); -@inprotocolversion = split (",", $inprotocolversion); -@input_databases_colswanted = split (",", $input_databases_colswanted); -@input_databases_exonicsplic = split (",", $input_databases_exonicsplic); -@input_databases_hgvs = split (",", $input_databases_hgvs ); -@input_databases_operation = split (",", $input_databases_operation); -@input_databases_splicing = split (",", $input_databases_splicing); - -#$in = $inyml; -#openyml_read($in); -#$in = $inyml; -#load($in); - -#check if combination of application,species, build and protocol exists -#$count = "0"; -#%in = $yml_hash; -#foreach (@inprotocol) { -#print "protocol $inprotocol[$count]\n"; -#$pattern = "$application:ANALYSIS:$input_annovar_species:$input_annovar_build:DATABASES:$inprotocol[$count]$inprotocolversion[$count]:NAME $inprotocol[$count]$inprotocolversion[$count]"; -#@pattern = ("GENERAL:species", "GENERAL:build", "GENERAL:protocol"); -#foreach @pattern { -# parse($in, $_); -# if ($match =~ /^1$/) { print "Present!\n"; }; -# $count++; -# } -#} - -#$in = $inyml; -#openyml_read($in); -#$in = $inyml; -#load_edit($in); - -#Editing section first single values, next arrays -#print "Starting editing of strings...\n"; -#foreach my $key (keys %annovarparameters_single) { -# my $innermatch = "0"; - #print "Key: $key\n"; -# foreach my $innerkey (@readonly) { -# if ($key =~ /^$innerkey$/) { - #print "Value not allowed to be changed!\n"; -# $innermatch++; }; -# } -# if ($innermatch >= 1) { -# print "Skipping: $key\n"; - #Do nothing -# } else { -# my $value = $annovarparameters_single{$key}; -# if ($value !~ /^$/) { -# print "key:$key value:$value\n"; -# $pattern = "^($application:.{1,}:$key)"; -# print "pattern: $pattern\n"; -# %in = $yml_hash_edit; -# parse($in, $pattern); -# print "editing value...: $match_value\n"; -# @match_value = split (':', $match_value); -# my $max = scalar @match_value; -# $count = "0"; -# foreach (@match_value) { -# if ($count == $max-1) { -# print "$_\n"; -# $_ =~ s/^.{1,}$/$value/g; -# print "after: $_ \n"; -# } -# $count++; -# } -# print "max $max\n"; -# if ( $max == 4 ) { -# $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]} = $match_value[3] -# } -# } -# } -#} - -#print "\n"; -#my $checkkey; - -#allow processing of arrays as well -#print "Starting editing of array values...\n"; -#foreach my $key ( keys %annovarparameters_array) { -# $count = "0"; - #print "COUNT: $count\n"; - #print "KEY: $key\n"; -# my @key = split (',', $key); -# foreach my $innerkey (@key) { -# my $innermatch = "0"; -# foreach $checkkey (@readonly) { -# if ($innerkey =~ /^$checkkey$/) { - #Do nothing -# $innermatch++; }; -# } -# if ($innermatch >= 1) { -# print "innerkey: $innerkey skipped\n"; - -# } else { -# my $value = $annovarparameters_array{$key}; -# if ($value !~ /^$/ ) { -# print "Value: $value\n"; -# my @value = split (',', $value); -# my $innercount = "0"; -# foreach my $innervalue (@value) { -# if ( $innervalue !~ /^$/ ) { -# print "\nkey: $key innerkey: $innerkey check: $checkkey value: $innervalue\n"; -# print "innercount: $innercount\n"; -# $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]$inprotocolversion[$innercount]:$key)"; -# print "pattern: $pattern\n"; -# %in = $yml_hash_edit; -# parse($in, $pattern); -# undef %in; -# print "editing value...: $match_value\n"; -# if ($match =~ /^1$/) { -# @match_value = split (':', $match_value); -# my $max = scalar @match_value; -# #$count = "0"; -# foreach (@match_value) { -# print "count: $count\n"; -# if ($count == $max-1) { -# print "empty_check: $_ \n"; -# $_ =~ s/^.{1,}$/$value[$innercount]/g; -# print "empty_check_after: $_ \n"; -# } -# print "Max: $max\n"; -# $count++; -# } -# print "change: $value[$innercount]\n"; -# print "Max: $max\n"; -# if ( $max == 9 ) { -# $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$match_value[7]} = $match_value[8]; -# } -# } else { -# -# if ($match =~ /^0$/) { -# $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]:$inprotocolversion[$innercount])"; -# %in = $yml_hash_edit; -# parse($in, $pattern); -# undef %in; -# print "editing value...: $match_value\n"; -# if ($match > 1) { -# @match_value = split (':', $match_value); -# my $max = scalar @match_value; -# $count = "0"; -# print "Max: $max\n"; -# if ( $max == 9 ) { -# $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$innerkey} = "$innervalue"; -# } -# } -# } -# } -# } else { -# #Do nothing no arugment supplied -# } -# $innercount++; -# } -# } else { -# #Do nothing no argument supplied -# } -# } -# } -#$count++; -#print "\n"; -#} - -#print "\nFinished editing...\n"; - -#Create second yml file with change values originating from cli -#$in = $outyml; -#openyml_write($in); -#%in = $yml_hash_edit; -#writeyml_edit($in); -#$in = $outyml; -#load($outyml); -#print "Finished loading second yml...\n"; - -#Load ymlhash with arguments -#$in = $paryml; -#openyml_read($in); -#$in = $paryml; -#load_arguments($in); -#print "Finished loading arguments...\n"; - - -#Fill hashes with input yaml files as defined in input and fixed arguments yaml -openyml_read ($paryml); -openyml_read ($inyml); -load ($inyml, %yml_hash, $yml_hash); -load_arguments ($paryml, %yml_hash_arguments, $yml_hash_arguments); - -##Check input yaml files format -##Pickup arguments from YAML to run command - -######################### -#!!General parser test!!# -######################### - -#Parsing both input YML files to check format, deviations results in killing the process -print "\nParsing Input YAML File\n"; -parse ($yml_hash, $data_type); -print "\nParsing Argument YAML File\n"; -parse ($yml_hash_arguments, $data_type); - -#Manually defined values to fill Annovar command -#Possible to extract from YAML using parser -my $application = 'annovar'; - -#Possible to extract from YAML using parser -my $ncbiRefSeq = 'ncbiRefSeq'; -my $cosmic = 'cosmic'; -my $dbsnp = 'avsnp'; -my $clinvar = 'clinvar'; -my $class = 'class'; - -#Possible to extract from YAML using parser -my $ncbiRefSeq_version = '_UMCU'; -my $cosmic_version = '84'; -my $dbsnp_version = '150'; -my $clinvar_version = '_20180603'; -my $class_version = '100519'; - -#possible to extract from YAML using parser -#species input fixed -my $input_annovar_species = "human"; - -#manual definition required! --> parameter in plugin galaxy / script hardcoded?? -my $language = 'perl '; - -#Possible to extract from YAML using parser -my $parse1 = 'parse1'; -my $parse2 = 'parse2'; - -#Possible to extract from YAML -my $application_path = "$application,APPLICATION,GENERAL"; - -my $ncbiRefSeq_path = "$ncbiRefSeq$ncbiRefSeq_version,NAME,ANALYSIS,DATABASES"; -my $cosmic_path = "$cosmic$cosmic_version,NAME,ANALYSIS,DATABASES"; -my $dbsnp_path = "$dbsnp$dbsnp_version,NAME,ANALYSIS,DATABASES"; -my $clinvar_path = "$clinvar$clinvar_version,NAME,ANALYSIS,DATABASES"; -my $class_path = "$class$class_version,NAME,ANALYSIS,DATABASES"; - -##Code to create folder for database softlinks - -my $path_folder_symlinks; -my @symlinkfolder_building_blocks; -my $symlinks_folder_created; - -if ( $use_alias =~ /^1$/) { - -@symlinkfolder_building_blocks = ("$parse1,LOCATION_DATABASE,$application_path","$parse1,SPECIES,$application_path"); -} - -#Setup variables for processing parse_look function -my $lookup; -my %input; -my $input; -my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5); - -$symlinks_folder_created = '0'; - - -#Code reading species en database location from YAML -if ( $use_alias =~ /^1$/) { - - foreach (@symlinkfolder_building_blocks) { - $in = $inyml; - $in2 = $paryml; - #print "loop1:$_\n"; - my @values = split /\,/, $_; - foreach (@values) { - if ( $_ !~ /^parse[12]$/ ) { - print "Error while generating soflink folder [$_]...\n"; - $path_folder_symlinks .= "$_"; - last; - } - $lookup = ""; - parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); - chomp $lookup; - $path_folder_symlinks .= "$lookup"; - $lookup = ''; - %input = ''; - $input = ''; - last; - } - } - - #Generate base name folder, numbered 0 up to endless to allow specific folder for parrallel analysis - - $symlinks_folder_created = '0'; - - $path_folder_symlinks =~ s/^\s+|\s+$//g; - $folder_symlinks =~ s/^\s+|\s+$//g; - - print "\n"; - print "Folder symlinks: $path_folder_symlinks/$folder_symlinks\n"; - - while ($symlinks_folder_created =~ /^0$/ ) { - if ( -d "$path_folder_symlinks/$folder_symlinks" and -e "$path_folder_symlinks/$folder_symlinks" ) { - print "Folder $path_folder_symlinks/$folder_symlinks is in use!\n"; - $folder_symlinks++; - } else { - print "Creating $path_folder_symlinks/$folder_symlinks...\n"; - system ( "mkdir $path_folder_symlinks/$folder_symlinks" ); - $path_symlinks = "$path_folder_symlinks/$folder_symlinks/"; - print "Created $path_symlinks\n"; - $symlinks_folder_created++; - } - } -} - -##Building annovar command: - -#Manual definition required -#blocks to build command -my @command_building_blocks = ( -"$language", -"$parse1,PATHSCRIPTS,$application_path", -"$parse1,TABLE_ANNOVAR,$application_path", -"$invcf", -"$parse1,LOCATION_DATABASE,$application_path", -"$parse1,SPECIES,$application_path", -"$parse1,BUILD,$application_path", -"$parse1,REMOVE,$application_path", -#Manually define arguments using ALIASES or regular NAME argument based on turning ALIAS on or OFF -#Define this in sections scanning the YAML file -"--protocol \'", -"$parse2,ALIAS,$ncbiRefSeq_path", -"\,", -"$parse2,NAME,$cosmic_path", -"\,", -"$parse2,ALIAS,$dbsnp_path", -"\,", -"$parse2,NAME,$clinvar_path", -"\,", -"$parse2,ALIAS,$class_path", -"\' ", -"--operation \'", -"$parse2,OPERATION,$ncbiRefSeq_path", -"\'", -"\,", -"\'", -"$parse2,OPERATION,$cosmic_path", -#"\'\,\'", -"\'", -"\,", -"\'", -"$parse2,OPERATION,$dbsnp_path", -#"\'\,\'", -"\'", -"\,", -"\'", -"$parse2,OPERATION,$clinvar_path", -#"\'\,\'", -"\'", -"\,", -"\'", -"$parse2,OPERATION,$class_path", -"\' ", -"$parse1,DOT2UNDERLINE,$application_path", -"$parse1,OTHERINFO,$application_path", -"$parse1,NASTRING,$application_path", -"$parse1,INPUTFORMAT,$application_path", -"--arg \'", -"$parse2,HGVS,$ncbiRefSeq_path", -"$parse2,SPLICING,$ncbiRefSeq_path", -"$parse2,EXONSPLIC,$ncbiRefSeq_path", -#"\'\,\'", -"\'", -"\,", -"\'", -"$parse2,COLSWANTED,$cosmic_path", -#"\'\,\'", -"\'", -"\,", -"\'", -"$parse2,COLSWANTED,$dbsnp_path", -#"\'\,\'", -"\'", -"\,", -"\'", -"$parse2,COLSWANTED,$clinvar_path", -#"\'\,\'", -"\'", -"\,", -"\'", -"$parse2,COLSWANTED,$class_path", -"\' ", -"$parse1,THREAD,$application_path", -"$parse1,POLISH,$application_path", -"-outfile $outvcf "); - -print "\n"; - - -#Create Empty command -my $test_command = ""; - -#Start building command to run -foreach (@command_building_blocks) { - $in = $inyml; - $in2 = $paryml; - #print "loop1:$_\n"; - if ( $_ =~ /^,$/ ) { - print "[NOT YAML] $_ is separator and does not require matching!\n"; - $test_command .= "$_"; - next; - } - my @values = split /\,/, $_; - foreach (@values) { - if ( $_ !~ /^parse[12]$/ ) { - print "[NOT YAML] $_ does not require matching!\n"; - $test_command .= "$_"; - last; - } - $lookup = ""; - parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); - - #If alias is enable determine which number is used for this analysis to pickup the correct database files - if ( $use_alias =~ /^1$/ ) { - #add code to add folder number for analysis - if ($values[1]=~/^SPECIES/ ) { - $lookup =~ s/^\s+|\s+$//g; - $folder_symlinks; - $lookup .= "/$folder_symlinks "; - } - } - print "[YAML]: $lookup\n"; - $test_command .= "$lookup"; - $lookup = ''; - $input = ''; - %input = ''; - last; - } -} - -print "\nalias:$use_alias\n"; - -#Created softlinks voor database files - -my @database_info; - -if ( $use_alias =~ /^1$/ ) { - print "Entering Alias stuff..\n"; - #Define databases in use - @softlink_building_blocks = ( - "$parse2,NAME,$ncbiRefSeq_path", - "$parse2,NAME,$cosmic_path", - "$parse2,NAME,$dbsnp_path", - "$parse2,NAME,$clinvar_path", - "$parse2,NAME,$class_path" ); - - print "\nCreating softlinks to Annovar database files:\n"; - - #Use parse_lookup to find argument and create symlinks in analysis database folder - foreach (@softlink_building_blocks) { - $in = $inyml; - $in2 = $paryml; - my @values = split /\,/, $_; - foreach (@values) { - if ( $_ !~ /^parse[12]$/ ) { - print "[NOT YAML] $_ does not require matching!\n"; - last; - } - $lookup = ""; - parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); - print "database=$lookup\n"; - #add code to add folder number for analysis - #print "Path_destination=$path_symlinks\n"; - #print "Path_origin=$path_folder_symlinks\n"; - my $database_detail = $lookup; - - $lookup = ""; - parse_lookup ("parse1", "BUILD", "$application","APPLICATION","GENERAL", %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); - print "Species: $lookup\n"; - my @build = split /\s/, $lookup; - #print "Species=$build[1]\n"; - my $species = $build[1]; - - $lookup = ''; - print "$values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5]\n"; - parse_lookup ($values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); - #print "Alias=$lookup\n"; - my $alias = $lookup; - - #Based on database and presence of either index file or not run softlink commands - if ( $alias =~ /^N\/A$/ ) { - my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$database_detail.txt"; - print "command: $command\n"; - system ( $command ); - } elsif ( $alias !~ /^N\/A$/ ) { - my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$alias.txt"; - print "command2: $command\n"; - system ( $command ); - } - if ( $alias =~ /^dbSNP$/ ) { - my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$alias.txt.idx"; - print "command3: $command\n"; - system ( $command ); - } - if ( $alias =~ /^Annotation$/ ) { - my $command = "ln -s $path_folder_symlinks/$species\_$database_detail\Mrna.fa $path_symlinks$species\_$alias\Mrna.fa"; - print "command4: $command\n"; - system ( $command ); - } - if ( $database_detail =~ /^clinvar/ ) { - my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$database_detail.txt.idx"; - print "command3: $command\n"; - system ( $command ); - } - - #Push all softlinks used in array - if ( $alias !~ /^N\/A$/ ) { - push @database_info, "$alias=$species\_$database_detail"; - } - - - $lookup = ''; - $input = ''; - %input = ''; - last; - } - } - print "Done preparing databases!\n"; -} - -#Run Generated annovar command - -print "\nResulting in following command: $test_command\n"; -################################################# -#!!IMPORTANT# DECOMMENT FOR ACTIVE USE!!# -system ($test_command); -################################################# - -my $addition_vcf; - -if ( $use_alias =~ /^1$/ ) { - - #Add translation for common databases to specific version - $addition_vcf; - my $database_details = $path_symlinks.'database_details.txt'; - $addition_vcf .= "##"; - foreach (@database_info) { - $addition_vcf .= ":$_"; - } - - #Add information to vcf output file - open(OUTVCF_EDIT, '>', "$outvcf\_edit") or die "Could not open file '$outvcf' $!"; - print "\nDone open edit output vcf file...\n"; - open(OUTVCF, '<', "$outvcf") or die "Could not open file '$outvcf' $!"; - print "Done reading vcf output vcf file...\n\n"; - - - while ( my $line = ) { - my @row = split ( "\t", $line ); - if ( $row[0] =~ /^#CHROM$/ ) { - print OUTVCF_EDIT "$addition_vcf\n"; - print OUTVCF_EDIT "$line"; - } else { - print OUTVCF_EDIT "$line"; - } - } - - close OUTVCF_EDIT; - close OUTVCF; - - #Replace orginal vcf file with edited vcf file with additional information - my $mv_command = "mv $outvcf\_edit $outvcf"; - system ($mv_command); - - print "\nAdding line to vcf file:\n"; - print "$addition_vcf\n\n"; - - #Remove folder containing softlinks - print "Removing temporary directory:\n"; - print "rm -r $path_symlinks\n\n"; - system ( "rm -r $path_symlinks" ); -} - -print "Job done program stopping.\n"; - -exit; - -##################### -#!!!END OF SCRIPT!!!# -#################### - -############# -#SUBROUTINES# -############# - -############# -#Subroutine to obtain datatype variable -############# -sub test_type { - -my $input = $_[0]; -$data_type = ref($input); -return $data_type; - -} - - -############# -#Subroutine to parse and check yaml for correct format -############# -sub parse { - -#@inyml = @{$yml_hash->{$pattern4}{$pattern5}}; -#@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}}; - -my $input_hash = $_[0]; - -my ($key_a, $key_a2, $key_b, $key_b2, $key_c, $key_c2); -my ($value_a, $value_a2, $value_b, $value_b2, $value_c, $value_c2); -my (@hash, @hash2, $hash, $hash2); - -my $count_a = '0'; -my $count_a2 = '0'; -my $count_a3 = '0'; -my $count_a4 = '0'; -my $count_b = '0'; -my $count_b2 = '0'; -my $count_b3 = '0'; -my $count_b4 = '0'; -my $count_c = '0'; -my $count_c2 = '0'; -my $count_c3 = '0'; -my $count_c4 = '0'; - -for $key_a (keys %$input_hash) { - $count_a++; - for $value_a (values %$input_hash) { - $count_a2++; - if ( $count_a =~ /^$count_a2$/ ) { - test_type($value_a); - if ($data_type =~ /^HASH$/) { - for $key_a2 ( keys %{$input_hash->{$key_a}} ) { - $count_a3++; - for $value_a2 ( values %{$input_hash->{$key_a}} ) { - $count_a4++; - if ($count_a3 =~ /^$count_a4$/) { - test_type($value_a2); - if ($data_type =~ /^HASH$/) { - print "[ERROR1] Found unexpected yml file format [ key: $key_a2 ] , please check whether your YAML file meets requirements!\n"; - exit; - } elsif ($data_type =~ /^ARRAY$/) { - @hash = @{$input_hash->{$key_a}{$key_a2}}; - foreach $hash ( @hash ) { - for $key_b ( keys %$hash ) { - $count_b++; - for $value_b (values %$hash ) { - $count_b2++; - if ($count_b =~ /^$count_b2$/) { - test_type($value_b); - if ( $data_type =~ /^HASH$/ ) { - for $key_b2 ( keys %{$hash->{$key_b}} ) { - $count_b3++; - for $value_b2 (values %{$hash->{$key_b}} ) { - $count_b4++; - if ($count_b3 =~ /^$count_b4/) { - test_type($value_b2); - if ( $data_type =~ /^HASH$/ ) { - print "[ERROR2] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n"; - exit; - } elsif ( $data_type =~ /^ARRAY$/ ) { - print "[ERROR3] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n"; - exit; - } else { - #Assume value level 4 - print "value_b2: $key_a:$key_a2:$key_b:$key_b2:$value_b2\n"; - } - } - } - $count_b4 = "0"; - } - $count_b3 = "0"; - } - elsif ( $data_type =~ /^ARRAY$/ ) { - print "[ERROR4] Found unexpected yml file format [ key: $key_b ] , please check whether your YAML file meets requirements!\n"; - exit; - } else { - #assume value level 3 - print "value_b: $key_a:$key_a2:$key_b:$value_b\n"; - } - } - } - $count_b2 = '0'; - } - $count_b = '0'; - } - } else { - #Assume value level 2 - print "Value_a2: $key_a:$value_a2\n"; - } - } - } - $count_a4 = '0'; - } - $count_a3 = '0'; - } - elsif ($data_type =~ /^ARRAY/) { - @hash2 = @{$input_hash->{$key_a}}; - foreach $hash2 ( @hash2 ) { - for my $key_c ( keys %$hash2 ) { - $count_c++; - for $value_c ( values %$hash2 ) { - $count_c2++; - if ($count_c =~ /^$count_c2$/) { - test_type($value_c); - if ( $data_type =~ /^HASH$/ ) { - for $key_c2 ( keys %{$hash2->{$key_c}} ) { - $count_c3++; - for $value_c2 ( values %{$hash2->{$key_c}} ) { - $count_c4++; - if ( $count_c3 =~ /^$count_c4$/ ) { - test_type($value_c2); - if ( $data_type =~ /^HASH$/ ) { - print "[ERROR5] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n"; - exit; - } elsif ( $data_type =~ /^ARRAY$/ ) { - print "[ERROR6] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n"; - exit; - } else { - #Assume level 4 - print "value_c2: $key_a:$key_a2:$key_c:$key_c2:$value_c2\n"; - } - } - } - $count_c4 = '0'; - } - $count_c3 = '0'; - } - elsif ( $data_type =~ /^ARRAY$/ ) { - print "[ERROR7] Found unexpected yml file format [ key: $key_c ] , please check whether your YAML file meets requirements!\n"; - exit; - } else { - #assume value level 3 - print "value_c: $key_a:$key_c:$value_c\n"; - } - } - } - $count_c2 = '0'; - } - $count_c = '0'; - } - } else { - #Assume value level 1 - print "value_a: $key_a:$value_a\n"; - } - } - } -$count_a2 = '0'; -} -$count_a = '0'; - -$data_type = ""; - -} - -######################################### -#Subroutine to fetch arguments from YAML# -######################################### - -sub parse_lookup { - -my $hash; -my %hash; -my $hash_a; -my %hash_a; -my %hash_a2; -my $hash_a2; -my @inyml; -my @inarguments; -my $count; -my $match; -my $match2; -my $argument; -my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5); - -if ( $_[0] =~ /^parse1$/ ) { -$pattern1 = $_[1]; -$pattern2 = $_[2]; -$pattern3 = $_[3]; -$pattern4 = $_[4]; -@inyml = @{$yml_hash->{$pattern4}}; -@inarguments = @{$yml_hash_arguments->{$pattern4}}; -} elsif ( $_[0] =~ /^parse2$/) { -$pattern1 = $_[1]; -$pattern2 = $_[2]; -$pattern3 = $_[3]; -$pattern4 = $_[4]; -$pattern5 = $_[5]; -@inyml = @{$yml_hash->{$pattern4}{$pattern5}}; -@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}}; -} - -$match = "0"; - -foreach my $hash ( @inyml ) { - $hash = \%$hash; - if ($hash->{$pattern3} =~ /^$pattern2$/ ) { - my $value = $hash->{$pattern1}; - if ($value eq "" ) { - print "Error unknown argument \"$pattern1\" for annotation database \"$pattern2\" in input YAML!\n"; - exit; - } - foreach my $hash_a (@inarguments) { - my $hash_a2 = $hash_a->{$pattern1}; - if ( $hash_a->{$pattern3} =~ /^$pattern2$/ ) { - $match2++; - if ($hash_a2 =~ /^HASH/ ) { - $argument = $hash_a2->{$value}; - } else { - $argument = $hash_a2; - } - if ($argument eq "" ) { - print "Unknown given option \"$value\" for argument \"$pattern1\" in YAML arguments file or input YAML for database \"$pattern2\"!\n"; - print "Please check your input and start the plugin again.\n"; - exit; - } - } - if ($match2 =~ /^1$/) { - #Do nothing - } elsif ( $match2 > 1 ) { - print "Error Multiple matches for database \"$pattern2\" database in YAML argumentsfile.\n"; - print "Please check your input and start the plugin again.\n"; - exit; - } elsif ( $match2 =~ /^0$/ ) { - print "Error no matches for database \"$pattern2\" in YAML argumentsfile.\n"; - print "Please check your input and start the plugin again.\n"; - exit; - } - } - $match++; - $lookup = $argument; - } -} -if ($match =~ /^1$/) { - #Do nothing -} else { - print "match: $match \n"; - print "Error in yaml file multiple instances matching input yaml, please check input!\n"; - exit; -} - -return $lookup; - -} - -############# -#Subroutine for opening out / edited yaml file -############# -sub openyml_write { -my $outyml = $in; -#Create second yml file with change values originating from cli -open(OUT, '>', "$outyml") or die "Could not open file '$outyml' $!"; -print "Done creating output file...\n"; -} - -############# -#Subroutine to check whether yaml file exists -############# -sub openyml_read { -$in = $_[0]; -chomp $in; -my $inyml = $in; -open(FILE, '<', "$inyml") or die "Could not open file '$inyml' $!"; -close FILE; -print "Done reading\n"; -} - -############# -#Subroutine for reading argument yaml file in hash -############## -sub load_arguments { -$in = $_[0]; -$yml_hash_arguments = LoadFile($in); -return($yml_hash_arguments, %yml_hash_arguments); -} - -############# -#Subroutine for reading input yaml file in hash -############## -sub load { -$in = $_[0]; -$yml_hash = LoadFile($in); -return($yml_hash, %yml_hash); -} - -############# -#Subroutine for reading edited input yml file in hash -############## -sub load_edit { -$in = $_[0]; -$yml_hash_edit = LoadFile($in); -} - -############# -#Subroutine for editing content yaml hash -############## -sub writeyml { -my $yml_hash = $in; -$writer = sub { - my $line = shift; - print FILE "$line\n"; -}; -my $file = Data::YAML::Writer->new; -$file->write( $yml_hash, $writer ); -} - -############# -#Subroutine for editing content edited yaml hash -############## -sub writeyml_edit { -my $yml_hash = $in; -$writer = sub { - my $line = shift; - print OUT "$line\n"; -}; -my $file = Data::YAML::Writer->new; -$file->write( $yml_hash_edit, $writer ); -close OUT; -} diff -r 0044b989f60f -r 237d0eef2ad3 annovar_yaml/annovar_yaml.xml --- a/annovar_yaml/annovar_yaml.xml Mon Jun 10 09:21:03 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ - - - perl-yaml - perl-yaml-tiny - perl-yaml-xs - perl-data-dumper - perl-getopt-long - - - - - - - - - - - - - -