#!/usr/bin/perl use strict; my $VERSION = 0.1; ############################################################################################## # # fcisql.pl - Converts functional SQL statements to SQL. # ############################################################################################## # # PLEASE NOTE: THIS IS A RESEARCH PROTOTYPE PROGRAM. NO OPTIMIZATION, ETC, HAS BEEN # IMPLEMENTED. USE AT YOUR OWN RISK. OF COURSE WE COULD HAVE USED BISON OR YACC, # BUT WE DECIDED TO USE PERL TO EFFECTIVELY AND QUICKLY GET OUR IDEA ACROSS. # QUESTIONS AND/OR COMMENTS ARE WELCOME. # # # Copyright 2005 Regents of University of California. # ############################################################################################## # # Run Program: # > perl funsql.pl FUNCTION_DEFINITION SQL_FILE # # where, # # FUNCTION_DEFINITION is file where each line must be in this format: # # define function NAME TABLE args=[ARGS [QUALIFIER], ..] proj=[PROJECTION_COLS, ..] # # where NAME is the function name # TABLE is the table in the database # PROJECTION_COLS are the col(s) to project out after computing the query. # ARGS are the table's columns as arguments appearing in the WHERE clause of the sql statement. # QUALIFIER is the operator on a single ARG. # # SQL_FILE is just a file containing sql and/or functional sql statements. This file gets converted # to standard SQL. # recognizes "functional" notation: f(g( args) ) # "dot" notation (args).f.g # or a combination of both "functional" and "dot" notation: f(g( (args)$h)) # # f(g(args)) <==> (args).f.g # # # output of program is to the terminal. You can redirect the output to a .sql file. # # # # # Sample FUNCTION_DEFINITION: please look at funcdef.sample # # Sample SQL_FILE: please look at sqltestX.sql # # ############################################################################################### ############# # VARIABLES ############# my $func_defs = {}; # hash storing function definitions # constants = TOKEN VALUES my $LEFT_PAREN = 0; my $RIGHT_PAREN = 1; my $ARGUMENT = 2; my $OTHER = 3; my $FUNCTION = 4; my $DOT = 5; my $LEFT_BRACKET = 6; my $RIGHT_BRACKET = 7; my $DOT_MODE = 2; my $FUNC_MODE = 1; ############# # FUNCTIONS ############# ## # DESCRIPTION: processes the FUNCTION_DEFINITION file and stores the information in $func_defs hash. # sub process_func_def { my $file = shift; open(FILE, $file) or die "Can't open $file. Cannot continue.\n\n"; while (my $line = ) { if ($line =~ /define\s+?function\s+?(\S+)\s+?(\S+)\s+?arg=\[(.*)\]\s+?proj=\[(.*)\]/) { #print "GOT: $1, $2, $3, $4\n"; if ($func_defs->{$1}) { print "-- >> WARNING: function $1 already exists. Using the latest version.\n"; } $func_defs->{$1}->{table} = $2; $func_defs->{$1}->{proj} = $4; # this will need to be parsed into array. $func_defs->{$1}->{arg} = $3; # this will need to be parsed into array. } else { print "-- >> ERROR: UNRECOGNIZED $line\n"; } } close FILE; } ## # DESCRIPTION: determines if input string is a function. # GIVEN: a name # # RETURNS: 0= false, 1=true sub isfunction { my $arg = shift; #print "ISFUNC: $arg\n"; my @funcs = keys %{$func_defs}; foreach my $func (@funcs) { if ($arg eq $func) { return 1; } } return 0; } ## # DESCRIPTION: processes the sql file and returns the converted sql # # sub process_sql_file { my $file = shift; my @funcs = keys %{$func_defs}; open(FILE, $file) or die "Can't open $file. Cannot continue.\n\n"; while (my $line = ) { # modes: my $IN_FUNC_MODE = 0; my $IN_DOT_MODE = 0; # storage my $buf = ""; # stores other # stacks #these 2 stacks should always be the same size # as _mode stores the "mode" value while the other stores the # actual string value. my @parsestack = (); my @parsestack_mode = (); #stack2 # to hold the dot notation. my @dot_stack = (); my @dot_stack_mode = (); # args my $PUSH_ARGS = 0; my $buf_arg = ""; # counters my $left_parens = 0; my $right_parens = 0; my $numrightparentoadd = 0; my $dotnumrightparentoadd = -1; # iterate line for(my $i =0; $i < length($line); $i += 1) { my $ch = substr($line, $i,1); if ($IN_DOT_MODE) { my $ch2 = substr($line, $i,2); if ($ch2 eq ").") { $PUSH_ARGS = 0; #print "DARG: $buf_arg\n"; push @dot_stack, $buf_arg; push @dot_stack_mode, $ARGUMENT; push @dot_stack, "("; push @dot_stack_mode, $LEFT_PAREN; $buf_arg = ""; $i += 1; } elsif ($ch eq "," && $PUSH_ARGS) { #print "DARG: $buf_arg\n"; push @dot_stack, $buf_arg; push @dot_stack_mode, $ARGUMENT; $buf_arg = ""; } elsif ($PUSH_ARGS) { $buf_arg .= $ch; } elsif ((($ch =~ /\s/) || ($ch eq ")") ) && !$PUSH_ARGS) { # we are technically done with dot mode # but we must add ")" to beginning of stack to balance out parens @dot_stack = reverse @dot_stack; @dot_stack_mode = reverse @dot_stack_mode; for (my $j = 0; $j < $dotnumrightparentoadd; $j += 1){ push @dot_stack, ")"; push @dot_stack_mode, $RIGHT_PAREN; } $i += $dotnumrightparentoadd; # move the cursor the correct #. @dot_stack = reverse @dot_stack; @dot_stack_mode = reverse @dot_stack_mode; #print "DOTSTACK......\n"; #print_array(\@dot_stack); #print "DOTSTACK...... END\n"; while (scalar @dot_stack > 0){ my $v = pop @dot_stack; my $m = pop @dot_stack_mode; push @parsestack, $v; push @parsestack_mode, $m; } # but because dot mode could have originated from # func_mode, we should push the remainding ")" for it. for (my $j = $i; $j < length($line); $j += 1) { my $c = substr($line,$j,1); if ($c eq ")") { $right_parens += 1; push @parsestack, $c; # TODO push @parsestack_mode, $RIGHT_PAREN; } elsif ( $c eq " ") { # ok } else { # not ok, break out. last doesnt work?? $j = length($line); } } if ($left_parens != $right_parens) { print "-- >> WARNING: Error in matching ( and ). Please look at line: $line\n\n"; } # it's over. # cleanup vars $IN_DOT_MODE = 0; $IN_FUNC_MODE = 0; $PUSH_ARGS = 0; $buf_arg = ""; $numrightparentoadd = 0; $dotnumrightparentoadd = -1; @dot_stack = (); @dot_stack_mode = (); } elsif (!$PUSH_ARGS && $ch eq ".") { push @dot_stack, "("; push @dot_stack_mode, $LEFT_PAREN; $numrightparentoadd += 1; } elsif (!$PUSH_ARGS && substr($line, $i) =~ /^([a-zA-Z_0-9]+)/ ) { if ( isfunction($1)){ #print "DFUNC: $1\n"; push @dot_stack, $1; push @dot_stack_mode, $FUNCTION; $i = $i + length($1) - 1; $dotnumrightparentoadd += 1; } else { print "-- >> WARNING: Not a function: $1\n\n"; } } else { # do nothing } } elsif ($IN_FUNC_MODE) { if ($PUSH_ARGS && $ch eq ",") { #print "FARG: $buf_arg\n"; push @parsestack, $buf_arg; push @parsestack_mode, $ARGUMENT; $buf_arg = ""; } elsif ($PUSH_ARGS && $ch eq ")") { $PUSH_ARGS = 0; #print "FARG2: $buf_arg\n"; push @parsestack, $buf_arg; push @parsestack_mode, $ARGUMENT; $buf_arg = ""; $right_parens += 1; push @parsestack, $ch; push @parsestack_mode, $RIGHT_PAREN; } elsif ($PUSH_ARGS) { $buf_arg .= $ch; } elsif ($ch eq "(" && !$PUSH_ARGS) { if ( substr($line, $i) =~ /^\(\s*\((.*)\)\.([a-zA-Z_0-9]+)/ ) { # dot notation inside a function push @parsestack, $ch; push @parsestack_mode, $LEFT_PAREN; $IN_FUNC_MODE = 0; $left_parens += 1; } elsif ( substr($line, $i) =~ /^\(\s*([a-zA-Z_0-9]+)\s*\(/ ) { # function inside a function if ( isfunction($1)){ #print "FFUNC: $1\n"; push @parsestack, $ch; push @parsestack_mode, $LEFT_PAREN; push @parsestack, $1; push @parsestack_mode, $FUNCTION; $i = $i + length($1) - 1; $left_parens += 1; } else { print "-- >> WARNING: Not a function: $1\n\n"; } } else { push @parsestack, $ch; push @parsestack_mode, $LEFT_PAREN; $left_parens += 1; # arg $PUSH_ARGS = 1; } } elsif ($ch eq ")" && !$PUSH_ARGS) { # we'll never get here if we go into dot_mode... # so one way to check is to count the number of left and right parens that we see. they should match. $right_parens += 1; # this is one way of getting out of func_mode. # must provide another way if we go into dot_mode. # when dotmode completes, it automatically checks for ). so it # has been covered. :) push @parsestack, $ch; push @parsestack_mode, $RIGHT_PAREN; if ($left_parens == $right_parens) { $IN_FUNC_MODE = 0; #print "HIT ME\n\n"; } } } else { # determine what mode we are in, or push chars into the buffer if ($ch eq "(") { # reached a potential "dot_mode" # to be safe, put everything we've seen so far into stack # we won't be sure it's "dot_mode" until we've seen ").FUNCNAME". # thus, let's check that my $newl = substr($line, $i); if ( $newl =~ /^\((.*)\)\.([a-zA-Z_0-9]+)/ ) { if ( isfunction($2)){ # we are sure we are in dot mode #print "GO: $newl"; if ($buf =~ /^\s*$/) {} else { push @parsestack, $buf; push @parsestack_mode, $OTHER; $buf = ""; } push @dot_stack, ")"; push @dot_stack_mode, $RIGHT_PAREN; $IN_DOT_MODE = 1; $PUSH_ARGS =1; } else { print "-- >> WARNING: Your SQL looks funny at $newl\n\n"; } } } elsif (substr($line, $i) =~ /^([a-zA-Z_0-9]+)\s*\(/ ) { # is this going to be a function? if ( isfunction($1)){ # we are sure we are in func mode #print "GOF: $1\n"; my $name = $1; # push whatever was on the buffer if ($buf =~ /^\s*$/) {} else { push @parsestack, $buf; push @parsestack_mode, $OTHER; $buf = ""; } # push function push @parsestack, $name; push @parsestack_mode, $FUNCTION; $IN_FUNC_MODE = 1; # change i $i = $i + length($1) - 1; } } else { #print "BUF: $ch\n"; $buf .= $ch; } } # end decide } # end for i # push whatever came at the end of the line. if ($buf =~ /^\s*$/) {} else { push @parsestack, $buf; push @parsestack_mode, $OTHER; $buf = ""; } # call parser. At this point, we know that this line was parsed correctly, # so we could now go into looking at the stacks. #print "PROCESS $line\n"; processStack(\@parsestack, \@parsestack_mode); } # end while } ## # 1. since data is all in functional mode, convert to SQL. # 2. the purpose of this function groups the queries together, and # converts them into sql. sub processStack { my ($arg, $arg2) = @_; my @array = @$arg; my @tokenvals = @$arg2; my @dot_array = (); my @new_array = (); my $left_paren = 0; my $right_paren = 0; my $left2_paren = 0; my @cur_args = (); my $get_args = 0; my $get_func = 0; my @result_array; my @result_array_mode; # do step 1 while ( scalar @array > 0) { my $token = pop @array; my $tokenval = pop @tokenvals; if ($tokenval eq $RIGHT_PAREN) { $right_paren += 1; $get_args = 1; } elsif ($tokenval eq $LEFT_PAREN) { $left_paren += 1; $left2_paren += 1; $get_func = 1; $get_args = 0; } elsif ($get_args == 1 && $tokenval == $ARGUMENT ) { push @cur_args, $token; #print "PUSH2 arg: $token\n"; } elsif ($get_func == 1 && $tokenval == $FUNCTION){ # convert this! if ($left_paren == 1 && $left_paren == $left2_paren) { # this is not a nested sql query #print_array(\@cur_args); my $res = convertToSQL($token, \@cur_args, 0); push @result_array, $res; push @result_array_mode, 0; #print "RESULT: $res 0\n"; if ($right_paren == $left2_paren) { $right_paren = 0; $left2_paren = 0; } $left_paren = 0; } else { # this IS a nested sql query my $res = convertToSQL($token, \@cur_args, 1); push @result_array, $res; push @result_array_mode, 1; #print "RESULT: $res 1\n"; if($right_paren == $left2_paren) { $left2_paren = 0; $right_paren = 0; } $left_paren -= 1; } @cur_args = (); # reset $get_args = 0; $get_func = 0; #$left_paren = 0; #$right_paren = 0; } else { # this token doesnt belong to any functions push @result_array, $token; push @result_array_mode, 2; #print "RESULT2: $token 2\n"; } } # end while # process @result_array. # bottom of stack are the functions that were processed first, # so all we have to do to get the right order, is to just pop them off. my $orig_size = scalar @result_array; @result_array = reverse @result_array; @result_array_mode = reverse @result_array_mode; #print "RESULT ARRAY\n"; #print_array(\@result_array); #print "...\n"; #print_array(\@result_array_mode); my @grouped = (); my $cur_group = ""; while (scalar @result_array > 0 ) { my $res = pop @result_array; my $mode = pop @result_array_mode; my $next_mode = -1; if (scalar @result_array > 0 ) { $next_mode = pop @result_array_mode; push @result_array_mode, $next_mode; } if ($mode == 0 && $next_mode == 1) { $cur_group = "($res) $cur_group\n"; } elsif (( $mode == 0 && ($next_mode == -1 || $next_mode == 2)) || ( $mode == 1 && ($next_mode == -1 || $next_mode == 0 || $next_mode == 2))){ # end group $cur_group = "($res $cur_group)\n"; push @grouped, $cur_group; $cur_group = ""; } elsif ($mode == 1 && $next_mode == 1) { $cur_group = "($res $cur_group)"; } elsif ($mode == 2) { push @grouped, $res; $cur_group = ""; } } process_group_stack( \@grouped) ; } ## # GIVEN: the converted SQL array, prints it out to terminal. # # sub process_group_stack { my $arg = shift; my @array = @$arg; my $result = ""; foreach my $x (reverse @array ) { $result .= $x; } print $result; } ## # GIVEN: a function, its arguments, mode = {0,1} # 0 = "=", 1 = "IN" # RETURNS: SQL equivalent of what the function does. # sub convertToSQL { my ($funcname, $arrref, $mode) = @_; my @array = reverse @$arrref; #print "convertTOSQL: $funcname $mode\n"; my $table = $func_defs->{$funcname}->{table}; # do projectinos, SELECT my $res = "SELECT "; my @projs = split(" ",$func_defs->{$funcname}->{proj}); foreach my $proj (@projs) { $res .= "$table.$proj "; } # do FROM $res .= "\nFROM $table "; # do WHERE my @rawargs = split(",", $func_defs->{$funcname}->{arg}); #if (scalar @rawargs != scalar @array +1) { #print "-- >> WARNING: Function $funcname expects the correct number of arguments. Expected:".(scalar @rawargs)." Saw:".(scalar @array)."\n"; #print_array(\@array); # } if (scalar @rawargs > 0) { $res .= "\nWHERE "; if ($mode == 0 ) { for (my $i =0; $i < scalar @rawargs; $i += 1) { my $arg = $rawargs[$i]; my $col; my $arr = $array[$i]; # dtermine which format if ($arg =~ /\[(\S+)\s+\[(\S+)\s+(\S+)\s*\]\s*\]/) { my $col = $1; my $op = $2; my $exp = $3; $arr =~ s/\'//g; $exp =~ s/X/$arr/; $res .= "$table.$col $op $exp"; } elsif ($arg =~ /\[(\S+)\s+\[(\S+)\]\s*\]/ ) { $res = "$table.$1 $2 $arr"; } else { # default $col = $arg; $res .= "$table.$col "; $res .= "= "; $res .= "$arr "; } if (scalar @rawargs == 1 || $i == (scalar @rawargs - 1)) { } else { $res .= "AND "; } } } else { # TODO: this just takes the first argument... # in the future, will need to make it consider all arguments my ($col, $op, $exp); for (my $i =0; $i < scalar @rawargs; $i += 1) { my $arg = $rawargs[$i]; if ($arg =~ /\[(\S+)\s+\[(\S+)\s+(\S+)\s*\]\s*\]/) { $col = $1; $op = $2; $exp = $3; if (scalar @rawargs == 1 || $i == (scalar @rawargs - 1)) { $res .= "$table.$col "; } else { $res .= "$table.$col $op $exp"; } } elsif ($arg =~ /\[(\S+)\s+\[(\S+)\]\s*\]/ ) { if (scalar @rawargs == 1 || $i == (scalar @rawargs - 1)) { $res .= "$table.$1 "; } else { $res = "$table.$1 $2 "; } } else { # default $col = $arg; $res .= "$table.$col "; } if (scalar @rawargs == 1 || $i == (scalar @rawargs - 1)) { } else { $res .= " AND "; } } $res .= " IN "; } } else { # "EXISTS"? if ($mode == 0) { #nothing } else { $res .= "EXIST "; } } return $res; } # DEBUG: prints the array of elements. # sub print_array { my $arg = shift; my @array = @$arg; foreach my $q (@array) { print "PA: $q\n"; } } ## # Prints the usage to out. # sub usage() { print "-------------------------------------------------\n"; print " Run Program: \n\n"; print " > perl funsql.pl FUNCTION_DEFINITION SQL_FILE \n\n"; print " where, \n\n"; print " FUNCTION_DEFINITION is file where each line must be in this format: \n\n"; print " define function NAME TABLE args=[ARGS [QUALIFIER], ..] proj=[PROJECTION_COLS, ..] \n\n"; print " where NAME is the function name \n"; print " TABLE is the table in the database \n"; print " PROJECTION_COLS are the col(s) to project out after computing the query. \ \n"; print " ARGS are the table's columns as arguments appearing in the WHERE clause of the sql statement. \ \n"; print " QUALIFIER is the operator on a single ARG. \ \n\n\n"; print " SQL_FILE is just a file containing sql and/or functional sql statements. This file gets converted to standard SQL. \ \n\n"; print " recognizes \"functional\" notation: f(g( args) ) \ \n\n"; print " \"dot\" notation (args).f.g \ \n\n"; print " or a combination of both \"functional\" and \"dot\" notation: f(g( (args).h)) \n\n"; print " f(g(args)) <==> (args).f.g \n\n"; print " output of program is to the terminal. You can redirect the output to a .sql file. \n\n\n"; print " Sample FUNCTION_DEFINITION: please look at funcdef.sample \n\n"; print " Sample SQL_FILE: please look at sqltestX.sql\n\n"; print " > perl funsql.pl funcdef.sample sqltestX.sql \n\n"; print "-------------------------------------------------\n"; } ## # Main function entry of program. # sub main { if (scalar @ARGV != 2) { usage(); exit(1); } process_func_def($ARGV[0]); process_sql_file($ARGV[1]); } main();