--[[ $%BEGINLICENSE%$
 Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License as
 published by the Free Software Foundation; version 2 of the
 License.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 02110-1301  USA

 $%ENDLICENSE%$ --]]


require("mysql.tokenizer")

module("proxy.tokenizer", package.seeall)

---
-- normalize a query
--
-- * remove comments
-- * quote literals
-- * turn constants into ?
-- * turn tokens into uppercase
--
-- @param tokens a array of tokens
-- @return normalized SQL query
-- 
-- @see tokenize
function normalize(tokens)
	-- we use a string-stack here and join them at the end
	-- see http://www.lua.org/pil/11.6.html for more
	--
	local stack = {}
	-- literals that are SQL commands if they appear at the start
	-- (all uppercase)
	local literal_keywords = {
		["COMMIT"] = { },
		["ROLLBACK"] = { },
		["BEGIN"] = { },
		["START"] = { "TRANSACTION" },
	}

	for i = 1, #tokens do
		local token = tokens[i]

		-- normalize the query
		if token["token_name"] == "TK_COMMENT" then
		elseif token["token_name"] == "TK_COMMENT_MYSQL" then
			-- a /*!... */ comment
			--
			-- we can't look into the comment as we don't know which server-version
			-- we will talk to, pass it on verbatimly
			table.insert(stack, "/*!" ..token.text .. "*/ ")
		elseif token["token_name"] == "TK_LITERAL" then
			if token.text:sub(1, 1) == "@" then
				-- append session variables as is
				table.insert(stack, token.text .. " ")
			elseif #stack == 0 then -- nothing is on the stack yet
				local u_text = token.text:upper()

				if literal_keywords[u_text] then
					table.insert(stack, u_text .. " ")
				else
					table.insert(stack, "`" .. token.text .. "` ")
				end
			elseif #stack == 1 then
				local u_text = token.text:upper()

				local starting_keyword = stack[1]:sub(1, -2)

				if literal_keywords[starting_keyword] and
				   literal_keywords[starting_keyword][1] == u_text then
					table.insert(stack, u_text .. " ")
				else
					table.insert(stack, "`" .. token.text .. "` ")
				end
			else
				table.insert(stack, "`" .. token.text .. "` ")
			end
		elseif token["token_name"] == "TK_STRING" or
		       token["token_name"] == "TK_INTEGER" or
		       token["token_name"] == "TK_FLOAT" then
			table.insert(stack, "? ")
		elseif token["token_name"] == "TK_FUNCTION" then
			table.insert(stack,  token.text:upper())
		else
			table.insert(stack,  token.text:upper() .. " ")
		end
	end

	return table.concat(stack)
end

---
-- call the included tokenizer
--
-- this function is only a wrapper and exists mostly
-- for constancy and documentation reasons
function tokenize(packet)
	return tokenizer.tokenize(packet)
end

---
-- return the first command token
--
-- * strips the leading comments
function first_stmt_token(tokens)
	for i = 1, #tokens do
		local token = tokens[i]
		-- normalize the query
		if token["token_name"] == "TK_COMMENT" then
		elseif token["token_name"] == "TK_LITERAL" then
			-- commit and rollback at LITERALS
			return token
		else
			-- TK_SQL_* are normal tokens
			return token
		end
	end

	return nil
end

---
--[[

   returns an array of simple token values
   without id and name, and stripping all comments
   
   @param tokens an array of tokens, as produced by the tokenize() function
   @param quote_strings : if set, the string tokens will be quoted
   @see tokenize
--]]
function bare_tokens (tokens, quote_strings)
    local simple_tokens = {}
	for i = 1, #tokens do
		local token = tokens[i]
        if (token['token_name'] == 'TK_STRING') and quote_strings then
            table.insert(simple_tokens, string.format('%q', token['text'] ))
        elseif (token['token_name'] ~= 'TK_COMMENT') then
            table.insert(simple_tokens, token['text'])
        end
    end
    return simple_tokens
end

---
--[[
    
   Returns a text query from an array of tokens, stripping off comments
  
   @param tokens an array of tokens, as produced by the tokenize() function
   @param start_item ignores tokens before this one
   @param end_item ignores token after this one
   @see tokenize
--]]
function tokens_to_query ( tokens , start_item, end_item )
    if not start_item then
        start_item = 1
    end
    if not end_item then
        end_item = #tokens
    end
    local counter  = 0
    local new_query = ''
	for i = 1, #tokens do
		local token = tokens[i]
        counter = counter + 1
        if (counter >= start_item and counter <= end_item ) then
            if (token['token_name'] == 'TK_STRING') then
                new_query = new_query .. string.format('%q', token['text'] )
            elseif token['token_name'] ~= 'TK_COMMENT' then
                new_query = new_query .. token['text'] 
            end
            if (token['token_name'] ~= 'TK_FUNCTION')
               and 
               (token['token_name'] ~= 'TK_COMMENT') 
            then
                new_query = new_query .. ' '
            end
        end
    end
    return new_query
end

---
--[[
   returns an array of tokens, stripping off all comments

   @param tokens an array of tokens, as produced by the tokenize() function
   @see tokenize, simple_tokens
--]]
function tokens_without_comments (tokens)
    local new_tokens = {}
	for i = 1, #tokens do
		local token = tokens[i]
        if (token['token_name'] ~= 'TK_COMMENT') then
            table.insert(new_tokens, token['text'])
        end
    end
    return new_tokens
end