platform/libinjection/src/libinjection_sqli.c
2019-08-16 10:42:40 -04:00

2325 lines
70 KiB
C

/**
* Copyright 2012,2016 Nick Galbreath
* nickg@client9.com
* BSD License -- see COPYING.txt for details
*
* https://libinjection.client9.com/
*
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <assert.h>
#include <stddef.h>
#include "libinjection.h"
#include "libinjection_sqli.h"
#include "libinjection_sqli_data.h"
#define LIBINJECTION_VERSION "3.9.2"
#define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t*)(0))->val)
#define LIBINJECTION_SQLI_MAX_TOKENS 5
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#define CHAR_NULL '\0'
#define CHAR_SINGLE '\''
#define CHAR_DOUBLE '"'
#define CHAR_TICK '`'
/* faster than calling out to libc isdigit */
#define ISDIGIT(a) ((unsigned)((a) - '0') <= 9)
#if 0
#define FOLD_DEBUG printf("%d \t more=%d pos=%d left=%d\n", __LINE__, more, (int)pos, (int)left);
#else
#define FOLD_DEBUG
#endif
/*
* not making public just yet
*/
typedef enum {
TYPE_NONE = 0
, TYPE_KEYWORD = (int)'k'
, TYPE_UNION = (int)'U'
, TYPE_GROUP = (int)'B'
, TYPE_EXPRESSION = (int)'E'
, TYPE_SQLTYPE = (int)'t'
, TYPE_FUNCTION = (int)'f'
, TYPE_BAREWORD = (int)'n'
, TYPE_NUMBER = (int)'1'
, TYPE_VARIABLE = (int)'v'
, TYPE_STRING = (int)'s'
, TYPE_OPERATOR = (int)'o'
, TYPE_LOGIC_OPERATOR = (int)'&'
, TYPE_COMMENT = (int)'c'
, TYPE_COLLATE = (int)'A'
, TYPE_LEFTPARENS = (int)'('
, TYPE_RIGHTPARENS = (int)')' /* not used? */
, TYPE_LEFTBRACE = (int)'{'
, TYPE_RIGHTBRACE = (int)'}'
, TYPE_DOT = (int)'.'
, TYPE_COMMA = (int)','
, TYPE_COLON = (int)':'
, TYPE_SEMICOLON = (int)';'
, TYPE_TSQL = (int)'T' /* TSQL start */
, TYPE_UNKNOWN = (int)'?'
, TYPE_EVIL = (int)'X' /* unparsable, abort */
, TYPE_FINGERPRINT = (int)'F' /* not really a token */
, TYPE_BACKSLASH = (int)'\\'
} sqli_token_types;
/**
* Initializes parsing state
*
*/
static char flag2delim(int flag)
{
if (flag & FLAG_QUOTE_SINGLE) {
return CHAR_SINGLE;
} else if (flag & FLAG_QUOTE_DOUBLE) {
return CHAR_DOUBLE;
} else {
return CHAR_NULL;
}
}
/* memchr2 finds a string of 2 characters inside another string
* This a specialized version of "memmem" or "memchr".
* 'memmem' doesn't exist on all platforms
*
* Porting notes: this is just a special version of
* astring.find("AB")
*
*/
static const char *
memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
{
const char *cur = haystack;
const char *last = haystack + haystack_len - 1;
if (haystack_len < 2) {
return NULL;
}
while (cur < last) {
/* safe since cur < len - 1 always */
if (cur[0] == c0 && cur[1] == c1) {
return cur;
}
cur += 1;
}
return NULL;
}
/**
* memmem might not exist on some systems
*/
static const char *
my_memmem(const char* haystack, size_t hlen, const char* needle, size_t nlen)
{
const char* cur;
const char* last;
assert(haystack);
assert(needle);
assert(nlen > 1);
last = haystack + hlen - nlen;
for (cur = haystack; cur <= last; ++cur) {
if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) {
return cur;
}
}
return NULL;
}
/** Find largest string containing certain characters.
*
* C Standard library 'strspn' only works for 'c-strings' (null terminated)
* This works on arbitrary length.
*
* Performance notes:
* not critical
*
* Porting notes:
* if accept is 'ABC', then this function would be similar to
* a_regexp.match(a_str, '[ABC]*'),
*/
static size_t
strlenspn(const char *s, size_t len, const char *accept)
{
size_t i;
for (i = 0; i < len; ++i) {
/* likely we can do better by inlining this function
* but this works for now
*/
if (strchr(accept, s[i]) == NULL) {
return i;
}
}
return len;
}
static size_t
strlencspn(const char *s, size_t len, const char *accept)
{
size_t i;
for (i = 0; i < len; ++i) {
/* likely we can do better by inlining this function
* but this works for now
*/
if (strchr(accept, s[i]) != NULL) {
return i;
}
}
return len;
}
static int char_is_white(char ch) {
/* ' ' space is 0x32
'\t 0x09 \011 horizontal tab
'\n' 0x0a \012 new line
'\v' 0x0b \013 vertical tab
'\f' 0x0c \014 new page
'\r' 0x0d \015 carriage return
0x00 \000 null (oracle)
0xa0 \240 is Latin-1
*/
return strchr(" \t\n\v\f\r\240\000", ch) != NULL;
}
/* DANGER DANGER
* This is -very specialized function-
*
* this compares a ALL_UPPER CASE C STRING
* with a *arbitrary memory* + length
*
* Sane people would just make a copy, up-case
* and use a hash table.
*
* Required since libc version uses the current locale
* and is much slower.
*/
static int cstrcasecmp(const char *a, const char *b, size_t n)
{
char cb;
for (; n > 0; a++, b++, n--) {
cb = *b;
if (cb >= 'a' && cb <= 'z') {
cb -= 0x20;
}
if (*a != cb) {
return *a - cb;
} else if (*a == '\0') {
return -1;
}
}
return (*a == 0) ? 0 : 1;
}
/**
* Case sensitive string compare.
* Here only to make code more readable
*/
static int streq(const char *a, const char *b)
{
return strcmp(a, b) == 0;
}
/**
*
*
*
* Porting Notes:
* given a mapping/hash of string to char
* this is just
* typecode = mapping[key.upper()]
*/
static char bsearch_keyword_type(const char *key, size_t len,
const keyword_t * keywords, size_t numb)
{
size_t pos;
size_t left = 0;
size_t right = numb - 1;
while (left < right) {
pos = (left + right) >> 1;
/* arg0 = upper case only, arg1 = mixed case */
if (cstrcasecmp(keywords[pos].word, key, len) < 0) {
left = pos + 1;
} else {
right = pos;
}
}
if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) {
return keywords[left].type;
} else {
return CHAR_NULL;
}
}
static char is_keyword(const char* key, size_t len)
{
return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz);
}
/* st_token methods
*
* The following functions manipulates the stoken_t type
*
*
*/
static void st_clear(stoken_t * st)
{
memset(st, 0, sizeof(stoken_t));
}
static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len,
const char value)
{
/* done to eliminate unused warning */
(void)len;
st->type = (char) stype;
st->pos = pos;
st->len = 1;
st->val[0] = value;
st->val[1] = CHAR_NULL;
}
static void st_assign(stoken_t * st, const char stype,
size_t pos, size_t len, const char* value)
{
const size_t MSIZE = LIBINJECTION_SQLI_TOKEN_SIZE;
size_t last = len < MSIZE ? len : (MSIZE - 1);
st->type = (char) stype;
st->pos = pos;
st->len = last;
memcpy(st->val, value, last);
st->val[last] = CHAR_NULL;
}
static void st_copy(stoken_t * dest, const stoken_t * src)
{
memcpy(dest, src, sizeof(stoken_t));
}
static int st_is_arithmetic_op(const stoken_t* st)
{
const char ch = st->val[0];
return (st->type == TYPE_OPERATOR && st->len == 1 &&
(ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%'));
}
static int st_is_unary_op(const stoken_t * st)
{
const char* str = st->val;
const size_t len = st->len;
if (st->type != TYPE_OPERATOR) {
return FALSE;
}
switch (len) {
case 1:
return *str == '+' || *str == '-' || *str == '!' || *str == '~';
case 2:
return str[0] == '!' && str[1] == '!';
case 3:
return cstrcasecmp("NOT", str, 3) == 0;
default:
return FALSE;
}
}
/* Parsers
*
*
*/
static size_t parse_white(struct libinjection_sqli_state * sf)
{
return sf->pos + 1;
}
static size_t parse_operator1(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
size_t pos = sf->pos;
st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]);
return pos + 1;
}
static size_t parse_other(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
size_t pos = sf->pos;
st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]);
return pos + 1;
}
static size_t parse_char(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
size_t pos = sf->pos;
st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]);
return pos + 1;
}
static size_t parse_eol_comment(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
const char *endpos =
(const char *) memchr((const void *) (cs + pos), '\n', slen - pos);
if (endpos == NULL) {
st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos);
return slen;
} else {
st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos, cs + pos);
return (size_t)((endpos - cs) + 1);
}
}
/** In ANSI mode, hash is an operator
* In MYSQL mode, it's a EOL comment like '--'
*/
static size_t parse_hash(struct libinjection_sqli_state * sf)
{
sf->stats_comment_hash += 1;
if (sf->flags & FLAG_SQL_MYSQL) {
sf->stats_comment_hash += 1;
return parse_eol_comment(sf);
} else {
st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#');
return sf->pos + 1;
}
}
static size_t parse_dash(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
/*
* five cases
* 1) --[white] this is always a SQL comment
* 2) --[EOF] this is a comment
* 3) --[notwhite] in MySQL this is NOT a comment but two unary operators
* 4) --[notwhite] everyone else thinks this is a comment
* 5) -[not dash] '-' is a unary operator
*/
if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos+2]) ) {
return parse_eol_comment(sf);
} else if (pos +2 == slen && cs[pos + 1] == '-') {
return parse_eol_comment(sf);
} else if (pos + 1 < slen && cs[pos + 1] == '-' && (sf->flags & FLAG_SQL_ANSI)) {
/* --[not-white] not-white case:
*
*/
sf->stats_comment_ddx += 1;
return parse_eol_comment(sf);
} else {
st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-');
return pos + 1;
}
}
/** This detects MySQL comments, comments that
* start with /x! We just ban these now but
* previously we attempted to parse the inside
*
* For reference:
* the form of /x![anything]x/ or /x!12345[anything] x/
*
* Mysql 3 (maybe 4), allowed this:
* /x!0selectx/ 1;
* where 0 could be any number.
*
* The last version of MySQL 3 was in 2003.
* It is unclear if the MySQL 3 syntax was allowed
* in MySQL 4. The last version of MySQL 4 was in 2008
*
*/
static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
{
/* so far...
* cs[pos] == '/' && cs[pos+1] == '*'
*/
if (pos + 2 >= len) {
/* not a mysql comment */
return 0;
}
if (cs[pos + 2] != '!') {
/* not a mysql comment */
return 0;
}
/*
* this is a mysql comment
* got "/x!"
*/
return 1;
}
static size_t parse_slash(struct libinjection_sqli_state * sf)
{
const char* ptr;
size_t clen;
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
const char* cur = cs + pos;
char ctype = TYPE_COMMENT;
size_t pos1 = pos + 1;
if (pos1 == slen || cs[pos1] != '*') {
return parse_operator1(sf);
}
/*
* skip over initial '/x'
*/
ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/');
/*
* (ptr == NULL) causes false positive in cppcheck 1.61
* casting to type seems to fix it
*/
if (ptr == (const char*) NULL) {
/* till end of line */
clen = slen - pos;
} else {
clen = (size_t)(ptr + 2 - cur);
}
/*
* postgresql allows nested comments which makes
* this is incompatible with parsing so
* if we find a '/x' inside the coment, then
* make a new token.
*
* Also, Mysql's "conditional" comments for version
* are an automatic black ban!
*/
if (memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') != NULL) {
ctype = TYPE_EVIL;
} else if (is_mysql_comment(cs, slen, pos)) {
ctype = TYPE_EVIL;
}
st_assign(sf->current, ctype, pos, clen, cs + pos);
return pos + clen;
}
static size_t parse_backslash(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
/*
* Weird MySQL alias for NULL, "\N" (capital N only)
*/
if (pos + 1 < slen && cs[pos +1] == 'N') {
st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos);
return pos + 2;
} else {
st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]);
return pos + 1;
}
}
static size_t parse_operator2(struct libinjection_sqli_state * sf)
{
char ch;
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
if (pos + 1 >= slen) {
return parse_operator1(sf);
}
if (pos + 2 < slen &&
cs[pos] == '<' &&
cs[pos + 1] == '=' &&
cs[pos + 2] == '>') {
/*
* special 3-char operator
*/
st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos);
return pos + 3;
}
ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2);
if (ch != CHAR_NULL) {
st_assign(sf->current, ch, pos, 2, cs+pos);
return pos + 2;
}
/*
* not an operator.. what to do with the two
* characters we got?
*/
if (cs[pos] == ':') {
/* ':' is not an operator */
st_assign(sf->current, TYPE_COLON, pos, 1, cs+pos);
return pos + 1;
} else {
/*
* must be a single char operator
*/
return parse_operator1(sf);
}
}
/*
* Ok! " \" " one backslash = escaped!
* " \\" " two backslash = not escaped!
* "\\\" " three backslash = escaped!
*/
static int is_backslash_escaped(const char* end, const char* start)
{
const char* ptr;
for (ptr = end; ptr >= start; ptr--) {
if (*ptr != '\\') {
break;
}
}
/* if number of backslashes is odd, it is escaped */
return (end - ptr) & 1;
}
static size_t is_double_delim_escaped(const char* cur, const char* end)
{
return ((cur + 1) < end) && *(cur+1) == *cur;
}
/* Look forward for doubling of delimiter
*
* case 'foo''bar' --> foo''bar
*
* ending quote isn't duplicated (i.e. escaped)
* since it's the wrong char or EOL
*
*/
static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
stoken_t * st, char delim, size_t offset)
{
/*
* offset is to skip the perhaps first quote char
*/
const char *qpos =
(const char *) memchr((const void *) (cs + pos + offset), delim,
len - pos - offset);
/*
* then keep string open/close info
*/
if (offset > 0) {
/*
* this is real quote
*/
st->str_open = delim;
} else {
/*
* this was a simulated quote
*/
st->str_open = CHAR_NULL;
}
while (TRUE) {
if (qpos == NULL) {
/*
* string ended with no trailing quote
* assign what we have
*/
st_assign(st, TYPE_STRING, pos + offset, len - pos - offset, cs + pos + offset);
st->str_close = CHAR_NULL;
return len;
} else if ( is_backslash_escaped(qpos - 1, cs + pos + offset)) {
/* keep going, move ahead one character */
qpos =
(const char *) memchr((const void *) (qpos + 1), delim,
(size_t)((cs + len) - (qpos + 1)));
continue;
} else if (is_double_delim_escaped(qpos, cs + len)) {
/* keep going, move ahead two characters */
qpos =
(const char *) memchr((const void *) (qpos + 2), delim,
(size_t)((cs + len) - (qpos + 2)));
continue;
} else {
/* hey it's a normal string */
st_assign(st, TYPE_STRING, pos + offset,
(size_t)(qpos - (cs + pos + offset)), cs + pos + offset);
st->str_close = delim;
return (size_t)(qpos - cs + 1);
}
}
}
/**
* Used when first char is a ' or "
*/
static size_t parse_string(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
/*
* assert cs[pos] == single or double quote
*/
return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1);
}
/**
* Used when first char is:
* N or n: mysql "National Character set"
* E : psql "Escaped String"
*/
static size_t parse_estring(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
if (pos + 2 >= slen || cs[pos+1] != CHAR_SINGLE) {
return parse_word(sf);
}
return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2);
}
static size_t parse_ustring(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
size_t slen = sf->slen;
size_t pos = sf->pos;
if (pos + 2 < slen && cs[pos+1] == '&' && cs[pos+2] == '\'') {
sf->pos += 2;
pos = parse_string(sf);
sf->current->str_open = 'u';
if (sf->current->str_close == '\'') {
sf->current->str_close = 'u';
}
return pos;
} else {
return parse_word(sf);
}
}
static size_t parse_qstring_core(struct libinjection_sqli_state * sf, size_t offset)
{
char ch;
const char *strend;
const char *cs = sf->s;
size_t slen = sf->slen;
size_t pos = sf->pos + offset;
/* if we are already at end of string..
if current char is not q or Q
if we don't have 2 more chars
if char2 != a single quote
then, just treat as word
*/
if (pos >= slen ||
(cs[pos] != 'q' && cs[pos] != 'Q') ||
pos + 2 >= slen ||
cs[pos + 1] != '\'') {
return parse_word(sf);
}
ch = cs[pos + 2];
/* the ch > 127 is un-needed since
* we assume char is signed
*/
if (ch < 33 /* || ch > 127 */) {
return parse_word(sf);
}
switch (ch) {
case '(' : ch = ')'; break;
case '[' : ch = ']'; break;
case '{' : ch = '}'; break;
case '<' : ch = '>'; break;
}
strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\'');
if (strend == NULL) {
st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3, cs + pos + 3);
sf->current->str_open = 'q';
sf->current->str_close = CHAR_NULL;
return slen;
} else {
st_assign(sf->current, TYPE_STRING, pos + 3, (size_t)(strend - cs) - pos - 3, cs + pos + 3);
sf->current->str_open = 'q';
sf->current->str_close = 'q';
return (size_t)(strend - cs + 2);
}
}
/*
* Oracle's q string
*/
static size_t parse_qstring(struct libinjection_sqli_state * sf)
{
return parse_qstring_core(sf, 0);
}
/*
* mysql's N'STRING' or
* ... Oracle's nq string
*/
static size_t parse_nqstring(struct libinjection_sqli_state * sf)
{
size_t slen = sf->slen;
size_t pos = sf->pos;
if (pos + 2 < slen && sf->s[pos+1] == CHAR_SINGLE) {
return parse_estring(sf);
}
return parse_qstring_core(sf, 1);
}
/*
* binary literal string
* re: [bB]'[01]*'
*/
static size_t parse_bstring(struct libinjection_sqli_state *sf)
{
size_t wlen;
const char *cs = sf->s;
size_t pos = sf->pos;
size_t slen = sf->slen;
/* need at least 2 more characters
* if next char isn't a single quote, then
* continue as normal word
*/
if (pos + 2 >= slen || cs[pos+1] != '\'') {
return parse_word(sf);
}
wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01");
if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
return parse_word(sf);
}
st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
return pos + 2 + wlen + 1;
}
/*
* hex literal string
* re: [xX]'[0123456789abcdefABCDEF]*'
* mysql has requirement of having EVEN number of chars,
* but pgsql does not
*/
static size_t parse_xstring(struct libinjection_sqli_state *sf)
{
size_t wlen;
const char *cs = sf->s;
size_t pos = sf->pos;
size_t slen = sf->slen;
/* need at least 2 more characters
* if next char isn't a single quote, then
* continue as normal word
*/
if (pos + 2 >= slen || cs[pos+1] != '\'') {
return parse_word(sf);
}
wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef");
if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
return parse_word(sf);
}
st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
return pos + 2 + wlen + 1;
}
/**
* This handles MS SQLSERVER bracket words
* http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name
*
*/
static size_t parse_bword(struct libinjection_sqli_state * sf)
{
const char *cs = sf->s;
size_t pos = sf->pos;
const char* endptr = (const char*) memchr(cs + pos, ']', sf->slen - pos);
if (endptr == NULL) {
st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos);
return sf->slen;
} else {
st_assign(sf->current, TYPE_BAREWORD, pos, (size_t)(endptr - cs) - pos + 1, cs + pos);
return (size_t)((endptr - cs) + 1);
}
}
static size_t parse_word(struct libinjection_sqli_state * sf)
{
char ch;
char delim;
size_t i;
const char *cs = sf->s;
size_t pos = sf->pos;
size_t wlen = strlencspn(cs + pos, sf->slen - pos,
" []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000");
st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos);
/* now we need to look inside what we good for "." and "`"
* and see if what is before is a keyword or not
*/
for (i =0; i < sf->current->len; ++i) {
delim = sf->current->val[i];
if (delim == '.' || delim == '`') {
ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i);
if (ch != TYPE_NONE && ch != TYPE_BAREWORD) {
/* needed for swig */
st_clear(sf->current);
/*
* we got something like "SELECT.1"
* or SELECT`column`
*/
st_assign(sf->current, ch, pos, i, cs + pos);
return pos + i;
}
}
}
/*
* do normal lookup with word including '.'
*/
if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) {
ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen);
if (ch == CHAR_NULL) {
ch = TYPE_BAREWORD;
}
sf->current->type = ch;
}
return pos + wlen;
}
/* MySQL backticks are a cross between string and
* and a bare word.
*
*/
static size_t parse_tick(struct libinjection_sqli_state* sf)
{
size_t pos = parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1);
/* we could check to see if start and end of
* of string are both "`", i.e. make sure we have
* matching set. `foo` vs. `foo
* but I don't think it matters much
*/
/* check value of string to see if it's a keyword,
* function, operator, etc
*/
char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len);
if (ch == TYPE_FUNCTION) {
/* if it's a function, then convert token */
sf->current->type = TYPE_FUNCTION;
} else {
/* otherwise it's a 'n' type -- mysql treats
* everything as a bare word
*/
sf->current->type = TYPE_BAREWORD;
}
return pos;
}
static size_t parse_var(struct libinjection_sqli_state * sf)
{
size_t xlen;
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos + 1;
/*
* var_count is only used to reconstruct
* the input. It counts the number of '@'
* seen 0 in the case of NULL, 1 or 2
*/
/*
* move past optional other '@'
*/
if (pos < slen && cs[pos] == '@') {
pos += 1;
sf->current->count = 2;
} else {
sf->current->count = 1;
}
/*
* MySQL allows @@`version`
*/
if (pos < slen) {
if (cs[pos] == '`') {
sf->pos = pos;
pos = parse_tick(sf);
sf->current->type = TYPE_VARIABLE;
return pos;
} else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) {
sf->pos = pos;
pos = parse_string(sf);
sf->current->type = TYPE_VARIABLE;
return pos;
}
}
xlen = strlencspn(cs + pos, slen - pos,
" <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\"");
if (xlen == 0) {
st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos);
return pos;
} else {
st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos);
return pos + xlen;
}
}
static size_t parse_money(struct libinjection_sqli_state *sf)
{
size_t xlen;
const char* strend;
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
if (pos + 1 == slen) {
/* end of line */
st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
return slen;
}
/*
* $1,000.00 or $1.000,00 ok!
* This also parses $....,,,111 but that's ok
*/
xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,");
if (xlen == 0) {
if (cs[pos + 1] == '$') {
/* we have $$ .. find ending $$ and make string */
strend = memchr2(cs + pos + 2, slen - pos -2, '$', '$');
if (strend == NULL) {
/* fell off edge */
st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2), cs + pos + 2);
sf->current->str_open = '$';
sf->current->str_close = CHAR_NULL;
return slen;
} else {
st_assign(sf->current, TYPE_STRING, pos + 2,
(size_t)(strend - (cs + pos + 2)), cs + pos + 2);
sf->current->str_open = '$';
sf->current->str_close = '$';
return (size_t)(strend - cs + 2);
}
} else {
/* ok it's not a number or '$$', but maybe it's pgsql "$ quoted strings" */
xlen = strlenspn(cs + pos + 1, slen - pos - 1, "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
if (xlen == 0) {
/* hmm it's "$" _something_ .. just add $ and keep going*/
st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
return pos + 1;
}
/* we have $foobar????? */
/* is it $foobar$ */
if (pos + xlen + 1 == slen || cs[pos+xlen+1] != '$') {
/* not $foobar$, or fell off edge */
st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
return pos + 1;
}
/* we have $foobar$ ... find it again */
strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2);
if (strend == NULL || ((size_t)(strend - cs) < (pos+xlen+2))) {
/* fell off edge */
st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2);
sf->current->str_open = '$';
sf->current->str_close = CHAR_NULL;
return slen;
} else {
/* got one */
st_assign(sf->current, TYPE_STRING, pos+xlen+2,
(size_t)(strend - (cs + pos + xlen + 2)), cs+pos+xlen+2);
sf->current->str_open = '$';
sf->current->str_close = '$';
return (size_t)((strend + xlen + 2) - cs);
}
}
} else if (xlen == 1 && cs[pos + 1] == '.') {
/* $. should parsed as a word */
return parse_word(sf);
} else {
st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos);
return pos + 1 + xlen;
}
}
static size_t parse_number(struct libinjection_sqli_state * sf)
{
size_t xlen;
size_t start;
const char* digits = NULL;
const char *cs = sf->s;
const size_t slen = sf->slen;
size_t pos = sf->pos;
int have_e = 0;
int have_exp = 0;
/* cs[pos] == '0' has 1/10 chance of being true,
* while pos+1< slen is almost always true
*/
if (cs[pos] == '0' && pos + 1 < slen) {
if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') {
digits = "0123456789ABCDEFabcdef";
} else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') {
digits = "01";
}
if (digits) {
xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits);
if (xlen == 0) {
st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos);
return pos + 2;
} else {
st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos);
return pos + 2 + xlen;
}
}
}
start = pos;
while (pos < slen && ISDIGIT(cs[pos])) {
pos += 1;
}
if (pos < slen && cs[pos] == '.') {
pos += 1;
while (pos < slen && ISDIGIT(cs[pos])) {
pos += 1;
}
if (pos - start == 1) {
/* only one character read so far */
st_assign_char(sf->current, TYPE_DOT, start, 1, '.');
return pos;
}
}
if (pos < slen) {
if (cs[pos] == 'E' || cs[pos] == 'e') {
have_e = 1;
pos += 1;
if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
pos += 1;
}
while (pos < slen && ISDIGIT(cs[pos])) {
have_exp = 1;
pos += 1;
}
}
}
/* oracle's ending float or double suffix
* http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891
*/
if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' || cs[pos] == 'f' || cs[pos] == 'F')) {
if (pos + 1 == slen) {
/* line ends evaluate "... 1.2f$" as '1.2f' */
pos += 1;
} else if ((char_is_white(cs[pos+1]) || cs[pos+1] == ';')) {
/*
* easy case, evaluate "... 1.2f ... as '1.2f'
*/
pos += 1;
} else if (cs[pos+1] == 'u' || cs[pos+1] == 'U') {
/*
* a bit of a hack but makes '1fUNION' parse as '1f UNION'
*/
pos += 1;
} else {
/* it's like "123FROM" */
/* parse as "123" only */
}
}
if (have_e == 1 && have_exp == 0) {
/* very special form of
* "1234.e"
* "10.10E"
* ".E"
* this is a WORD not a number!! */
st_assign(sf->current, TYPE_BAREWORD, start, pos - start, cs + start);
} else {
st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start);
}
return pos;
}
/*
* API to return version. This allows us to increment the version
* without having to regenerated the SWIG (or other binding) in minor
* releases.
*/
const char* libinjection_version()
{
return LIBINJECTION_VERSION;
}
int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf)
{
pt2Function fnptr;
size_t *pos = &sf->pos;
stoken_t *current = sf->current;
const char *s = sf->s;
const size_t slen = sf->slen;
if (slen == 0) {
return FALSE;
}
st_clear(current);
sf->current = current;
/*
* if we are at beginning of string
* and in single-quote or double quote mode
* then pretend the input starts with a quote
*/
if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) {
*pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0);
sf->stats_tokens += 1;
return TRUE;
}
while (*pos < slen) {
/*
* get current character
*/
const unsigned char ch = (unsigned char) (s[*pos]);
/*
* look up the parser, and call it
*
* Porting Note: this is mapping of char to function
* charparsers[ch]()
*/
fnptr = char_parse_map[ch];
*pos = (*fnptr) (sf);
/*
*
*/
if (current->type != CHAR_NULL) {
sf->stats_tokens += 1;
return TRUE;
}
}
return FALSE;
}
void libinjection_sqli_init(struct libinjection_sqli_state * sf, const char *s, size_t len, int flags)
{
if (flags == 0) {
flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
}
memset(sf, 0, sizeof(struct libinjection_sqli_state));
sf->s = s;
sf->slen = len;
sf->lookup = libinjection_sqli_lookup_word;
sf->userdata = 0;
sf->flags = flags;
sf->current = &(sf->tokenvec[0]);
}
void libinjection_sqli_reset(struct libinjection_sqli_state * sf, int flags)
{
void *userdata = sf->userdata;
ptr_lookup_fn lookup = sf->lookup;;
if (flags == 0) {
flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
}
libinjection_sqli_init(sf, sf->s, sf->slen, flags);
sf->lookup = lookup;
sf->userdata = userdata;
}
void libinjection_sqli_callback(struct libinjection_sqli_state * sf, ptr_lookup_fn fn, void* userdata)
{
if (fn == NULL) {
sf->lookup = libinjection_sqli_lookup_word;
sf->userdata = (void*)(NULL);
} else {
sf->lookup = fn;
sf->userdata = userdata;
}
}
/** See if two tokens can be merged since they are compound SQL phrases.
*
* This takes two tokens, and, if they are the right type,
* merges their values together. Then checks to see if the
* new value is special using the PHRASES mapping.
*
* Example: "UNION" + "ALL" ==> "UNION ALL"
*
* C Security Notes: this is safe to use C-strings (null-terminated)
* since the types involved by definition do not have embedded nulls
* (e.g. there is no keyword with embedded null)
*
* Porting Notes: since this is C, it's oddly complicated.
* This is just: multikeywords[token.value + ' ' + token2.value]
*
*/
static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, stoken_t * b)
{
size_t sz1;
size_t sz2;
size_t sz3;
char tmp[LIBINJECTION_SQLI_TOKEN_SIZE];
char ch;
/* first token is of right type? */
if (!
(a->type == TYPE_KEYWORD ||
a->type == TYPE_BAREWORD ||
a->type == TYPE_OPERATOR ||
a->type == TYPE_UNION ||
a->type == TYPE_FUNCTION ||
a->type == TYPE_EXPRESSION ||
a->type == TYPE_TSQL ||
a->type == TYPE_SQLTYPE)) {
return FALSE;
}
if (!
(b->type == TYPE_KEYWORD ||
b->type == TYPE_BAREWORD ||
b->type == TYPE_OPERATOR ||
b->type == TYPE_UNION ||
b->type == TYPE_FUNCTION ||
b->type == TYPE_EXPRESSION ||
b->type == TYPE_TSQL ||
b->type == TYPE_SQLTYPE ||
b->type == TYPE_LOGIC_OPERATOR)) {
return FALSE;
}
sz1 = a->len;
sz2 = b->len;
sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for ending null */
return FALSE;
}
/*
* oddly annoying last.val + ' ' + current.val
*/
memcpy(tmp, a->val, sz1);
tmp[sz1] = ' ';
memcpy(tmp + sz1 + 1, b->val, sz2);
tmp[sz3] = CHAR_NULL;
ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);
if (ch != CHAR_NULL) {
st_assign(a, ch, a->pos, sz3, tmp);
return TRUE;
} else {
return FALSE;
}
}
int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
{
stoken_t last_comment;
/* POS is the position of where the NEXT token goes */
size_t pos = 0;
/* LEFT is a count of how many tokens that are already
folded or processed (i.e. part of the fingerprint) */
size_t left = 0;
int more = 1;
st_clear(&last_comment);
/* Skip all initial comments, right-parens ( and unary operators
*
*/
sf->current = &(sf->tokenvec[0]);
while (more) {
more = libinjection_sqli_tokenize(sf);
if ( ! (sf->current->type == TYPE_COMMENT ||
sf->current->type == TYPE_LEFTPARENS ||
sf->current->type == TYPE_SQLTYPE ||
st_is_unary_op(sf->current))) {
break;
}
}
if (! more) {
/* If input was only comments, unary or (, then exit */
return 0;
} else {
/* it's some other token */
pos += 1;
}
while (1) {
FOLD_DEBUG;
/* do we have all the max number of tokens? if so do
* some special cases for 5 tokens
*/
if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {
if (
(
sf->tokenvec[0].type == TYPE_NUMBER &&
(sf->tokenvec[1].type == TYPE_OPERATOR || sf->tokenvec[1].type == TYPE_COMMA) &&
sf->tokenvec[2].type == TYPE_LEFTPARENS &&
sf->tokenvec[3].type == TYPE_NUMBER &&
sf->tokenvec[4].type == TYPE_RIGHTPARENS
) ||
(
sf->tokenvec[0].type == TYPE_BAREWORD &&
sf->tokenvec[1].type == TYPE_OPERATOR &&
sf->tokenvec[2].type == TYPE_LEFTPARENS &&
(sf->tokenvec[3].type == TYPE_BAREWORD || sf->tokenvec[3].type == TYPE_NUMBER) &&
sf->tokenvec[4].type == TYPE_RIGHTPARENS
) ||
(
sf->tokenvec[0].type == TYPE_NUMBER &&
sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
sf->tokenvec[2].type == TYPE_COMMA &&
sf->tokenvec[3].type == TYPE_LEFTPARENS &&
sf->tokenvec[4].type == TYPE_NUMBER
) ||
(
sf->tokenvec[0].type == TYPE_BAREWORD &&
sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
sf->tokenvec[2].type == TYPE_OPERATOR &&
sf->tokenvec[3].type == TYPE_LEFTPARENS &&
sf->tokenvec[4].type == TYPE_BAREWORD
)
)
{
if (pos > LIBINJECTION_SQLI_MAX_TOKENS) {
st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS]));
pos = 2;
left = 0;
} else {
pos = 1;
left = 0;
}
}
}
if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) {
left = pos;
break;
}
/* get up to two tokens */
while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) {
sf->current = &(sf->tokenvec[pos]);
more = libinjection_sqli_tokenize(sf);
if (more) {
if (sf->current->type == TYPE_COMMENT) {
st_copy(&last_comment, sf->current);
} else {
last_comment.type = CHAR_NULL;
pos += 1;
}
}
}
FOLD_DEBUG;
/* did we get 2 tokens? if not then we are done */
if (pos - left < 2) {
left = pos;
continue;
}
/* FOLD: "ss" -> "s"
* "foo" "bar" is valid SQL
* just ignore second string
*/
if (sf->tokenvec[left].type == TYPE_STRING && sf->tokenvec[left+1].type == TYPE_STRING) {
pos -= 1;
sf->stats_folds += 1;
continue;
} else if (sf->tokenvec[left].type == TYPE_SEMICOLON && sf->tokenvec[left+1].type == TYPE_SEMICOLON) {
/* not sure how various engines handle
* 'select 1;;drop table foo' or
* 'select 1; /x foo x/; drop table foo'
* to prevent surprises, just fold away repeated semicolons
*/
pos -= 1;
sf->stats_folds += 1;
continue;
} else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
(st_is_unary_op(&sf->tokenvec[left+1]) ||
sf->tokenvec[left+1].type == TYPE_SQLTYPE)) {
pos -= 1;
sf->stats_folds += 1;
left = 0;
continue;
} else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
st_is_unary_op(&sf->tokenvec[left+1])) {
pos -= 1;
sf->stats_folds += 1;
if (left > 0) {
left -= 1;
}
continue;
} else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) {
pos -= 1;
sf->stats_folds += 1;
if (left > 0) {
left -= 1;
}
continue;
} else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
sf->tokenvec[left+1].type == TYPE_FUNCTION &&
(sf->tokenvec[left+1].val[0] == 'I' ||
sf->tokenvec[left+1].val[0] == 'i' ) &&
(sf->tokenvec[left+1].val[1] == 'F' ||
sf->tokenvec[left+1].val[1] == 'f' )) {
/* IF is normally a function, except in Transact-SQL where it can be used as a
* standalone control flow operator, e.g. ; IF 1=1 ...
* if found after a semicolon, convert from 'f' type to 'T' type
*/
sf->tokenvec[left+1].type = TYPE_TSQL;
/* left += 2; */
continue; /* reparse everything, but we probably can advance left, and pos */
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) &&
sf->tokenvec[left+1].type == TYPE_LEFTPARENS && (
/* TSQL functions but common enough to be column names */
cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
/* Function in MYSQL */
cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
/* Mysql words that act as a variable and are a function */
/* TSQL current_users is fake-variable */
/* http://msdn.microsoft.com/en-us/library/ms176050.aspx */
cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
)) {
/* pos is the same
* other conversions need to go here... for instance
* password CAN be a function, coalesce CAN be a function
*/
sf->tokenvec[left].type = TYPE_FUNCTION;
continue;
} else if (sf->tokenvec[left].type == TYPE_KEYWORD && (
cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
)) {
if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
/* got .... IN ( ... (or 'NOT IN')
* it's an operator
*/
sf->tokenvec[left].type = TYPE_OPERATOR;
} else {
/*
* it's a nothing
*/
sf->tokenvec[left].type = TYPE_BAREWORD;
}
/* "IN" can be used as "IN BOOLEAN MODE" for mysql
* in which case merging of words can be done later
* other wise it acts as an equality operator __ IN (values..)
*
* here we got "IN" "(" so it's an operator.
* also back track to handle "NOT IN"
* might need to do the same with like
* two use cases "foo" LIKE "BAR" (normal operator)
* "foo" = LIKE(1,2)
*/
continue;
} else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && (
cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) {
if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
/* SELECT LIKE(...
* it's a function
*/
sf->tokenvec[left].type = TYPE_FUNCTION;
}
} else if (sf->tokenvec[left].type == TYPE_SQLTYPE &&
(sf->tokenvec[left+1].type == TYPE_BAREWORD ||
sf->tokenvec[left+1].type == TYPE_NUMBER ||
sf->tokenvec[left+1].type == TYPE_SQLTYPE ||
sf->tokenvec[left+1].type == TYPE_LEFTPARENS ||
sf->tokenvec[left+1].type == TYPE_FUNCTION ||
sf->tokenvec[left+1].type == TYPE_VARIABLE ||
sf->tokenvec[left+1].type == TYPE_STRING)) {
st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
pos -= 1;
sf->stats_folds += 1;
left = 0;
continue;
} else if (sf->tokenvec[left].type == TYPE_COLLATE &&
sf->tokenvec[left+1].type == TYPE_BAREWORD) {
/*
* there are too many collation types.. so if the bareword has a "_"
* then it's TYPE_SQLTYPE
*/
if (strchr(sf->tokenvec[left+1].val, '_') != NULL) {
sf->tokenvec[left+1].type = TYPE_SQLTYPE;
left = 0;
}
} else if (sf->tokenvec[left].type == TYPE_BACKSLASH) {
if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) {
/* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc */
sf->tokenvec[left].type = TYPE_NUMBER;
} else {
/* just ignore it.. Again T-SQL seems to parse \1 as "1" */
st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
pos -= 1;
sf->stats_folds += 1;
}
left = 0;
continue;
} else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
pos -= 1;
left = 0;
sf->stats_folds += 1;
continue;
} else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS &&
sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) {
pos -= 1;
left = 0;
sf->stats_folds += 1;
continue;
} else if (sf->tokenvec[left].type == TYPE_LEFTBRACE &&
sf->tokenvec[left+1].type == TYPE_BAREWORD) {
/*
* MySQL Degenerate case --
*
* select { ``.``.id }; -- valid !!!
* select { ``.``.``.id }; -- invalid
* select ``.``.id; -- invalid
* select { ``.id }; -- invalid
*
* so it appears {``.``.id} is a magic case
* I suspect this is "current database, current table, field id"
*
* The folding code can't look at more than 3 tokens, and
* I don't want to make two passes.
*
* Since "{ ``" so rare, we are just going to blacklist it.
*
* Highly likely this will need revisiting!
*
* CREDIT @rsalgado 2013-11-25
*/
if (sf->tokenvec[left+1].len == 0) {
sf->tokenvec[left+1].type = TYPE_EVIL;
return (int)(left+2);
}
/* weird ODBC / MYSQL {foo expr} --> expr
* but for this rule we just strip away the "{ foo" part
*/
left = 0;
pos -= 2;
sf->stats_folds += 2;
continue;
} else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) {
pos -= 1;
left = 0;
sf->stats_folds += 1;
continue;
}
/* all cases of handing 2 tokens is done
and nothing matched. Get one more token
*/
FOLD_DEBUG;
while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) {
sf->current = &(sf->tokenvec[pos]);
more = libinjection_sqli_tokenize(sf);
if (more) {
if (sf->current->type == TYPE_COMMENT) {
st_copy(&last_comment, sf->current);
} else {
last_comment.type = CHAR_NULL;
pos += 1;
}
}
}
/* do we have three tokens? If not then we are done */
if (pos -left < 3) {
left = pos;
continue;
}
/*
* now look for three token folding
*/
if (sf->tokenvec[left].type == TYPE_NUMBER &&
sf->tokenvec[left+1].type == TYPE_OPERATOR &&
sf->tokenvec[left+2].type == TYPE_NUMBER) {
pos -= 2;
left = 0;
continue;
} else if (sf->tokenvec[left].type == TYPE_OPERATOR &&
sf->tokenvec[left+1].type != TYPE_LEFTPARENS &&
sf->tokenvec[left+2].type == TYPE_OPERATOR) {
left = 0;
pos -= 2;
continue;
} else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR &&
sf->tokenvec[left+2].type == TYPE_LOGIC_OPERATOR) {
pos -= 2;
left = 0;
continue;
} else if (sf->tokenvec[left].type == TYPE_VARIABLE &&
sf->tokenvec[left+1].type == TYPE_OPERATOR &&
(sf->tokenvec[left+2].type == TYPE_VARIABLE ||
sf->tokenvec[left+2].type == TYPE_NUMBER ||
sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
pos -= 2;
left = 0;
continue;
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
sf->tokenvec[left].type == TYPE_NUMBER ) &&
sf->tokenvec[left+1].type == TYPE_OPERATOR &&
(sf->tokenvec[left+2].type == TYPE_NUMBER ||
sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
pos -= 2;
left = 0;
continue;
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
sf->tokenvec[left].type == TYPE_NUMBER ||
sf->tokenvec[left].type == TYPE_VARIABLE ||
sf->tokenvec[left].type == TYPE_STRING) &&
sf->tokenvec[left+1].type == TYPE_OPERATOR &&
streq(sf->tokenvec[left+1].val, "::") &&
sf->tokenvec[left+2].type == TYPE_SQLTYPE) {
pos -= 2;
left = 0;
sf->stats_folds += 2;
continue;
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
sf->tokenvec[left].type == TYPE_NUMBER ||
sf->tokenvec[left].type == TYPE_STRING ||
sf->tokenvec[left].type == TYPE_VARIABLE) &&
sf->tokenvec[left+1].type == TYPE_COMMA &&
(sf->tokenvec[left+2].type == TYPE_NUMBER ||
sf->tokenvec[left+2].type == TYPE_BAREWORD ||
sf->tokenvec[left+2].type == TYPE_STRING ||
sf->tokenvec[left+2].type == TYPE_VARIABLE)) {
pos -= 2;
left = 0;
continue;
} else if ((sf->tokenvec[left].type == TYPE_EXPRESSION ||
sf->tokenvec[left].type == TYPE_GROUP ||
sf->tokenvec[left].type == TYPE_COMMA) &&
st_is_unary_op(&sf->tokenvec[left+1]) &&
sf->tokenvec[left+2].type == TYPE_LEFTPARENS) {
/* got something like SELECT + (, LIMIT + (
* remove unary operator
*/
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
pos -= 1;
left = 0;
continue;
} else if ((sf->tokenvec[left].type == TYPE_KEYWORD ||
sf->tokenvec[left].type == TYPE_EXPRESSION ||
sf->tokenvec[left].type == TYPE_GROUP ) &&
st_is_unary_op(&sf->tokenvec[left+1]) &&
(sf->tokenvec[left+2].type == TYPE_NUMBER ||
sf->tokenvec[left+2].type == TYPE_BAREWORD ||
sf->tokenvec[left+2].type == TYPE_VARIABLE ||
sf->tokenvec[left+2].type == TYPE_STRING ||
sf->tokenvec[left+2].type == TYPE_FUNCTION )) {
/* remove unary operators
* select - 1
*/
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
pos -= 1;
left = 0;
continue;
} else if (sf->tokenvec[left].type == TYPE_COMMA &&
st_is_unary_op(&sf->tokenvec[left+1]) &&
(sf->tokenvec[left+2].type == TYPE_NUMBER ||
sf->tokenvec[left+2].type == TYPE_BAREWORD ||
sf->tokenvec[left+2].type == TYPE_VARIABLE ||
sf->tokenvec[left+2].type == TYPE_STRING)) {
/*
* interesting case turn ", -1" ->> ",1" PLUS we need to back up
* one token if possible to see if more folding can be done
* "1,-1" --> "1"
*/
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
left = 0;
/* pos is >= 3 so this is safe */
assert(pos >= 3);
pos -= 3;
continue;
} else if (sf->tokenvec[left].type == TYPE_COMMA &&
st_is_unary_op(&sf->tokenvec[left+1]) &&
sf->tokenvec[left+2].type == TYPE_FUNCTION) {
/* Separate case from above since you end up with
* 1,-sin(1) --> 1 (1)
* Here, just do
* 1,-sin(1) --> 1,sin(1)
* just remove unary operator
*/
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
pos -= 1;
left = 0;
continue;
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD) &&
(sf->tokenvec[left+1].type == TYPE_DOT) &&
(sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
/* ignore the '.n'
* typically is this databasename.table
*/
assert(pos >= 3);
pos -= 2;
left = 0;
continue;
} else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) &&
(sf->tokenvec[left+1].type == TYPE_DOT) &&
(sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
/* select . `foo` --> select `foo` */
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
pos -= 1;
left = 0;
continue;
} else if ((sf->tokenvec[left].type == TYPE_FUNCTION) &&
(sf->tokenvec[left+1].type == TYPE_LEFTPARENS) &&
(sf->tokenvec[left+2].type != TYPE_RIGHTPARENS)) {
/*
* whats going on here
* Some SQL functions like USER() have 0 args
* if we get User(foo), then User is not a function
* This should be expanded since it eliminated a lot of false
* positives.
*/
if (cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0) {
sf->tokenvec[left].type = TYPE_BAREWORD;
}
}
/* no folding -- assume left-most token is
is good, now use the existing 2 tokens --
do not get another
*/
left += 1;
} /* while(1) */
/* if we have 4 or less tokens, and we had a comment token
* at the end, add it back
*/
if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) {
st_copy(&sf->tokenvec[left], &last_comment);
left += 1;
}
/* sometimes we grab a 6th token to help
determine the type of token 5.
*/
if (left > LIBINJECTION_SQLI_MAX_TOKENS) {
left = LIBINJECTION_SQLI_MAX_TOKENS;
}
return (int)left;
}
/* secondary api: detects SQLi in a string, GIVEN a context.
*
* A context can be:
* * CHAR_NULL (\0), process as is
* * CHAR_SINGLE ('), process pretending input started with a
* single quote.
* * CHAR_DOUBLE ("), process pretending input started with a
* double quote.
*
*/
const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state * sql_state, int flags)
{
int i;
int tlen = 0;
libinjection_sqli_reset(sql_state, flags);
tlen = libinjection_sqli_fold(sql_state);
/* Check for magic PHP backquote comment
* If:
* * last token is of type "bareword"
* * And is quoted in a backtick
* * And isn't closed
* * And it's empty?
* Then convert it to comment
*/
if (tlen > 2 &&
sql_state->tokenvec[tlen-1].type == TYPE_BAREWORD &&
sql_state->tokenvec[tlen-1].str_open == CHAR_TICK &&
sql_state->tokenvec[tlen-1].len == 0 &&
sql_state->tokenvec[tlen-1].str_close == CHAR_NULL) {
sql_state->tokenvec[tlen-1].type = TYPE_COMMENT;
}
for (i = 0; i < tlen; ++i) {
sql_state->fingerprint[i] = sql_state->tokenvec[i].type;
}
/*
* make the fingerprint pattern a c-string (null delimited)
*/
sql_state->fingerprint[tlen] = CHAR_NULL;
/*
* check for 'X' in pattern, and then
* clear out all tokens
*
* this means parsing could not be done
* accurately due to pgsql's double comments
* or other syntax that isn't consistent.
* Should be very rare false positive
*/
if (strchr(sql_state->fingerprint, TYPE_EVIL)) {
/* needed for SWIG */
memset((void*)sql_state->fingerprint, 0, LIBINJECTION_SQLI_MAX_TOKENS + 1);
memset((void*)sql_state->tokenvec[0].val, 0, LIBINJECTION_SQLI_TOKEN_SIZE);
sql_state->fingerprint[0] = TYPE_EVIL;
sql_state->tokenvec[0].type = TYPE_EVIL;
sql_state->tokenvec[0].val[0] = TYPE_EVIL;
sql_state->tokenvec[1].type = CHAR_NULL;
}
return sql_state->fingerprint;
}
int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state* sql_state)
{
return libinjection_sqli_blacklist(sql_state) &&
libinjection_sqli_not_whitelist(sql_state);
}
char libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state, int lookup_type,
const char* str, size_t len)
{
if (lookup_type == LOOKUP_FINGERPRINT) {
return libinjection_sqli_check_fingerprint(sql_state) ? 'X' : '\0';
} else {
return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz);
}
}
int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state)
{
/*
* use minimum of 8 bytes to make sure gcc -fstack-protector
* works correctly
*/
char fp2[8];
char ch;
size_t i;
size_t len = strlen(sql_state->fingerprint);
int patmatch;
if (len < 1) {
sql_state->reason = __LINE__;
return FALSE;
}
/*
to keep everything compatible, convert the
v0 fingerprint pattern to v1
v0: up to 5 chars, mixed case
v1: 1 char is '0', up to 5 more chars, upper case
*/
fp2[0] = '0';
for (i = 0; i < len; ++i) {
ch = sql_state->fingerprint[i];
if (ch >= 'a' && ch <= 'z') {
ch -= 0x20;
}
fp2[i+1] = ch;
}
fp2[i+1] = '\0';
patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT;
/*
* No match.
*
* Set sql_state->reason to current line number
* only for debugging purposes.
*/
if (!patmatch) {
sql_state->reason = __LINE__;
return FALSE;
}
return TRUE;
}
/*
* return TRUE if SQLi, false is benign
*/
int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
{
/*
* We assume we got a SQLi match
* This next part just helps reduce false positives.
*
*/
char ch;
size_t tlen = strlen(sql_state->fingerprint);
if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) {
/*
* if ending comment is contains 'sp_password' then it's SQLi!
* MS Audit log apparently ignores anything with
* 'sp_password' in it. Unable to find primary reference to
* this "feature" of SQL Server but seems to be known SQLi
* technique
*/
if (my_memmem(sql_state->s, sql_state->slen,
"sp_password", strlen("sp_password"))) {
sql_state->reason = __LINE__;
return TRUE;
}
}
switch (tlen) {
case 2:{
/*
* case 2 are "very small SQLi" which make them
* hard to tell from normal input...
*/
if (sql_state->fingerprint[1] == TYPE_UNION) {
if (sql_state->stats_tokens == 2) {
/* not sure why but 1U comes up in SQLi attack
* likely part of parameter splitting/etc.
* lots of reasons why "1 union" might be normal
* input, so beep only if other SQLi things are present
*/
/* it really is a number and 'union'
* other wise it has folding or comments
*/
sql_state->reason = __LINE__;
return FALSE;
} else {
sql_state->reason = __LINE__;
return TRUE;
}
}
/*
* if 'comment' is '#' ignore.. too many FP
*/
if (sql_state->tokenvec[1].val[0] == '#') {
sql_state->reason = __LINE__;
return FALSE;
}
/*
* for fingerprint like 'nc', only comments of /x are treated
* as SQL... ending comments of "--" and "#" are not SQLi
*/
if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
sql_state->tokenvec[1].type == TYPE_COMMENT &&
sql_state->tokenvec[1].val[0] != '/') {
sql_state->reason = __LINE__;
return FALSE;
}
/*
* if '1c' ends with '/x' then it's SQLi
*/
if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
sql_state->tokenvec[1].type == TYPE_COMMENT &&
sql_state->tokenvec[1].val[0] == '/') {
return TRUE;
}
/**
* there are some odd base64-looking query string values
* 1234-ABCDEFEhfhihwuefi--
* which evaluate to "1c"... these are not SQLi
* but 1234-- probably is.
* Make sure the "1" in "1c" is actually a true decimal number
*
* Need to check -original- string since the folding step
* may have merged tokens, e.g. "1+FOO" is folded into "1"
*
* Note: evasion: 1*1--
*/
if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
sql_state->tokenvec[1].type == TYPE_COMMENT) {
if (sql_state->stats_tokens > 2) {
/* we have some folding going on, highly likely SQLi */
sql_state->reason = __LINE__;
return TRUE;
}
/*
* we check that next character after the number is either whitespace,
* or '/' or a '-' ==> SQLi.
*/
ch = sql_state->s[sql_state->tokenvec[0].len];
if ( ch <= 32 ) {
/* next char was whitespace,e.g. "1234 --"
* this isn't exactly correct.. ideally we should skip over all whitespace
* but this seems to be ok for now
*/
return TRUE;
}
if (ch == '/' && sql_state->s[sql_state->tokenvec[0].len + 1] == '*') {
return TRUE;
}
if (ch == '-' && sql_state->s[sql_state->tokenvec[0].len + 1] == '-') {
return TRUE;
}
sql_state->reason = __LINE__;
return FALSE;
}
/*
* detect obvious SQLi scans.. many people put '--' in plain text
* so only detect if input ends with '--', e.g. 1-- but not 1-- foo
*/
if ((sql_state->tokenvec[1].len > 2)
&& sql_state->tokenvec[1].val[0] == '-') {
sql_state->reason = __LINE__;
return FALSE;
}
break;
} /* case 2 */
case 3:{
/*
* ...foo' + 'bar...
* no opening quote, no closing quote
* and each string has data
*/
if (streq(sql_state->fingerprint, "sos")
|| streq(sql_state->fingerprint, "s&s")) {
if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
&& (sql_state->tokenvec[2].str_close == CHAR_NULL)
&& (sql_state->tokenvec[0].str_close == sql_state->tokenvec[2].str_open)) {
/*
* if ....foo" + "bar....
*/
sql_state->reason = __LINE__;
return TRUE;
}
if (sql_state->stats_tokens == 3) {
sql_state->reason = __LINE__;
return FALSE;
}
/*
* not SQLi
*/
sql_state->reason = __LINE__;
return FALSE;
} else if (streq(sql_state->fingerprint, "s&n") ||
streq(sql_state->fingerprint, "n&1") ||
streq(sql_state->fingerprint, "1&1") ||
streq(sql_state->fingerprint, "1&v") ||
streq(sql_state->fingerprint, "1&s")) {
/* 'sexy and 17' not SQLi
* 'sexy and 17<18' SQLi
*/
if (sql_state->stats_tokens == 3) {
sql_state->reason = __LINE__;
return FALSE;
}
} else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) {
if ((sql_state->tokenvec[1].len < 5) ||
cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) {
/* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL)
* then treat as safe
*/
sql_state->reason = __LINE__;
return FALSE;
}
}
break;
} /* case 3 */
case 4:
case 5: {
/* nothing right now */
break;
} /* case 5 */
} /* end switch */
return TRUE;
}
/** Main API, detects SQLi in an input.
*
*
*/
static int reparse_as_mysql(struct libinjection_sqli_state * sql_state)
{
return sql_state->stats_comment_ddx ||
sql_state->stats_comment_hash;
}
/*
* This function is mostly use with SWIG
*/
struct libinjection_sqli_token*
libinjection_sqli_get_token(struct libinjection_sqli_state * sql_state, int i)
{
if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) {
return NULL;
}
return &(sql_state->tokenvec[i]);
}
int libinjection_is_sqli(struct libinjection_sqli_state * sql_state)
{
const char *s = sql_state->s;
size_t slen = sql_state->slen;
/*
* no input? not SQLi
*/
if (slen == 0) {
return FALSE;
}
/*
* test input "as-is"
*/
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI);
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
return TRUE;
} else if (reparse_as_mysql(sql_state)) {
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_MYSQL);
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
return TRUE;
}
}
/*
* if input has a single_quote, then
* test as if input was actually '
* example: if input if "1' = 1", then pretend it's
* "'1' = 1"
* Porting Notes: example the same as doing
* is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
*
*/
if (memchr(s, CHAR_SINGLE, slen)) {
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI);
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
return TRUE;
} else if (reparse_as_mysql(sql_state)) {
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL);
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
return TRUE;
}
}
}
/*
* same as above but with a double-quote "
*/
if (memchr(s, CHAR_DOUBLE, slen)) {
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL);
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
return TRUE;
}
}
/*
* Hurray, input is not SQLi
*/
return FALSE;
}
int libinjection_sqli(const char* input, size_t slen, char fingerprint[])
{
int issqli;
struct libinjection_sqli_state state;
libinjection_sqli_init(&state, input, slen, 0);
issqli = libinjection_is_sqli(&state);
if (issqli) {
strcpy(fingerprint, state.fingerprint);
} else {
fingerprint[0] = '\0';
}
return issqli;
}