SELECT *
FROM table
WHERE condition
From a storage-engine point some questions arise:- can I see this statement ?
- what's in WHERE ?
- can I access the WHERE-clause ?
- can I use this information in my code (=storage engine) ?
The answer is YES. In this post I like to show you how to access this information. Using it in the engine will be a different thing, maybe I will show you in a later post how to do this.
This topic is named condition pushdown. Here you will find more information about this topic: MySQL 5.1 Plugin Development.
Gaining access to the information in the SQL-statement is simple: every engine is derived from a class handler and in this class you will find a function cond_push().
let's look into handler.h:
virtual const COND *cond_push(const COND *cond) { return cond; };
it's defined, but it does nothing.As a summary: by handling an SQL-statement the server prepares the statement, converts it into an appropriate form and for every table referenced in the statement it calls this routine in the storage-engine. For accessing this information we have to implement this function in our storage-engine.
So let's do it. Please allow me to skip long explanations but simply show you some code. Take the code from MinTempl as a base and extend it by adding via copy and paste:
in the header-file:
const COND *cond_push(const COND *cond);
in the code-file:/**
* WHERE
*
* in case you want to know something about the WHERE-clause
* you can use the following code as an example
*/
static void display_functype( int type)
{
const char *strFuncType[] = { "UNKNOWN_FUNC",
"EQ_FUNC", "EQUAL_FUNC", "NE_FUNC", "LT_FUNC",
"LE_FUNC", "GE_FUNC", "GT_FUNC", "FT_FUNC",
"LIKE_FUNC", "ISNULL_FUNC", "ISNOTNULL_FUNC", "COND_AND_FUNC",
"COND_OR_FUNC", "COND_XOR_FUNC", "BETWEEN", "IN_FUNC",
"MULT_EQUAL_FUNC", "INTERVAL_FUNC", "ISNOTNULLTEST_FUNC", "SP_EQUALS_FUNC",
"SP_DISJOINT_FUNC", "SP_INTERSECTS_FUNC", "SP_TOUCHES_FUNC", "SP_CROSSES_FUNC",
"SP_WITHIN_FUNC", "SP_CONTAINS_FUNC", "SP_OVERLAPS_FUNC", "SP_STARTPOINT",
"SP_ENDPOINT", "SP_EXTERIORRING", "SP_POINTN", "SP_GEOMETRYN",
"SP_INTERIORRINGN", "NOT_FUNC", "NOT_ALL_FUNC", "NOW_FUNC",
"TRIG_COND_FUNC", "SUSERVAR_FUNC", "GUSERVAR_FUNC", "COLLATE_FUNC",
"EXTRACT_FUNC", "CHAR_TYPECAST_FUNC", "FUNC_SP", "UDF_FUNC",
"NEG_FUNC", "GSYSVAR_FUNC" };
fprintf(stderr, "type=[%s]\t", strFuncType[type]);
}
const COND *ha_mintempl::cond_push(const COND *cond)
{
COND * tempCond = (COND*)cond;
if ( tempCond == NULL )
return NULL;
fprintf(stderr, "\ntable_name = <%s>\t<%s>\n", table_share->table_name.str, current_thd->query());
int level = 0;
fprintf(stderr, "\n");
while ( tempCond->next != NULL)
{
fprintf(stderr, "level=%d ", level);
switch( tempCond->type() )
{
case Item::FIELD_ITEM:
fprintf(stderr, "FIELD-ITEM");
fprintf(stderr, "\t[%s] [%s] [%s]",((Item_field*)tempCond)->db_name, ((Item_field*)tempCond)->table_name, ((Item_field*)tempCond)->field_name);
break;
case Item::FUNC_ITEM:
fprintf(stderr, "FUNC-ITEM");
fprintf(stderr, "=[%s]", ((Item_func*)tempCond)->func_name());
fprintf(stderr, " args: %d ", ((Item_func*)tempCond)->argument_count() );
display_functype( (int)((Item_func*)tempCond)->functype());
break;
case Item::SUM_FUNC_ITEM:
fprintf(stderr, "SUM-FUNC--ITEM");
break;
case Item::STRING_ITEM:
fprintf(stderr, "STRING-ITEM");
break;
case Item::INT_ITEM:
fprintf(stderr, "INT-ITEM");
fprintf(stderr, "\tval = %lld", ((Item_int*)tempCond)->value);
fprintf(stderr, " res=%d", ((Item_int*)tempCond)->result_type());
break;
case Item::REAL_ITEM:
fprintf(stderr, "REAL-ITEM");
break;
case Item::NULL_ITEM:
fprintf(stderr, "NULL-ITEM");
break;
case Item::VARBIN_ITEM:
fprintf(stderr, "VAR-BIN");
break;
case Item::COPY_STR_ITEM:
fprintf(stderr, "COPY-STR-ITEM");
break;
case Item::FIELD_AVG_ITEM:
fprintf(stderr, "FIELD-AVG-ITEM");
break;
case Item::DEFAULT_VALUE_ITEM:
fprintf(stderr, "DEFAULT-VLAUE-ITEM");
break;
case Item::PROC_ITEM:
fprintf(stderr, "PROC-ITEM");
break;
case Item::COND_ITEM:
fprintf(stderr, "COND-ITEM");
fprintf(stderr, " args: %d ", ((Item_cond*)tempCond)->argument_count() );
display_functype( (int)((Item_cond*)tempCond)->functype());
break;
case Item::REF_ITEM:
fprintf(stderr, "REF-ITEM");
break;
case Item::FIELD_STD_ITEM:
fprintf(stderr, "FIELD-STD-ITEM");
break;
case Item::FIELD_VARIANCE_ITEM:
fprintf(stderr, "FIELD-VAIRANCE-ITEM");
break;
case Item::INSERT_VALUE_ITEM:
fprintf(stderr, "INSERT-VALUE-ITEM");
break;
case Item::SUBSELECT_ITEM:
fprintf(stderr, "SUBSELECT-ITEM");
break;
case Item::ROW_ITEM:
fprintf(stderr, "ROW-ITEM");
break;
case Item::CACHE_ITEM:
fprintf(stderr, "CACHE-ITEM");
break;
case Item::TYPE_HOLDER:
fprintf(stderr, "TYPE-HOLDER");
break;
case Item::PARAM_ITEM:
fprintf(stderr, "PARAM-ITEM");
break;
case Item::TRIGGER_FIELD_ITEM:
fprintf(stderr, "TRIGGER-FIELD-ITEM");
break;
case Item::DECIMAL_ITEM:
fprintf(stderr, "DECIMAL-ITEM");
break;
case Item::XPATH_NODESET:
fprintf(stderr, "XPATH-NODESET");
break;
case Item::XPATH_NODESET_CMP:
fprintf(stderr, "XPATH-NODESET-CMP");
break;
case Item::VIEW_FIXER_ITEM:
fprintf(stderr, "VIEW-FIXER-ITEM");
break;
default:
fprintf(stderr, "unknown: %d ", tempCond->type());
break;
}
if (tempCond->str_value.ptr() != NULL )
fprintf(stderr, "\tstr_value=<%s>", tempCond->str_value.ptr());
if ( tempCond->name != NULL )
fprintf(stderr, "\tname=<%s>", tempCond->name);
level++;
tempCond=tempCond->next;
fprintf(stderr, "\n");
}
fprintf(stderr, "\n");
return cond;
}
hint: please look at the update-section at the end of this text.You will find a ink to an updated-version of this code in this scetion.
Then recompile everything. Now you have a storage-engine MinTempl with a meaning quite different from the one described in the posting that can be found in the archive of the blog (but that's what I used this template for: extending and playing with the code). If you prefer you can create a copy of MinTempl and give it a new name. For this posting I will call the storage-engine with the added code MinTempl.
When you start your fresh compiled server server you should check if MinTempl is there and if it is not you should install it as described in an earlier posting.
MariaDB
For MySQL everything is ready and awaiting the first test but for MariaDB you have to do something. Please look at this output which I reformatted and polished a bit:MariaDB [(none)]> SELECT @@optimizer_switch\G
*************************** 1. row ***************************
@@optimizer_switch:
index_merge=on,
index_merge_union=on,
index_merge_sort_union=on,
index_merge_intersection=on,
index_merge_sort_intersection=off,
engine_condition_pushdown=off,
index_condition_pushdown=on,
derived_merge=on,
derived_with_keys=on,
firstmatch=on,loosescan=on,
materialization=on,
in_to_exists=on,
semijoin=on,
partial_match_rowid_merge=on,
partial_match_table_scan=on,
subquery_cache=on,
mrr=off,
mrr_cost_based=off,
mrr_sort_keys=off,
outer_join_with_cache=on,
semijoin_with_cache=on,
join_cache_incremental=on,
join_cache_hashed=on,
join_cache_bka=on,
optimize_join_buffer_size=off,
table_elimination=on,
extended_keys=off,
exists_to_in=off
1 row in set (0.00 sec)
What you see is that condition pushdown is disabled by default in MariaDB so we have to enable it:
MariaDB [(none)]> set optimizer_switch='engine_condition_pushdown=on';
Query OK, 0 rows affected (0.00 sec)
MariaDB [(none)]> SELECT @@optimizer_switch\G
*************************** 1. row ***************************
@@optimizer_switch:
....
engine_condition_pushdown=on,
....
1 row in set (0.00 sec)
continuing
Everything is ready and awaiting out first test. So start your preferred frontend, connect to the server and start with these steps: I created a database for our tests and in this database I created a table into which some data is inserted:MariaDB [(none)]> create database TestCP;
Query OK, 1 row affected (0.00 sec)
MariaDB [(none)]> use TestCP;
Database changed
MariaDB [TestCP]> create table Test1 (
-> Id char(8),
-> PZN char(7),
-> EVP char(8),
-> HAP char(8),
-> ArtikelBez varchar(40),
-> ArtikelText varchar(26),
-> Hersteller char(5) )
-> engine=MINTEMPL
-> ;
Query OK, 0 rows affected (0.04 sec)
MariaDB [TestCP]> insert into Test1 select * from TestOK.ABDAOK limit 100;
Query OK, 100 rows affected (0.03 sec)
Records: 100 Duplicates: 0 Warnings: 0
MariaDB [TestCP]>
So, enough of the introductory work, let's start looking into the SQL-statement. Here is a first and very simple example:
SQL:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234';
And here is the output produced by our cond_push() that you can see in the console of Eclipse:
table_name = <Test1> <select * from Test1 where Hersteller = '1234'>
level=0 FUNC-ITEM=[=] args: 2 type=[EQ_FUNC]
level=1 FUNC-ITEM=[multiple equal] args: 0 type=[MULT_EQUAL_FUNC]
level=2 STRING-ITEM str_value=<1234> name=<1234>
level=3 FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
level=4 FIELD-ITEM [TestCP] [Test1] [ArtikelText] name=<ArtikelText>
level=5 FIELD-ITEM [TestCP] [Test1] [ArtikelBez] name=<ArtikelBez>
level=6 FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
level=7 FIELD-ITEM [TestCP] [Test1] [EVP] name=<EVP>
level=8 FIELD-ITEM [TestCP] [Test1] [PZN] name=<PZN>
level=9 FIELD-ITEM [TestCP] [Test1] [Id] name=<Id>
level=10 INT-ITEM val = -1 res=2
level=11 FUNC-ITEM=[=] args: 2 type=[EQ_FUNC]
level=12 STRING-ITEM str_value=<1234> name=<1234>
level=13 FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
Well, confusing.
But if you look a bit closer at the lines you will see:
- the name of the database: TestCP
- the name of the table: Test1
- the name of the column: Hersteller
- the string-constant: 1234;
Please look at the last 3 lines of the output. There you will see the condition (I reformatted it a bit):
FUNC-ITEM [EQ_FUNC]
STRING-ITEM <1234>
FIELD-ITEM [TestCP] [Test1] [Hersteller]
looks better now and this is our WHERE-condition in the form: EQ_FUNC('1234', TestCP.Test1.Hersteller)In the form of a tree it looks like this:
Let's make our example a bit more complicated:
SQL:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234' and HAP > 10;
and here is the output of the our function cond_push() in the console:table_name = <Test1> <select * from Test1 where Hersteller = '1234' and HAP > 10>
level=0 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=1 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=2 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=3 FUNC-ITEM=[=] args: 2 type=[EQ_FUNC]
level=4 FUNC-ITEM=[multiple equal] args: 0 type=[MULT_EQUAL_FUNC]
level=5 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=6 CACHE-ITEM
level=7 STRING-ITEM str_value=<1234> name=<1234>
level=8 FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
level=9 FIELD-ITEM [TestCP] [Test1] [ArtikelText] name=<ArtikelText>
level=10 FIELD-ITEM [TestCP] [Test1] [ArtikelBez] name=<ArtikelBez>
level=11 FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
level=12 FIELD-ITEM [TestCP] [Test1] [EVP] name=<EVP>
level=13 FIELD-ITEM [TestCP] [Test1] [PZN] name=<PZN>
level=14 FIELD-ITEM [TestCP] [Test1] [Id] name=<Id>
level=15 INT-ITEM val = -1 res=2
level=16 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=17 FUNC-ITEM=[>] args: 2 type=[GT_FUNC]
level=18 INT-ITEM val = 10 res=2 name=<10>
level=19 FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
level=20 FUNC-ITEM=[=] args: 2 type=[EQ_FUNC]
level=21 STRING-ITEM str_value=<1234> name=<1234>
level=22 FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
When you look at the last 7 lines of this output you will see (again, I reformatted this a bit):
COND-ITEM [COND_AND_FUNC]
FUNC-ITEM [GT_FUNC]
INT-ITEM 10
FIELD-ITEM [TestCP].[Test1].[HAP]
FUNC-ITEM=[=] type=[EQ_FUNC]
STRING-ITEM <1234>
FIELD-ITEM [TestCP].[Test1].[Hersteller]
That looks promising: COND_AND_FUNC( GT_FUNC( 10, TestCP.Test1.HAP), EQ_FUNC( '1234'. TestCP.Test1.Hersteller) )And in the form of a tree:
Looks like we got the information, but it's hidden under a lot of other information. Can't the server deliver better results immediately?
Looking into the code of MariaDB or MySQL you can see some functions whose names suggest that these help in wandering through the condition-tree:
in item.h:
virtual void traverse_cond(Cond_traverser traverser,
void *arg, traverse_order order)
{
(*traverser)(this, arg);
}
in item_func.h: void traverse_cond(Cond_traverser traverser,
void * arg, traverse_order order);
you will find the implementation of Item_func::traverse_cond() in item_func.cc.in item_cmpfunc.h:
void traverse_cond(Cond_traverser, void *arg, traverse_order order);
You will find the implementation of Item_cond::traverse_cond() in item_cmpfunc.cc.So let's start, in Eclipse set breakpoints on these functions and simply repeat our tests.You will see ..... nothing, these functions are not called. The reason for this: we have to call them.
another try
As before: instead of long explanations I will show you some code:in the top of the file ha_mintempl.cc please add these two lines:
static void display_functype( int );
static void handle_Item( const Item *item, void *arg);
and later in the file please add these lines (which replace the lines of our first try):/**
* WHERE
*
* in case you want to know something about the WHERE-clause
* you can use the following code as an example
*/
static void display_functype( int type)
{
const char *strFuncType[] = { "UNKNOWN_FUNC",
"EQ_FUNC", "EQUAL_FUNC", "NE_FUNC", "LT_FUNC",
"LE_FUNC", "GE_FUNC", "GT_FUNC", "FT_FUNC",
"LIKE_FUNC", "ISNULL_FUNC", "ISNOTNULL_FUNC", "COND_AND_FUNC",
"COND_OR_FUNC", "COND_XOR_FUNC", "BETWEEN", "IN_FUNC",
"MULT_EQUAL_FUNC", "INTERVAL_FUNC", "ISNOTNULLTEST_FUNC", "SP_EQUALS_FUNC",
"SP_DISJOINT_FUNC", "SP_INTERSECTS_FUNC", "SP_TOUCHES_FUNC", "SP_CROSSES_FUNC",
"SP_WITHIN_FUNC", "SP_CONTAINS_FUNC", "SP_OVERLAPS_FUNC", "SP_STARTPOINT",
"SP_ENDPOINT", "SP_EXTERIORRING", "SP_POINTN", "SP_GEOMETRYN",
"SP_INTERIORRINGN", "NOT_FUNC", "NOT_ALL_FUNC", "NOW_FUNC",
"TRIG_COND_FUNC", "SUSERVAR_FUNC", "GUSERVAR_FUNC", "COLLATE_FUNC",
"EXTRACT_FUNC", "CHAR_TYPECAST_FUNC", "FUNC_SP", "UDF_FUNC",
"NEG_FUNC", "GSYSVAR_FUNC" };
fprintf(stderr, "type=[%s]\t", strFuncType[type]);
}
const COND * ha_mintempl::cond_push(const COND *cond)
{
DBUG_ENTER("ha_mintempl::cond_push");
THD *cThd = current_thd;
// access the full query:
fprintf(stderr, "\ntable_name = <%s>\t<%s>\n", table_share->table_name.str, cThd->query());
// the query prepared by MySQL/MariaDB for this table:
((Item*)cond)->traverse_cond(&handle_Item, NULL, Item::PREFIX); // Item::POSTFIX
// future extension: tell the server that we take over all of the work
// DBUG_RETURN(NULL);
DBUG_RETURN(cond);
};
static void handle_Item(const Item *item, void *arg)
{
COND * tempCond = (COND*)item;
if ( tempCond == NULL )
return;
switch( tempCond->type() )
{
case Item::FIELD_ITEM:
fprintf(stderr, "FIELD-ITEM\t");
fprintf(stderr, "[%s] [%s] [%s]",((Item_field*)tempCond)->db_name, ((Item_field*)tempCond)->table_name, ((Item_field*)tempCond)->field_name);
break;
case Item::FUNC_ITEM:
fprintf(stderr, "FUNC-ITEM\t");
fprintf(stderr, "[%s]\t", ((Item_func*)tempCond)->func_name());
fprintf(stderr, "args: %d\t", ((Item_func*)tempCond)->argument_count() );
display_functype( (int)((Item_func*)tempCond)->functype());
break;
case Item::SUM_FUNC_ITEM:
fprintf(stderr, "SUM-FUNC-ITEM\t");
break;
case Item::STRING_ITEM:
fprintf(stderr, "STRING-ITEM\t");
break;
case Item::INT_ITEM:
fprintf(stderr, "INT-ITEM\t");
fprintf(stderr, "val = %lld\t", ((Item_int*)tempCond)->value);
fprintf(stderr, "res=%d", ((Item_int*)tempCond)->result_type());
break;
case Item::REAL_ITEM:
fprintf(stderr, "REAL-ITEM\t");
break;
case Item::NULL_ITEM:
fprintf(stderr, "NULL-ITEM\t");
break;
case Item::VARBIN_ITEM:
fprintf(stderr, "VAR-BIN\t");
break;
case Item::COPY_STR_ITEM:
fprintf(stderr, "COPY-STR-ITEM\t");
break;
case Item::FIELD_AVG_ITEM:
fprintf(stderr, "FIELD-AVG-ITEM\t");
break;
case Item::DEFAULT_VALUE_ITEM:
fprintf(stderr, "DEFAULT-VLAUE-ITEM\t");
break;
case Item::PROC_ITEM:
fprintf(stderr, "PROC-ITEM\t");
break;
case Item::COND_ITEM:
fprintf(stderr, "COND-ITEM\t");
fprintf(stderr, "args: %d\t", ((Item_cond*)tempCond)->argument_count() );
display_functype( (int)((Item_cond*)tempCond)->functype());
break;
case Item::REF_ITEM:
fprintf(stderr, "REF-ITEM\t");
break;
case Item::FIELD_STD_ITEM:
fprintf(stderr, "FIELD-STD-ITEM\t");
break;
case Item::FIELD_VARIANCE_ITEM:
fprintf(stderr, "FIELD-VAIRANCE-ITEM\t");
break;
case Item::INSERT_VALUE_ITEM:
fprintf(stderr, "INSERT-VALUE-ITEM\t");
break;
case Item::SUBSELECT_ITEM:
fprintf(stderr, "SUBSELECT-ITEM\t");
break;
case Item::ROW_ITEM:
fprintf(stderr, "ROW-ITEM\t");
break;
case Item::CACHE_ITEM:
fprintf(stderr, "CACHE-ITEM\t");
break;
case Item::TYPE_HOLDER:
fprintf(stderr, "TYPE-HOLDER\t");
break;
case Item::PARAM_ITEM:
fprintf(stderr, "PARAM-ITEM\t");
break;
case Item::TRIGGER_FIELD_ITEM:
fprintf(stderr, "TRIGGER-FIELD-ITEM\t");
break;
case Item::DECIMAL_ITEM:
fprintf(stderr, "DECIMAL-ITEM\t");
break;
case Item::XPATH_NODESET:
fprintf(stderr, "XPATH-NODESET\t");
break;
case Item::XPATH_NODESET_CMP:
fprintf(stderr, "XPATH-NODESET-CMP\t");
break;
case Item::VIEW_FIXER_ITEM:
fprintf(stderr, "VIEW-FIXER-ITEM\t");
break;
default:
fprintf(stderr, "unknown: %d\t", tempCond->type());
break;
}
if (tempCond->str_value.ptr() != NULL )
fprintf(stderr, "\tstr_value=<%s>", tempCond->str_value.ptr());
if ( tempCond->name != NULL )
fprintf(stderr, "\tname=<%s>", tempCond->name);
fprintf(stderr, "\n");
}
Our new cond_push()-functions looks a bit different now, it contains only some lines of code. Aside some debug-code it contains one line of interest: this line calls the traverse_cond()-function of an Item. The traverse_cond()-function is given the address of a function, a NULL-constant and the constant PREFIX (more on this later).
If you compare the code in the old function cond_push() and the new function handle_Item() you will see that these look very similar. The difference is that the code in the old function starts with the condition and walks through it line by line until it reaches a NULL. The new function does not contain this part because it handles only one line of the conditions (=one Item) and then returns to the caller and the server traverses to the next condition and calls our function again.
So, enough of explanations for the moment, recompile everything, start your server and in the case of MariaDB do not forget to enable condition_pushdown again.
Let's start with the new code and our first SQL-statement, the simple on:
SQL:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234';
the output in the console:table_name = <Test1> <select * from Test1 where Hersteller = '1234'>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
This looks better now so we take the second SQL-statement:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234' and HAP > 10;
the output in the console:table_name = <Test1> <select * from Test1 where Hersteller = '1234' and HAP > 10>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [>] args: 2 type=[GT_FUNC]
FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
Looks good, at least for me.If you look at the code of cond_push() you will see a constant: Item::PREFIX. Replace it with Item::POSTFIX, recompile and do the tests again. These are the results:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234';
table_name = <Test1> <select * from Test1 where Hersteller = '1234'>
FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
another example:MariaDB [TestCP]> select * from Test1 where Hersteller = '1234' and HAP > 10;
table_name = <Test1> <select * from Test1 where Hersteller = '1234' and HAP > 10>
FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
FUNC-ITEM [>] args: 2 type=[GT_FUNC]
COND-ITEM args: 0 type=[COND_AND_FUNC]
The server delivers the WHERE-condition in the RPN- or PN-format as you wish. In the code I switched back to Item::PREFIX.Now you can play with the code given here and test some more SQL-statements of your own. And you can modify the code given here or write your own code to get more information out of the condition. MySQL and MariaDB prepare a condition-tree for us that contains much more information than I showed you here.
But wait: what about JOINs?
The examples I showed before where simple statements as they did relate to only one table. So let's try some more complicated statements.
To begin with this we needed at least one more table:
MariaDB [TestCP]> create table Test2 (
-> Id char(8),
-> PZN char(7),
-> EVP char(8),
-> HAP char(8),
-> ArtikelBez varchar(40),
-> ArtikelText varchar(26),
-> Hersteller char(5) )
-> engine=MINTEMPL
-> ;
Query OK, 0 rows affected (0.04 sec)
MariaDB [TestCP]> insert into Test2 select * from TestOK.ABDAOK limit 100;
Query OK, 100 rows affected (0.01 sec)
Records: 100 Duplicates: 0 Warnings: 0
MariaDB [TestCP]>
Now we can do some tests. I simplified and polished the output a bit:
SQL:
MariaDB [TestCP]> select A.*, B.*
-> from Test1 A, Test2 B
-> where A.Hersteller = '1234'
-> and B.Hersteller >= '12345'
-> and A.Id = B.Id
-> ;
console-output:table_name = <Test1>
<select A.*, B.*
from Test1 A, Test2 B
where A.Hersteller = '1234'
and B.Hersteller >= '12345'
and A.Id = B.Id>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
table_name = <Test2>
<select A.*, B.*
from Test1 A, Test2 B
where A.Hersteller = '1234'
and B.Hersteller >= '12345'
and A.Id = B.Id>
FUNC-ITEM [>=] args: 2 type=[GE_FUNC]
FIELD-ITEM [TestCP] [B] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<12345> name=<12345>
As you can see it's a JOIN on 2 tables: Test1 and Test2 (both use our engine MinTempl because this engine contains the code to produce this output). And in the console you see the name of the table followed by the full SQL-statement. Then you'll see the WHERE-part of the SQL-statement but you will see only those conditions relevant to table Test1. The same is true for the table Test2, it also gets the WHERE-condition stripped down to this table.
The function cond_push() is called twice, first for the table Test1. In this call it contains the condition for this table only (something like a WHERE-clause for this table). Then it is called again with the condition for table Test2.
So let's make our statement a bit more complex by adding some AND-conditions to the WHERE-part:
SQL:
MariaDB [TestCP]> select count(*)
from Test1 A, Test2 B
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10
and A.Id = B.Id
console-output:table_name = <Test1>
You for sure know an SQL-statement like
SELECT *
FROM table
WHERE condition
From a storage-engine point some questions arise:
- can I see this statement ?
- what's in WHERE ?
- can I access the WHERE-clause ?
- can I use this information in my code (=storage engine) ?
The answer is YES. In this post I like to show you how to access this information. Using it in the engine will be a different thing, maybe I will show you in a later post how to do this.
This topic is named condition pushdown. Here you will find more information about this topic: MySQL 5.1 Plugin Development.
Gaining access to the information in the SQL-statement is simple: every engine is derived from a class handler
and in this class you will find a function cond_push().
let's look into handler.h:
virtual const COND *cond_push(const COND *cond) { return cond; };
it's defined, but it does nothing.
As a summary: by handling an SQL-statement the server prepares the statement, converts it into an appropriate form and for every table referenced in the statement it calls this routine in the storage-engine. For accessing this information we have to implement this function in our storage-engine.
So let's do it. Please allow me to skip long explanations but simply show you some code. Take the code from MinTempl as a base and extend it by adding via copy and paste:
in the header-file:
const COND *cond_push(const COND *cond);
in the code-file:
/**
* WHERE
*
* in case you want to know something about the WHERE-clause
* you can use the following code as an example
*/
static void display_functype( int type)
{
const char *strFuncType[] = { "UNKNOWN_FUNC",
"EQ_FUNC", "EQUAL_FUNC", "NE_FUNC", "LT_FUNC",
"LE_FUNC", "GE_FUNC", "GT_FUNC", "FT_FUNC",
"LIKE_FUNC", "ISNULL_FUNC", "ISNOTNULL_FUNC", "COND_AND_FUNC",
"COND_OR_FUNC", "COND_XOR_FUNC", "BETWEEN", "IN_FUNC",
"MULT_EQUAL_FUNC", "INTERVAL_FUNC", "ISNOTNULLTEST_FUNC", "SP_EQUALS_FUNC",
"SP_DISJOINT_FUNC", "SP_INTERSECTS_FUNC", "SP_TOUCHES_FUNC", "SP_CROSSES_FUNC",
"SP_WITHIN_FUNC", "SP_CONTAINS_FUNC", "SP_OVERLAPS_FUNC", "SP_STARTPOINT",
"SP_ENDPOINT", "SP_EXTERIORRING", "SP_POINTN", "SP_GEOMETRYN",
"SP_INTERIORRINGN", "NOT_FUNC", "NOT_ALL_FUNC", "NOW_FUNC",
"TRIG_COND_FUNC", "SUSERVAR_FUNC", "GUSERVAR_FUNC", "COLLATE_FUNC",
"EXTRACT_FUNC", "CHAR_TYPECAST_FUNC", "FUNC_SP", "UDF_FUNC",
"NEG_FUNC", "GSYSVAR_FUNC" };
fprintf(stderr, "type=[%s]\t", strFuncType[type]);
}
const COND *ha_mintempl::cond_push(const COND *cond)
{
COND * tempCond = (COND*)cond;
if ( tempCond == NULL )
return NULL;
fprintf(stderr, "\ntable_name = <%s>\t<%s>\n", table_share->table_name.str, current_thd->query());
int level = 0;
fprintf(stderr, "\n");
while ( tempCond->next != NULL)
{
fprintf(stderr, "level=%d ", level);
switch( tempCond->type() )
{
case Item::FIELD_ITEM:
fprintf(stderr, "FIELD-ITEM");
fprintf(stderr, "\t[%s] [%s] [%s]",((Item_field*)tempCond)->db_name, ((Item_field*)tempCond)->table_name, ((Item_field*)tempCond)->field_name);
break;
case Item::FUNC_ITEM:
fprintf(stderr, "FUNC-ITEM");
fprintf(stderr, "=[%s]", ((Item_func*)tempCond)->func_name());
fprintf(stderr, " args: %d ", ((Item_func*)tempCond)->argument_count() );
display_functype( (int)((Item_func*)tempCond)->functype());
break;
case Item::SUM_FUNC_ITEM:
fprintf(stderr, "SUM-FUNC--ITEM");
break;
case Item::STRING_ITEM:
fprintf(stderr, "STRING-ITEM");
break;
case Item::INT_ITEM:
fprintf(stderr, "INT-ITEM");
fprintf(stderr, "\tval = %lld", ((Item_int*)tempCond)->value);
fprintf(stderr, " res=%d", ((Item_int*)tempCond)->result_type());
break;
case Item::REAL_ITEM:
fprintf(stderr, "REAL-ITEM");
break;
case Item::NULL_ITEM:
fprintf(stderr, "NULL-ITEM");
break;
case Item::VARBIN_ITEM:
fprintf(stderr, "VAR-BIN");
break;
case Item::COPY_STR_ITEM:
fprintf(stderr, "COPY-STR-ITEM");
break;
case Item::FIELD_AVG_ITEM:
fprintf(stderr, "FIELD-AVG-ITEM");
break;
case Item::DEFAULT_VALUE_ITEM:
fprintf(stderr, "DEFAULT-VLAUE-ITEM");
break;
case Item::PROC_ITEM:
fprintf(stderr, "PROC-ITEM");
break;
case Item::COND_ITEM:
fprintf(stderr, "COND-ITEM");
fprintf(stderr, " args: %d ", ((Item_cond*)tempCond)->argument_count() );
display_functype( (int)((Item_cond*)tempCond)->functype());
break;
case Item::REF_ITEM:
fprintf(stderr, "REF-ITEM");
break;
case Item::FIELD_STD_ITEM:
fprintf(stderr, "FIELD-STD-ITEM");
break;
case Item::FIELD_VARIANCE_ITEM:
fprintf(stderr, "FIELD-VAIRANCE-ITEM");
break;
case Item::INSERT_VALUE_ITEM:
fprintf(stderr, "INSERT-VALUE-ITEM");
break;
case Item::SUBSELECT_ITEM:
fprintf(stderr, "SUBSELECT-ITEM");
break;
case Item::ROW_ITEM:
fprintf(stderr, "ROW-ITEM");
break;
case Item::CACHE_ITEM:
fprintf(stderr, "CACHE-ITEM");
break;
case Item::TYPE_HOLDER:
fprintf(stderr, "TYPE-HOLDER");
break;
case Item::PARAM_ITEM:
fprintf(stderr, "PARAM-ITEM");
break;
case Item::TRIGGER_FIELD_ITEM:
fprintf(stderr, "TRIGGER-FIELD-ITEM");
break;
case Item::DECIMAL_ITEM:
fprintf(stderr, "DECIMAL-ITEM");
break;
case Item::XPATH_NODESET:
fprintf(stderr, "XPATH-NODESET");
break;
case Item::XPATH_NODESET_CMP:
fprintf(stderr, "XPATH-NODESET-CMP");
break;
case Item::VIEW_FIXER_ITEM:
fprintf(stderr, "VIEW-FIXER-ITEM");
break;
default:
fprintf(stderr, "unknown: %d ", tempCond->type());
break;
}
if (tempCond->str_value.ptr() != NULL )
fprintf(stderr, "\tstr_value=<%s>", tempCond->str_value.ptr());
if ( tempCond->name != NULL )
fprintf(stderr, "\tname=<%s>", tempCond->name);
level++;
tempCond=tempCond->next;
fprintf(stderr, "\n");
}
fprintf(stderr, "\n");
return cond;
}
hint: please lok atthe update-section at the end.You will find a ink to an updated-version of this code i this scetion
Then recompile everything. Now you have a storage-engine MinTempl with a meaning quite different from the one described in the posting that can be found in the archive of the blog (but that's what I used this template for: extending and playing with the code). If you prefer you can create a copy of MinTempl and give it a new name. For this posting I will call the storage-engine with the added code MinTempl.
When you start your fresh compiled server server you should check if MinTempl is there and if it is not you should install it as described in an earlier posting.
MariaDB
For MySQL everything is ready and awaiting the first test but for MariaDB you have to do something. Please look at this output which I reformatted and polished a bit:
MariaDB [(none)]> SELECT @@optimizer_switch\G
*************************** 1. row ***************************
@@optimizer_switch:
index_merge=on,
index_merge_union=on,
index_merge_sort_union=on,
index_merge_intersection=on,
index_merge_sort_intersection=off,
engine_condition_pushdown=off,
index_condition_pushdown=on,
derived_merge=on,
derived_with_keys=on,
firstmatch=on,loosescan=on,
materialization=on,
in_to_exists=on,
semijoin=on,
partial_match_rowid_merge=on,
partial_match_table_scan=on,
subquery_cache=on,
mrr=off,
mrr_cost_based=off,
mrr_sort_keys=off,
outer_join_with_cache=on,
semijoin_with_cache=on,
join_cache_incremental=on,
join_cache_hashed=on,
join_cache_bka=on,
optimize_join_buffer_size=off,
table_elimination=on,
extended_keys=off,
exists_to_in=off
1 row in set (0.00 sec)
What you see is that condition pushdown is disabled by default in MariaDB so we have to enable it:
MariaDB [(none)]> set optimizer_switch='engine_condition_pushdown=on';
Query OK, 0 rows affected (0.00 sec)
MariaDB [(none)]> SELECT @@optimizer_switch\G
*************************** 1. row ***************************
@@optimizer_switch:
....
engine_condition_pushdown=on,
....
1 row in set (0.00 sec)
continuing
Everything is ready and awaiting out first test. So start your preferred frontend, connect to the server and start with these steps: I created a database for our tests and in this database I created a table into which some data is inserted:
MariaDB [(none)]> create database TestCP;
Query OK, 1 row affected (0.00 sec)
MariaDB [(none)]> use TestCP;
Database changed
MariaDB [TestCP]> create table Test1 (
-> Id char(8),
-> PZN char(7),
-> EVP char(8),
-> HAP char(8),
-> ArtikelBez varchar(40),
-> ArtikelText varchar(26),
-> Hersteller char(5) )
-> engine=MINTEMPL
-> ;
Query OK, 0 rows affected (0.04 sec)
MariaDB [TestCP]> insert into Test1 select * from TestOK.ABDAOK limit 100;
Query OK, 100 rows affected (0.03 sec)
Records: 100 Duplicates: 0 Warnings: 0
MariaDB [TestCP]>
So, enough of the introductory work, let's start looking into the SQL-statement. Here is a first and very simple example:
SQL:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234';
And here is the output produced by our cond_push() that you can see in the console of Eclipse:
table_name = <Test1> <select * from Test1 where Hersteller = '1234'>
level=0 FUNC-ITEM=[=] args: 2 type=[EQ_FUNC]
level=1 FUNC-ITEM=[multiple equal] args: 0 type=[MULT_EQUAL_FUNC]
level=2 STRING-ITEM str_value=<1234> name=<1234>
level=3 FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
level=4 FIELD-ITEM [TestCP] [Test1] [ArtikelText] name=<ArtikelText>
level=5 FIELD-ITEM [TestCP] [Test1] [ArtikelBez] name=<ArtikelBez>
level=6 FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
level=7 FIELD-ITEM [TestCP] [Test1] [EVP] name=<EVP>
level=8 FIELD-ITEM [TestCP] [Test1] [PZN] name=<PZN>
level=9 FIELD-ITEM [TestCP] [Test1] [Id] name=<Id>
level=10 INT-ITEM val = -1 res=2
level=11 FUNC-ITEM=[=] args: 2 type=[EQ_FUNC]
level=12 STRING-ITEM str_value=<1234> name=<1234>
level=13 FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
Well, confusing.
But if you look a bit closer at the lines you will see:
- the name of the database: TestCP
- the name of the table: Test1
- the name of the column: Hersteller
- the string-constant: 1234;
and so on.
Please look at the last 3 lines of the output. There you will see the condition (I reformatted it a bit):
FUNC-ITEM [EQ_FUNC]
STRING-ITEM <1234>
FIELD-ITEM [TestCP] [Test1] [Hersteller]
looks better now and this is our WHERE-condition in the form: EQ_FUNC('1234', TestCP.Test1.Hersteller)
In the form of a tree it looks like this:
Let's make our example a bit more complicated:
SQL:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234' and HAP > 10;
and here is the output of the our function cond_push() in the console:
table_name = <Test1> <select * from Test1 where Hersteller = '1234' and HAP > 10>
level=0 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=1 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=2 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=3 FUNC-ITEM=[=] args: 2 type=[EQ_FUNC]
level=4 FUNC-ITEM=[multiple equal] args: 0 type=[MULT_EQUAL_FUNC]
level=5 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=6 CACHE-ITEM
level=7 STRING-ITEM str_value=<1234> name=<1234>
level=8 FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
level=9 FIELD-ITEM [TestCP] [Test1] [ArtikelText] name=<ArtikelText>
level=10 FIELD-ITEM [TestCP] [Test1] [ArtikelBez] name=<ArtikelBez>
level=11 FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
level=12 FIELD-ITEM [TestCP] [Test1] [EVP] name=<EVP>
level=13 FIELD-ITEM [TestCP] [Test1] [PZN] name=<PZN>
level=14 FIELD-ITEM [TestCP] [Test1] [Id] name=<Id>
level=15 INT-ITEM val = -1 res=2
level=16 COND-ITEM args: 0 type=[COND_AND_FUNC]
level=17 FUNC-ITEM=[>] args: 2 type=[GT_FUNC]
level=18 INT-ITEM val = 10 res=2 name=<10>
level=19 FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
level=20 FUNC-ITEM=[=] args: 2 type=[EQ_FUNC]
level=21 STRING-ITEM str_value=<1234> name=<1234>
level=22 FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
When you look at the last 7 lines of this output you will see (again, I reformatted this a bit):
COND-ITEM [COND_AND_FUNC]
FUNC-ITEM [GT_FUNC]
INT-ITEM 10
FIELD-ITEM [TestCP].[Test1].[HAP]
FUNC-ITEM=[=] type=[EQ_FUNC]
STRING-ITEM <1234>
FIELD-ITEM [TestCP].[Test1].[Hersteller]
That looks promising: COND_AND_FUNC( GT_FUNC( 10, TestCP.Test1.HAP), EQ_FUNC( '1234'. TestCP.Test1.Hersteller) )
And in the form of a tree:
Looks like we got the information, but it's hidden under a lot of other information. Can't the server deliver better results immediately?
Looking into the code of MariaDB or MySQL you can see some functions whose names suggest that these help in wandering through the condition-tree:
in item.h:
virtual void traverse_cond(Cond_traverser traverser,
void *arg, traverse_order order)
{
(*traverser)(this, arg);
}
in item_func.h:
void traverse_cond(Cond_traverser traverser,
void * arg, traverse_order order);
you will find the implementation of Item_func::traverse_cond() in item_func.cc.
in item_cmpfunc.h:
void traverse_cond(Cond_traverser, void *arg, traverse_order order);
You will find the implementation of Item_cond::traverse_cond() in item_cmpfunc.cc.
So let's start, in Eclipse set breakpoints on these functions and simply repeat our tests.You will see ..... nothing, these functions are not called. The reason for this: we have to call them.
another try
As before: instead of long explanations I will show you some code:
in the top of the file ha_mintempl.cc please add these two lines:
static void display_functype( int );
static void handle_Item( const Item *item, void *arg);
and later in the file please add these lines (which replace the lines of our first try):
/**
* WHERE
*
* in case you want to know something about the WHERE-clause
* you can use the following code as an example
*/
static void display_functype( int type)
{
const char *strFuncType[] = { "UNKNOWN_FUNC",
"EQ_FUNC", "EQUAL_FUNC", "NE_FUNC", "LT_FUNC",
"LE_FUNC", "GE_FUNC", "GT_FUNC", "FT_FUNC",
"LIKE_FUNC", "ISNULL_FUNC", "ISNOTNULL_FUNC", "COND_AND_FUNC",
"COND_OR_FUNC", "COND_XOR_FUNC", "BETWEEN", "IN_FUNC",
"MULT_EQUAL_FUNC", "INTERVAL_FUNC", "ISNOTNULLTEST_FUNC", "SP_EQUALS_FUNC",
"SP_DISJOINT_FUNC", "SP_INTERSECTS_FUNC", "SP_TOUCHES_FUNC", "SP_CROSSES_FUNC",
"SP_WITHIN_FUNC", "SP_CONTAINS_FUNC", "SP_OVERLAPS_FUNC", "SP_STARTPOINT",
"SP_ENDPOINT", "SP_EXTERIORRING", "SP_POINTN", "SP_GEOMETRYN",
"SP_INTERIORRINGN", "NOT_FUNC", "NOT_ALL_FUNC", "NOW_FUNC",
"TRIG_COND_FUNC", "SUSERVAR_FUNC", "GUSERVAR_FUNC", "COLLATE_FUNC",
"EXTRACT_FUNC", "CHAR_TYPECAST_FUNC", "FUNC_SP", "UDF_FUNC",
"NEG_FUNC", "GSYSVAR_FUNC" };
fprintf(stderr, "type=[%s]\t", strFuncType[type]);
}
const COND * ha_mintempl::cond_push(const COND *cond)
{
DBUG_ENTER("ha_mintempl::cond_push");
THD *cThd = current_thd;
// access the full query:
fprintf(stderr, "\ntable_name = <%s>\t<%s>\n", table_share->table_name.str, cThd->query());
// the query prepared by MySQL/MariaDB for this table:
((Item*)cond)->traverse_cond(&handle_Item, NULL, Item::PREFIX); // Item::POSTFIX
// future extension: tell the server that we take over all of the work
// DBUG_RETURN(NULL);
DBUG_RETURN(cond);
};
static void handle_Item(const Item *item, void *arg)
{
COND * tempCond = (COND*)item;
if ( tempCond == NULL )
return;
switch( tempCond->type() )
{
case Item::FIELD_ITEM:
fprintf(stderr, "FIELD-ITEM\t");
fprintf(stderr, "[%s] [%s] [%s]",((Item_field*)tempCond)->db_name, ((Item_field*)tempCond)->table_name, ((Item_field*)tempCond)->field_name);
break;
case Item::FUNC_ITEM:
fprintf(stderr, "FUNC-ITEM\t");
fprintf(stderr, "[%s]\t", ((Item_func*)tempCond)->func_name());
fprintf(stderr, "args: %d\t", ((Item_func*)tempCond)->argument_count() );
display_functype( (int)((Item_func*)tempCond)->functype());
break;
case Item::SUM_FUNC_ITEM:
fprintf(stderr, "SUM-FUNC-ITEM\t");
break;
case Item::STRING_ITEM:
fprintf(stderr, "STRING-ITEM\t");
break;
case Item::INT_ITEM:
fprintf(stderr, "INT-ITEM\t");
fprintf(stderr, "val = %lld\t", ((Item_int*)tempCond)->value);
fprintf(stderr, "res=%d", ((Item_int*)tempCond)->result_type());
break;
case Item::REAL_ITEM:
fprintf(stderr, "REAL-ITEM\t");
break;
case Item::NULL_ITEM:
fprintf(stderr, "NULL-ITEM\t");
break;
case Item::VARBIN_ITEM:
fprintf(stderr, "VAR-BIN\t");
break;
case Item::COPY_STR_ITEM:
fprintf(stderr, "COPY-STR-ITEM\t");
break;
case Item::FIELD_AVG_ITEM:
fprintf(stderr, "FIELD-AVG-ITEM\t");
break;
case Item::DEFAULT_VALUE_ITEM:
fprintf(stderr, "DEFAULT-VLAUE-ITEM\t");
break;
case Item::PROC_ITEM:
fprintf(stderr, "PROC-ITEM\t");
break;
case Item::COND_ITEM:
fprintf(stderr, "COND-ITEM\t");
fprintf(stderr, "args: %d\t", ((Item_cond*)tempCond)->argument_count() );
display_functype( (int)((Item_cond*)tempCond)->functype());
break;
case Item::REF_ITEM:
fprintf(stderr, "REF-ITEM\t");
break;
case Item::FIELD_STD_ITEM:
fprintf(stderr, "FIELD-STD-ITEM\t");
break;
case Item::FIELD_VARIANCE_ITEM:
fprintf(stderr, "FIELD-VAIRANCE-ITEM\t");
break;
case Item::INSERT_VALUE_ITEM:
fprintf(stderr, "INSERT-VALUE-ITEM\t");
break;
case Item::SUBSELECT_ITEM:
fprintf(stderr, "SUBSELECT-ITEM\t");
break;
case Item::ROW_ITEM:
fprintf(stderr, "ROW-ITEM\t");
break;
case Item::CACHE_ITEM:
fprintf(stderr, "CACHE-ITEM\t");
break;
case Item::TYPE_HOLDER:
fprintf(stderr, "TYPE-HOLDER\t");
break;
case Item::PARAM_ITEM:
fprintf(stderr, "PARAM-ITEM\t");
break;
case Item::TRIGGER_FIELD_ITEM:
fprintf(stderr, "TRIGGER-FIELD-ITEM\t");
break;
case Item::DECIMAL_ITEM:
fprintf(stderr, "DECIMAL-ITEM\t");
break;
case Item::XPATH_NODESET:
fprintf(stderr, "XPATH-NODESET\t");
break;
case Item::XPATH_NODESET_CMP:
fprintf(stderr, "XPATH-NODESET-CMP\t");
break;
case Item::VIEW_FIXER_ITEM:
fprintf(stderr, "VIEW-FIXER-ITEM\t");
break;
default:
fprintf(stderr, "unknown: %d\t", tempCond->type());
break;
}
if (tempCond->str_value.ptr() != NULL )
fprintf(stderr, "\tstr_value=<%s>", tempCond->str_value.ptr());
if ( tempCond->name != NULL )
fprintf(stderr, "\tname=<%s>", tempCond->name);
fprintf(stderr, "\n");
}
Our new cond_push()-functions looks a bit different now, it contains only some lines of code. Aside some debug-code it contains one line of interest: this line calls the traverse_cond()-function of an Item. The traverse_cond()-function is given the address of a function, a NULL-constant and the constant PREFIX (more on this later).
If you compare the code in the old function cond_push() and the new function handle_Item() you will see that these look very similar. The difference is that the code in the old function starts with the condition and walks through it line by line until it reaches a NULL. The new function does not contain this part because it handles only one line of the conditions (=one Item) and then returns to the caller and the server traverses to the next condition and calls our function again.
So, enough of explanations for the moment, recompile everything, start your server and in the case of MariaDB do not forget to enable condition_pushdown again.
Let's start with the new code and our first SQL-statement, the simple on:
SQL:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234';
the output in the console:
table_name = <Test1> <select * from Test1 where Hersteller = '1234'>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
This looks better now so we take the second SQL-statement:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234' and HAP > 10;
the output in the console:
table_name = <Test1> <select * from Test1 where Hersteller = '1234' and HAP > 10>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [>] args: 2 type=[GT_FUNC]
FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
Looks good, at least for me.
If you look at the code of cond_push() you will see a constant: Item::PREFIX. Replace it with Item::POSTFIX, recompile and do the tests again. These are the results:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234';
table_name = <Test1> <select * from Test1 where Hersteller = '1234'>
FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
another example:
MariaDB [TestCP]> select * from Test1 where Hersteller = '1234' and HAP > 10;
table_name = <Test1> <select * from Test1 where Hersteller = '1234' and HAP > 10>
FIELD-ITEM [TestCP] [Test1] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [Test1] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
FUNC-ITEM [>] args: 2 type=[GT_FUNC]
COND-ITEM args: 0 type=[COND_AND_FUNC]
The server delivers the WHERE-condition in the RPN- or PN-format as you wish. In the code I switched back to Item::PREFIX.
Now you can play with the code given here and test some more SQL-statements of your own. And you can modify the code given here or write your own code to get more information out of the condition. MySQL and MariaDB prepare a condition-tree for us that contains much more information than I showed you here.
But wait: what about JOINs?
The examples I showed before where simple statements as they did relate to only one table. So let's try some more complicated statements.
To begin with this we needed at least one more table:
MariaDB [TestCP]> create table Test2 (
-> Id char(8),
-> PZN char(7),
-> EVP char(8),
-> HAP char(8),
-> ArtikelBez varchar(40),
-> ArtikelText varchar(26),
-> Hersteller char(5) )
-> engine=MINTEMPL
-> ;
Query OK, 0 rows affected (0.04 sec)
MariaDB [TestCP]> insert into Test2 select * from TestOK.ABDAOK limit 100;
Query OK, 100 rows affected (0.01 sec)
Records: 100 Duplicates: 0 Warnings: 0
MariaDB [TestCP]>
Now we can do some tests. I simplified and polished the output a bit:
SQL:
MariaDB [TestCP]> select A.*, B.*
-> from Test1 A, Test2 B
-> where A.Hersteller = '1234'
-> and B.Hersteller >= '12345'
-> and A.Id = B.Id
-> ;
console-output:
table_name = <Test1>
<select A.*, B.*
from Test1 A, Test2 B
where A.Hersteller = '1234'
and B.Hersteller >= '12345'
and A.Id = B.Id>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
table_name = <Test2>
<select A.*, B.*
from Test1 A, Test2 B
where A.Hersteller = '1234'
and B.Hersteller >= '12345'
and A.Id = B.Id>
FUNC-ITEM [>=] args: 2 type=[GE_FUNC]
FIELD-ITEM [TestCP] [B] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<12345> name=<12345>
As you can see it's a JOIN on 2 tables: Test1 and Test2 (both use our engine MinTempl because this engine contains the code to produce this output). And in the console you see the
name of the table followed by the full SQL-statement. Then you'll see the WHERE-part of the SQL-statement but you will see only those conditions relevant to table Test1.
The same is true for the table Test2, it also gets the WHERE-condition stripped down to this table.
The function cond_push() is called twice, first for the table Test1. In this call it contains the condition for this table only (something like a WHERE-clause for this table). Then it is called again with the condition for table Test2.
So let's make our statement a bit more complex by adding some AND-conditions to the WHERE-part:
SQL:
MariaDB [TestCP]> select count(*)
from Test1 A, Test2 B
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10
and A.Id = B.Id
console-output:
table_name = <Test1>
<select count(*)
from Test1 A, Test2 B
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10
and A.Id = B.Id>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
table_name = <Test2>
<select count(*)
from Test1 A, Test2 B
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10
and A.Id = B.Id>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [>=] args: 2 type=[GE_FUNC]
FIELD-ITEM [TestCP] [B] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<12345> name=<12345>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [B] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
Again our function is called twice and in each call it gets the WHERE-part of the SQL-statement but only the part that is relevant to this table.
So let's try a third statement using the usual form of a JOIN:
SQL:
MariaDB [TestCP]> select count(*)
from Test1 A
JOIN Test2 B ON (A.Id = B.Id)
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10
console-output:
table_name = <Test1>
<select count(*)
from Test1 A
JOIN Test2 B ON (A.Id = B.Id)
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
table_name = <Test2>
<select count(*)
from Test1 A
JOIN Test2 B ON (A.Id = B.Id)
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [>=] args: 2 type=[GE_FUNC]
FIELD-ITEM [TestCP] [B] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<12345> name=<12345>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [B] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
and finally
You have seen how to access the WHERE-clause of the SQL-statement. Use this code and play with your own statements. Expand the code or rewrite it. No more code-examples will follow in this post.
some explanations
As I have shown the server takes the text of the SQL-statement and brings it in a form which we can access. For every table referenced in the SQL-statement it calls the function cond_push() of the appropriate storage-engine and gives to it the part of the WHERE-clause relevant to this table.
For this purpose a lot of classes are defined in MySQL and MariaDB. If you want to get an overview please look at this page.
You have to scroll around a bit to see something on this page but don't let this page and the diagrams confuse you. This looks complicated but it's not, so don't panic.
Everything starts with the class Item and the other classes are derived from this. Each class has a function type() which returns a constant that identifies the class. In the code-examples I presented above you will see a big switch-statement that uses this information to jump to the correct branch. All these branches contain an fprintf()-statement. This simply tells what type of condition is found in this position of the tree. In some cases you will see some more lines of code, because I was interested in more details. These case-statements are of the types:
- FIELD_ITEM
- FUNC_ITEM
- COND_ITEM
- INT_ITEM
- STRING_ITEM
We started with an Item * but in these cases I wanted to access information defined in derived classes so castings are needed in these branches. The corresponding classes are:
- Item_field
- Item_func
- Item_cond
- Item_int
- Item_string
If you look at the hierarchy-graph you will see that:
- Item_field -> Item_ident -> Item
- Item_func -> Item_result_field -> Item
- Item_cond -> Item_bool_func -> Item_int_func -> Item_func -> Item_result_field -> Item
- Item_int -> Item_num -> Item_basic_constant -> Item
- Item_string -> Item_basic_constant> -> Item
In the branch representing the type FIELD_ITEM the code prints the members db_name, table_name and field_name of the class Item_ident.
The WHERE-clause contains comparisons and in the branch FUNC_ITEM I've shown the symbol representing the comparison by calling func_name() of the class Item_result_field. Next comes the the number of arguments in this comparison by calling Item_func::->argument_count(). Finally comes the type of comparison by getting a constant from Item_func::functype() and converting this into a readable form.
The class Item_cond is derived from Item_func so by printing data about this object one will use functions from the clas Item_func.
I also used integer-constants in the SQL-statements and printed them in the branch INT_ITEM by accessing the member-variable value of the class Item_int.
And I used string-constants in the SQL-statements and printed them in the branch STRING_ITEM by accessing the function str_value() of the class Item.
There are a lot more classes defined in MySQL and MariaDB and a lot of functions in these classes for dealing with these classes. I don't want to go deeper into the details here but these classes will help us to gain access to the information we are interested in.
And one last hint: the function cond_push() is given the WHERE-clause in the form of an argument of type COND *, which is defined in handler.h as:
/* Forward declaration for condition pushdown to storage engine */
typedef class Item COND;
so COND is just another name for an Item.
This post got a bit lengthy so that's enough for today.
update
2014/05/13: If you want to see an example how this information can be used please look into this post.
2014/05/24: you will find a similar example in this post.
2017/05/10: and there's one more example: subselect: execution (4)
2017/05/10: you will find a corrected version of the code presented here in this file: WHERE.cc
<select count(*)
from Test1 A, Test2 B
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10
and A.Id = B.Id>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
table_name = <Test2>
<select count(*)
from Test1 A, Test2 B
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10
and A.Id = B.Id>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [>=] args: 2 type=[GE_FUNC]
FIELD-ITEM [TestCP] [B] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<12345> name=<12345>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [B] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
Again our function is called twice and in each call it gets the WHERE-part of the SQL-statement but only the part that is relevant to this table.
So let's try a third statement using the usual form of a JOIN:
SQL:
MariaDB [TestCP]> select count(*)
from Test1 A
JOIN Test2 B ON (A.Id = B.Id)
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10
console-output:table_name = <Test1>
<select count(*)
from Test1 A
JOIN Test2 B ON (A.Id = B.Id)
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<1234> name=<1234>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [A] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
table_name = <Test2>
<select count(*)
from Test1 A
JOIN Test2 B ON (A.Id = B.Id)
where A.Hersteller = '1234'
and A.HAP = 10
and B.Hersteller >= '12345'
and B.HAP = 10>
COND-ITEM args: 0 type=[COND_AND_FUNC]
FUNC-ITEM [>=] args: 2 type=[GE_FUNC]
FIELD-ITEM [TestCP] [B] [Hersteller] name=<Hersteller>
STRING-ITEM str_value=<12345> name=<12345>
FUNC-ITEM [=] args: 2 type=[EQ_FUNC]
FIELD-ITEM [TestCP] [B] [HAP] name=<HAP>
INT-ITEM val = 10 res=2 name=<10>
and finally
You have seen how to access the WHERE-clause of the SQL-statement. Use this code and play with your own statements. Expand the code or rewrite it. No more code-examples will follow in this post.some explanations
As I have shown the server takes the text of the SQL-statement and brings it in a form which we can access. For every table referenced in the SQL-statement it calls the function cond_push() of the appropriate storage-engine and gives to it the part of the WHERE-clause relevant to this table.For this purpose a lot of classes are defined in MySQL and MariaDB. If you want to get an overview please look at this page.
You have to scroll around a bit to see something on this page but don't let this page and the diagrams confuse you. This looks complicated but it's not, so don't panic.
Everything starts with the class Item and the other classes are derived from this. Each class has a function type() which returns a constant that identifies the class. In the code-examples I presented above you will see a big switch-statement that uses this information to jump to the correct branch. All these branches contain an fprintf()-statement. This simply tells what type of condition is found in this position of the tree. In some cases you will see some more lines of code, because I was interested in more details. These case-statements are of the types:
- FIELD_ITEM
- FUNC_ITEM
- COND_ITEM
- INT_ITEM
- STRING_ITEM
- Item_field
- Item_func
- Item_cond
- Item_int
- Item_string
- Item_field -> Item_ident -> Item
- Item_func -> Item_result_field -> Item
- Item_cond -> Item_bool_func -> Item_int_func -> Item_func -> Item_result_field -> Item
- Item_int -> Item_num -> Item_basic_constant -> Item
- Item_string -> Item_basic_constant> -> Item
In the branch representing the type FIELD_ITEM the code prints the members db_name, table_name and field_name of the class Item_ident.
The WHERE-clause contains comparisons and in the branch FUNC_ITEM I've shown the symbol representing the comparison by calling func_name() of the class Item_result_field. Next comes the the number of arguments in this comparison by calling Item_func::->argument_count(). Finally comes the type of comparison by getting a constant from Item_func::functype() and converting this into a readable form.
The class Item_cond is derived from Item_func so by printing data about this object one will use functions from the clas Item_func.
I also used integer-constants in the SQL-statements and printed them in the branch INT_ITEM by accessing the member-variable value of the class Item_int.
And I used string-constants in the SQL-statements and printed them in the branch STRING_ITEM by accessing the function str_value() of the class Item.
There are a lot more classes defined in MySQL and MariaDB and a lot of functions in these classes for dealing with these classes. I don't want to go deeper into the details here but these classes will help us to gain access to the information we are interested in.
And one last hint: the function cond_push() is given the WHERE-clause in the form of an argument of type COND *, which is defined in handler.h as:
/* Forward declaration for condition pushdown to storage engine */
typedef class Item COND;
so COND is just another name for an Item.This post got a bit lengthy so that's enough for today.
update
2014/05/13: If you want to see an example how this information can be used please look into this post.2014/05/24: you will find a similar example in this post.
2017/05/10: and there's one more example: subselect: execution (4)
2017/05/10: you will find a corrected version of the code presented here in this file: WHERE.cc