Subject | How firebird use indexes with inner join |
---|---|
Author | leroyarnaud29 |
Post date | 2012-08-14T07:52:56Z |
hello,
it is normal that firebird uses uses only indexes one of two tables in inner join request ?
after some tests , i noticed that firebird uses the indexes of the table that have the index with the best selectivity.
this is a little exemple :
two table A AND B with a relation 1-n A have 10000 lines and B have 5000000. each line in A have 500 lines in B.
Structures :
CREATE TABLE A (
ID INTEGER NOT NULL,
DATEDOC TIMESTAMP NOT NULL,
WHEREDOC VARCHAR(22) NOT NULL
);
ALTER TABLE A ADD CONSTRAINT PK_A PRIMARY KEY (ID);
CREATE INDEX A_IDX1 ON A (DATEDOC);
CREATE INDEX A_IDX2 ON A (WHEREDOC);
CREATE TABLE B (
REFA INTEGER NOT NULL,
NBWORD INTEGER NOT NULL,
REFWROD VARCHAR(50) NOT NULL,
DATEWORD TIMESTAMP NOT NULL
);
CREATE INDEX B_IDX1 ON B (REFA);
CREATE INDEX B_IDX2 ON B (NBWORD);
CREATE INDEX B_IDX3 ON B (REFWROD);
CREATE INDEX B_IDX4 ON B (DATEWORD);
PROCEDURES TO POPULATE :
create or alter procedure POPULATE_A_B
AS
declare variable nba integer;
declare variable nbb integer;
declare variable nbc integer;
declare variable tmptime timestamp;
declare variable rf1 varchar(5);
declare variable rf2 varchar(5);
declare variable rf3 varchar(5);
declare variable rf4 varchar(5);
declare variable rf5 varchar(5);
declare variable rf6 varchar(50);
begin
nba = 0;
nbb = 0;
rf1 = 'qw';
rf2 = 'er';
rf3 = 'ty';
rf4 = 'ui';
rf5 = 'op';
while (nba < 10000) do begin
tmptime = 'now';
tmptime = tmptime + (1.0000*nba)/(24*60);
insert into A (ID, DATEDOC, WHEREDOC)
values (:nba, :tmptime, 'nowhere');
nbb = 0;
while (nbb < 500) do begin
nbc = mod(nbb,5);
if (nbc = 0) then
rf6 = rf1;
if (nbc = 1) then
rf6 = rf2;
if (nbc = 2) then
rf6 = rf3;
if (nbc = 3) then
rf6 = rf4;
if (nbc = 4) then
rf6 = rf5;
insert into B (REFA, NBWORD, REFWROD, DATEWORD)
values (:nba, 1, :rf6, 'NOW');
nbb = nbb + 1;
end
nba = nba + 1;
end
end
REQUEST :
select * from a inner join b on ID = refa
where a.datedoc >= '14.08.2012 07:20:00' and a.datedoc <= '14.08.2012
08:20:00' and b.refwrod = 'qw'
PLAN :
PLAN JOIN (A INDEX (A_IDX1), B INDEX (B_IDX1))
SELECTIVITY :
A_IDX1 => 0.00009999
A_IDX2 => 1
B_IDX1 => 0.0000999
B_IDX2 => 1
B_IDX3 => 0.2
B_IDX4 => 0.00001111
As you can see the plan don't use the index on b.refwrod so for each line read into table A, all line with relation with B are read
yet there is an index on refwrod but it doesn't take it. So with this
kind of request a big number of line are read.
an other exemple :
For select someword from other_table where date > :begin and date <
:end goup by refwrod into :word do begin
select sum(nbword) from A INNER JOIN B on id = refa where a.datedoc
so in this case if there are 5000 line in A between :begin and :end
Date there will be 2500000 read in table B for each word.
i don't understand why it don't take the index on refwrod
on table B ?
on the other hand if the selectivity of refwrod's index on table B is better than datedoc's index on Table A it takes only this one (refwrod's index) , never it takes the indexes of both table.
what do you tink about that ?
it is normal that firebird uses uses only indexes one of two tables in inner join request ?
after some tests , i noticed that firebird uses the indexes of the table that have the index with the best selectivity.
this is a little exemple :
two table A AND B with a relation 1-n A have 10000 lines and B have 5000000. each line in A have 500 lines in B.
Structures :
CREATE TABLE A (
ID INTEGER NOT NULL,
DATEDOC TIMESTAMP NOT NULL,
WHEREDOC VARCHAR(22) NOT NULL
);
ALTER TABLE A ADD CONSTRAINT PK_A PRIMARY KEY (ID);
CREATE INDEX A_IDX1 ON A (DATEDOC);
CREATE INDEX A_IDX2 ON A (WHEREDOC);
CREATE TABLE B (
REFA INTEGER NOT NULL,
NBWORD INTEGER NOT NULL,
REFWROD VARCHAR(50) NOT NULL,
DATEWORD TIMESTAMP NOT NULL
);
CREATE INDEX B_IDX1 ON B (REFA);
CREATE INDEX B_IDX2 ON B (NBWORD);
CREATE INDEX B_IDX3 ON B (REFWROD);
CREATE INDEX B_IDX4 ON B (DATEWORD);
PROCEDURES TO POPULATE :
create or alter procedure POPULATE_A_B
AS
declare variable nba integer;
declare variable nbb integer;
declare variable nbc integer;
declare variable tmptime timestamp;
declare variable rf1 varchar(5);
declare variable rf2 varchar(5);
declare variable rf3 varchar(5);
declare variable rf4 varchar(5);
declare variable rf5 varchar(5);
declare variable rf6 varchar(50);
begin
nba = 0;
nbb = 0;
rf1 = 'qw';
rf2 = 'er';
rf3 = 'ty';
rf4 = 'ui';
rf5 = 'op';
while (nba < 10000) do begin
tmptime = 'now';
tmptime = tmptime + (1.0000*nba)/(24*60);
insert into A (ID, DATEDOC, WHEREDOC)
values (:nba, :tmptime, 'nowhere');
nbb = 0;
while (nbb < 500) do begin
nbc = mod(nbb,5);
if (nbc = 0) then
rf6 = rf1;
if (nbc = 1) then
rf6 = rf2;
if (nbc = 2) then
rf6 = rf3;
if (nbc = 3) then
rf6 = rf4;
if (nbc = 4) then
rf6 = rf5;
insert into B (REFA, NBWORD, REFWROD, DATEWORD)
values (:nba, 1, :rf6, 'NOW');
nbb = nbb + 1;
end
nba = nba + 1;
end
end
REQUEST :
select * from a inner join b on ID = refa
where a.datedoc >= '14.08.2012 07:20:00' and a.datedoc <= '14.08.2012
08:20:00' and b.refwrod = 'qw'
PLAN :
PLAN JOIN (A INDEX (A_IDX1), B INDEX (B_IDX1))
SELECTIVITY :
A_IDX1 => 0.00009999
A_IDX2 => 1
B_IDX1 => 0.0000999
B_IDX2 => 1
B_IDX3 => 0.2
B_IDX4 => 0.00001111
As you can see the plan don't use the index on b.refwrod so for each line read into table A, all line with relation with B are read
yet there is an index on refwrod but it doesn't take it. So with this
kind of request a big number of line are read.
an other exemple :
For select someword from other_table where date > :begin and date <
:end goup by refwrod into :word do begin
select sum(nbword) from A INNER JOIN B on id = refa where a.datedoc
>= :begin and a.datedoc <= :end and b.refwrod = :word into :nbwords;end
so in this case if there are 5000 line in A between :begin and :end
Date there will be 2500000 read in table B for each word.
i don't understand why it don't take the index on refwrod
on table B ?
on the other hand if the selectivity of refwrod's index on table B is better than datedoc's index on Table A it takes only this one (refwrod's index) , never it takes the indexes of both table.
what do you tink about that ?