Subject | fts attempt/can this be improved? |
---|---|
Author | markd_mms |
Post date | 2008-09-23T02:25:13Z |
I've created some tables in an attempt to simulate full text searching
(minus word stemming) in my program. The table are structed like so:
CREATE TABLE FTS$SCORE (
WORDID BIGINT DEFAULT 0 NOT NULL,
PRODUCTID INTEGER DEFAULT 0 NOT NULL,
"POSITION" SMALLINT DEFAULT 0 NOT NULL,
FIELD_NAME VARCHAR(32) CHARACTER SET ISO8859_1 NOT NULL COLLATE
EN_UK);
ALTER TABLE FTS$SCORE ADD CONSTRAINT PK_FTS$SCORE PRIMARY KEY
(WORDID,PRODUCTID,"POSITION");
ALTER TABLE FTS$SCORE ADD CONSTRAINT FK_FTS$SCORE_FTS$WORD FOREIGN KEY
(WORDID) REFERENCES FTS$WORD(WORDID) ON DELETE CASCADE ON UPDATE
CASCADE;
ALTER TABLE FTS$SCORE ADD CONSTRAINT FK_FTS$SCORE_PRODUCT FOREIGN KEY
(PRODUCTID) REFERENCES PRODUCT(PRODUCTID) ON DELETE CASCADE ON UPDATE
CASCADE;
CREATE INDEX IDX_FTS$SCORE_FIELD_NAME ON FTS$SCORE(FIELD_NAME);
and...
CREATE TABLE FTS$WORD (
WORDID BIGINT NOT NULL,
WORD VARCHAR(80) CHARACTER SET ISO8859_1 NOT NULL COLLATE EN_UK);
ALTER TABLE FTS$WORD ADD CONSTRAINT PK_FTS$WORD PRIMARY KEY (WORDID);
CREATE UNIQUE INDEX IDX_FTS$WORD_WORD ON FTS$WORD(WORD);
and I would query it like so:
SELECT DISTINCT
PV.SECTION,
PV.PARTICIPANT,
PV.TITLE,
PV.TITLE2,
PD.RELEASE_DATE,
PD.DELETE_DATE,
CO.NAME AS DISTRIBUTOR,
PD.CATALOGUENO,
PD.APN,
RA.CODE AS RATING,
PRC.PRICE,
PV.PRODUCTID,
PV.SORT_PARTICIPANT,
PV.SORT_TITLE,
PD.DETAILID,
ME.CODE AS MEDIUM,
EM.CODE AS EXT_MEDIUM,
PD.DESCRIPTION
FROM PRODUCT_VIEW PV
JOIN FTS$SCORE S1 ON PV.PRODUCTID = S1.PRODUCTID
JOIN FTS$SCORE S2 ON PV.PRODUCTID = S2.PRODUCTID
JOIN FTS$SCORE S3 ON PV.PRODUCTID = S3.PRODUCTID
JOIN FTS$SCORE S4 ON PV.PRODUCTID = S4.PRODUCTID
JOIN FTS$SCORE S5 ON PV.PRODUCTID = S5.PRODUCTID
JOIN FTS$SCORE S6 ON PV.PRODUCTID = S6.PRODUCTID
JOIN PRODUCT_DETAIL PD ON PV.PRODUCTID = PD.PRODUCTID
JOIN COMPANY CO ON PD.DISTRIBUTORID = CO.COMPANYID
JOIN MEDIUM ME ON PD.MEDIUMID = ME.MEDIUMID
LEFT JOIN RATING RA ON PD.RATINGID = RA.RATINGID
LEFT JOIN PRICE PRC ON PD.PRICEID = PRC.PRICEID
LEFT JOIN EXTENDED_MEDIUM EM ON PD.EXTENDED_MEDIUMID = EM.EXTENDEDID
WHERE
(S1.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'SINGIN')
AND S1.FIELD_NAME = 'TRACK') AND
(S2.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'IN') AND
S2.FIELD_NAME = 'TRACK') AND
(S3.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'THE') AND
S3.FIELD_NAME = 'TRACK') AND
(S4.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'RAIN') AND
S4.FIELD_NAME = 'TRACK') AND
(S5.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'GENE') AND
S5.FIELD_NAME = 'TRACK_ARTIST') AND
(S6.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'KELLY') AND
S6.FIELD_NAME = 'TRACK_ARTIST') AND
S1.PRODUCTID = S2.PRODUCTID AND
S2.PRODUCTID = S3.PRODUCTID AND
S3.PRODUCTID = S4.PRODUCTID AND
S4.PRODUCTID = S5.PRODUCTID AND
S5.PRODUCTID = S6.PRODUCTID AND
S1."POSITION"+1 = S2."POSITION" AND
S2."POSITION"+1 = S3."POSITION" AND
S3."POSITION"+1 = S4."POSITION" AND
S5."POSITION"+1 = S6."POSITION"
ORDER BY
PV.SORT_PARTICIPANT,
PV.SORT_TITLE,
PV.TITLE2
which results in the following plan:
Plan:
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN SORT (SORT (JOIN (JOIN (JOIN (JOIN (ME NATURAL, PD INDEX
(FK_PRODUCT_DETAIL_MEDIUM), CO INDEX (PK_COMPANY), PV INDEX
(FK_PRODUCT_VIEW_PRODUCT), S1 INDEX (PK_FTS$SCORE), S2 INDEX
(PK_FTS$SCORE), S3 INDEX (PK_FTS$SCORE), S4 INDEX (PK_FTS$SCORE), S5
INDEX (PK_FTS$SCORE), S6 INDEX (PK_FTS$SCORE)), RA INDEX (PK_RATING)),
PRC INDEX (PK_PRICE)), EM INDEX (PK_EXTENDED_MEDIUM))))
The queries don't take too long but I was wondering if there was a
better way of structuring the table or queries or if what I'm
currently doing is basically ok?
TIA
[Non-text portions of this message have been removed]
(minus word stemming) in my program. The table are structed like so:
CREATE TABLE FTS$SCORE (
WORDID BIGINT DEFAULT 0 NOT NULL,
PRODUCTID INTEGER DEFAULT 0 NOT NULL,
"POSITION" SMALLINT DEFAULT 0 NOT NULL,
FIELD_NAME VARCHAR(32) CHARACTER SET ISO8859_1 NOT NULL COLLATE
EN_UK);
ALTER TABLE FTS$SCORE ADD CONSTRAINT PK_FTS$SCORE PRIMARY KEY
(WORDID,PRODUCTID,"POSITION");
ALTER TABLE FTS$SCORE ADD CONSTRAINT FK_FTS$SCORE_FTS$WORD FOREIGN KEY
(WORDID) REFERENCES FTS$WORD(WORDID) ON DELETE CASCADE ON UPDATE
CASCADE;
ALTER TABLE FTS$SCORE ADD CONSTRAINT FK_FTS$SCORE_PRODUCT FOREIGN KEY
(PRODUCTID) REFERENCES PRODUCT(PRODUCTID) ON DELETE CASCADE ON UPDATE
CASCADE;
CREATE INDEX IDX_FTS$SCORE_FIELD_NAME ON FTS$SCORE(FIELD_NAME);
and...
CREATE TABLE FTS$WORD (
WORDID BIGINT NOT NULL,
WORD VARCHAR(80) CHARACTER SET ISO8859_1 NOT NULL COLLATE EN_UK);
ALTER TABLE FTS$WORD ADD CONSTRAINT PK_FTS$WORD PRIMARY KEY (WORDID);
CREATE UNIQUE INDEX IDX_FTS$WORD_WORD ON FTS$WORD(WORD);
and I would query it like so:
SELECT DISTINCT
PV.SECTION,
PV.PARTICIPANT,
PV.TITLE,
PV.TITLE2,
PD.RELEASE_DATE,
PD.DELETE_DATE,
CO.NAME AS DISTRIBUTOR,
PD.CATALOGUENO,
PD.APN,
RA.CODE AS RATING,
PRC.PRICE,
PV.PRODUCTID,
PV.SORT_PARTICIPANT,
PV.SORT_TITLE,
PD.DETAILID,
ME.CODE AS MEDIUM,
EM.CODE AS EXT_MEDIUM,
PD.DESCRIPTION
FROM PRODUCT_VIEW PV
JOIN FTS$SCORE S1 ON PV.PRODUCTID = S1.PRODUCTID
JOIN FTS$SCORE S2 ON PV.PRODUCTID = S2.PRODUCTID
JOIN FTS$SCORE S3 ON PV.PRODUCTID = S3.PRODUCTID
JOIN FTS$SCORE S4 ON PV.PRODUCTID = S4.PRODUCTID
JOIN FTS$SCORE S5 ON PV.PRODUCTID = S5.PRODUCTID
JOIN FTS$SCORE S6 ON PV.PRODUCTID = S6.PRODUCTID
JOIN PRODUCT_DETAIL PD ON PV.PRODUCTID = PD.PRODUCTID
JOIN COMPANY CO ON PD.DISTRIBUTORID = CO.COMPANYID
JOIN MEDIUM ME ON PD.MEDIUMID = ME.MEDIUMID
LEFT JOIN RATING RA ON PD.RATINGID = RA.RATINGID
LEFT JOIN PRICE PRC ON PD.PRICEID = PRC.PRICEID
LEFT JOIN EXTENDED_MEDIUM EM ON PD.EXTENDED_MEDIUMID = EM.EXTENDEDID
WHERE
(S1.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'SINGIN')
AND S1.FIELD_NAME = 'TRACK') AND
(S2.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'IN') AND
S2.FIELD_NAME = 'TRACK') AND
(S3.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'THE') AND
S3.FIELD_NAME = 'TRACK') AND
(S4.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'RAIN') AND
S4.FIELD_NAME = 'TRACK') AND
(S5.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'GENE') AND
S5.FIELD_NAME = 'TRACK_ARTIST') AND
(S6.WORDID = (SELECT WORDID FROM FTS$WORD WHERE WORD = 'KELLY') AND
S6.FIELD_NAME = 'TRACK_ARTIST') AND
S1.PRODUCTID = S2.PRODUCTID AND
S2.PRODUCTID = S3.PRODUCTID AND
S3.PRODUCTID = S4.PRODUCTID AND
S4.PRODUCTID = S5.PRODUCTID AND
S5.PRODUCTID = S6.PRODUCTID AND
S1."POSITION"+1 = S2."POSITION" AND
S2."POSITION"+1 = S3."POSITION" AND
S3."POSITION"+1 = S4."POSITION" AND
S5."POSITION"+1 = S6."POSITION"
ORDER BY
PV.SORT_PARTICIPANT,
PV.SORT_TITLE,
PV.TITLE2
which results in the following plan:
Plan:
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN (FTS$WORD INDEX (IDX_FTS$WORD_WORD))
PLAN SORT (SORT (JOIN (JOIN (JOIN (JOIN (ME NATURAL, PD INDEX
(FK_PRODUCT_DETAIL_MEDIUM), CO INDEX (PK_COMPANY), PV INDEX
(FK_PRODUCT_VIEW_PRODUCT), S1 INDEX (PK_FTS$SCORE), S2 INDEX
(PK_FTS$SCORE), S3 INDEX (PK_FTS$SCORE), S4 INDEX (PK_FTS$SCORE), S5
INDEX (PK_FTS$SCORE), S6 INDEX (PK_FTS$SCORE)), RA INDEX (PK_RATING)),
PRC INDEX (PK_PRICE)), EM INDEX (PK_EXTENDED_MEDIUM))))
The queries don't take too long but I was wondering if there was a
better way of structuring the table or queries or if what I'm
currently doing is basically ok?
TIA
[Non-text portions of this message have been removed]