Subject | Contribution to repository: tokenize |
---|---|
Author | robert_p_levy |
Post date | 2006-04-07T21:52:22Z |
Hello firebird developers,
I wrote this code almost 2 years ago, but since it has withstood the
test of time and across different firebird database applications, I
feel it is general and useful enough to warrant exposing to the world
at large.
Notes:
1. It requires ascii_char external function from IB_UDF.
2. MYSTRLEN was implemented as a stored procedure in order to work
around the limitations of existing widely used strlen external
functions.
3. The main procedure is called as a selectable store procedure. Give
it two arguments: a delimited list and a delimiter
EXAMPLE: select ID,TKN from TOKENIZE('red,blue,green,yellow',',');
4. I have found that using it in a join is a nice and powerful
application of the procedure. See ADDCSV procedure defined below
which adds 2 lists of integers.
5. Please also direct questions and comments to
robert.levy@...
...DDL begins here...
drop procedure ADDCSV;
drop procedure TOKENIZE;
drop procedure MYSTRLEN;
drop EXTERNAL FUNCTION ascii_char;
DECLARE EXTERNAL FUNCTION ascii_char
INTEGER
RETURNS CSTRING(1) FREE_IT
ENTRY_POINT 'IB_UDF_ascii_char' MODULE_NAME 'ib_udf';
set TERM ^! ;
create procedure MYSTRLEN(S varchar(10000)) returns (LEN integer) as
declare variable FIRSTCHAR char(1);
declare variable ENDCHAR char(1);
begin
LEN=1;
ENDCHAR=ascii_char(25);
S=''''||S||ENDCHAR||'''';
execute statement 'select SUBSTRING(' || S || ' from ' || cast(LEN
as varchar(8)) || ' for 1) from rdb$database'
into :FIRSTCHAR;
while (FIRSTCHAR<>ENDCHAR) do
begin
execute statement 'select SUBSTRING(' || S || ' from ' || cast
(LEN as varchar(8)) || ' for 1) from rdb$database'
into :FIRSTCHAR;
LEN=LEN+1;
end
LEN=LEN-1;
SUSPEND;
end ^!
create procedure TOKENIZE (S varchar(10000), DELIM char(1))
returns (ID integer, TKN varchar(10000)) AS
declare variable I integer;
declare variable LEN integer;
declare variable FIRSTCHAR char(1);
declare variable S2 varchar(10000);
begin
I = 1;
ID = 0;
FIRSTCHAR='';
TKN='';
select LEN from MYSTRLEN(:S) into :LEN;
while (I <= LEN) do
begin
execute statement
'select SUBSTRING(''' || S || ''' from ' || cast(I as varchar
(8)) || ' for 1) from rdb$database'
into :FIRSTCHAR;
if (FIRSTCHAR=DELIM) then
begin
ID=ID+1;
SUSPEND;
TKN = '';
end
else TKN = (TKN||FIRSTCHAR);
I = I + 1;
end
ID=ID+1;
SUSPEND;
end ^!
/*
EXAMPLE USAGE OF TOKENIZE
Add together two commma-delimited lists of INTEGERS and return a list
of INTEGERS.
*/
create procedure ADDCSV(CSV1 varchar(10000), CSV2 varchar(10000))
returns (TOTCSV varchar(10000)) as
declare variable TOKEN1 varchar(10000);
declare variable TOKEN2 varchar(10000);
begin
TOTCSV='';
for select t1.TKN, t2.TKN from TOKENIZE(:CSV1,',') t1 join TOKENIZE
(:CSV2,',') t2 on (t1.ID=t2.ID)
into :TOKEN1, :TOKEN2 do
begin
if (TOTCSV<>'') then TOTCSV = TOTCSV || ',';
TOTCSV = TOTCSV || cast(cast(TOKEN1 as integer) + cast(TOKEN1 as
integer) as varchar(10000));
end
suspend;
end
^!
set TERM ; ^!
I wrote this code almost 2 years ago, but since it has withstood the
test of time and across different firebird database applications, I
feel it is general and useful enough to warrant exposing to the world
at large.
Notes:
1. It requires ascii_char external function from IB_UDF.
2. MYSTRLEN was implemented as a stored procedure in order to work
around the limitations of existing widely used strlen external
functions.
3. The main procedure is called as a selectable store procedure. Give
it two arguments: a delimited list and a delimiter
EXAMPLE: select ID,TKN from TOKENIZE('red,blue,green,yellow',',');
4. I have found that using it in a join is a nice and powerful
application of the procedure. See ADDCSV procedure defined below
which adds 2 lists of integers.
5. Please also direct questions and comments to
robert.levy@...
...DDL begins here...
drop procedure ADDCSV;
drop procedure TOKENIZE;
drop procedure MYSTRLEN;
drop EXTERNAL FUNCTION ascii_char;
DECLARE EXTERNAL FUNCTION ascii_char
INTEGER
RETURNS CSTRING(1) FREE_IT
ENTRY_POINT 'IB_UDF_ascii_char' MODULE_NAME 'ib_udf';
set TERM ^! ;
create procedure MYSTRLEN(S varchar(10000)) returns (LEN integer) as
declare variable FIRSTCHAR char(1);
declare variable ENDCHAR char(1);
begin
LEN=1;
ENDCHAR=ascii_char(25);
S=''''||S||ENDCHAR||'''';
execute statement 'select SUBSTRING(' || S || ' from ' || cast(LEN
as varchar(8)) || ' for 1) from rdb$database'
into :FIRSTCHAR;
while (FIRSTCHAR<>ENDCHAR) do
begin
execute statement 'select SUBSTRING(' || S || ' from ' || cast
(LEN as varchar(8)) || ' for 1) from rdb$database'
into :FIRSTCHAR;
LEN=LEN+1;
end
LEN=LEN-1;
SUSPEND;
end ^!
create procedure TOKENIZE (S varchar(10000), DELIM char(1))
returns (ID integer, TKN varchar(10000)) AS
declare variable I integer;
declare variable LEN integer;
declare variable FIRSTCHAR char(1);
declare variable S2 varchar(10000);
begin
I = 1;
ID = 0;
FIRSTCHAR='';
TKN='';
select LEN from MYSTRLEN(:S) into :LEN;
while (I <= LEN) do
begin
execute statement
'select SUBSTRING(''' || S || ''' from ' || cast(I as varchar
(8)) || ' for 1) from rdb$database'
into :FIRSTCHAR;
if (FIRSTCHAR=DELIM) then
begin
ID=ID+1;
SUSPEND;
TKN = '';
end
else TKN = (TKN||FIRSTCHAR);
I = I + 1;
end
ID=ID+1;
SUSPEND;
end ^!
/*
EXAMPLE USAGE OF TOKENIZE
Add together two commma-delimited lists of INTEGERS and return a list
of INTEGERS.
*/
create procedure ADDCSV(CSV1 varchar(10000), CSV2 varchar(10000))
returns (TOTCSV varchar(10000)) as
declare variable TOKEN1 varchar(10000);
declare variable TOKEN2 varchar(10000);
begin
TOTCSV='';
for select t1.TKN, t2.TKN from TOKENIZE(:CSV1,',') t1 join TOKENIZE
(:CSV2,',') t2 on (t1.ID=t2.ID)
into :TOKEN1, :TOKEN2 do
begin
if (TOTCSV<>'') then TOTCSV = TOTCSV || ',';
TOTCSV = TOTCSV || cast(cast(TOKEN1 as integer) + cast(TOKEN1 as
integer) as varchar(10000));
end
suspend;
end
^!
set TERM ; ^!