Subject | Re: UDF design would need array parameter |
---|---|
Author | Ian A. Newby |
Post date | 2005-01-20T15:27Z |
Hi Urs,
I have a stored procedure to do what you want. It requires some of the
standard UDFs to do what you want. It may not be much better however.
CREATE PROCEDURE SP_TOKENIZER (
ORIGINALSTRING VARCHAR(20480) CHARACTER SET NONE,
TOKENS VARCHAR(30) CHARACTER SET NONE)
RETURNS (
TOKEN VARCHAR(1024) CHARACTER SET NONE)
AS
DECLARE VARIABLE COUNTER INTEGER;
DECLARE VARIABLE START INTEGER;
DECLARE VARIABLE FINISH INTEGER;
DECLARE VARIABLE CURRENTCHAR CHAR(1);
DECLARE VARIABLE WHITESPACE CHAR(1);
begin
WHITESPACE = 'F';
TOKEN = null;
if ((TOKENS is null) or (TOKENS = '')) then
begin
TOKENS = ascii_char(9) || ascii_char(10) || ascii_char(11)
|| ascii_char(12) || ascii_char(13) || ' (),.";:[]?!|#{}''';
end
ORIGINALSTRING = cast(substr(TOKENS,1,1) as char(1)) ||
ORIGINALSTRING || cast(substr(TOKENS,1,1) as char(1)) ||
cast(substr(TOKENS,1,1) as char(1));
COUNTER = 1;
START = -1;
FINISH = -1;
while (COUNTER < strlen(ORIGINALSTRING)) do
begin
CURRENTCHAR = substr(ORIGINALSTRING,COUNTER,COUNTER);
if (TOKENS containing CURRENTCHAR) then
begin
if (WHITESPACE='F') then
begin
if (START > -1) then
begin
FINISH = COUNTER -1;
if ((FINISH - START) >1024) then
/* Don't return tokens which are too long! */
START = -1;
else
begin
TOKEN = substr(ORIGINALSTRING, START, FINISH);
START = -1;
suspend;
end
end
WHITESPACE = 'T';
end
end
else
begin
if (WHITESPACE = 'T') then
START = COUNTER;
WHITESPACE = 'F';
end
COUNTER = COUNTER + 1;
end
exit;
end;
Regards
Ian Newby
I have a stored procedure to do what you want. It requires some of the
standard UDFs to do what you want. It may not be much better however.
CREATE PROCEDURE SP_TOKENIZER (
ORIGINALSTRING VARCHAR(20480) CHARACTER SET NONE,
TOKENS VARCHAR(30) CHARACTER SET NONE)
RETURNS (
TOKEN VARCHAR(1024) CHARACTER SET NONE)
AS
DECLARE VARIABLE COUNTER INTEGER;
DECLARE VARIABLE START INTEGER;
DECLARE VARIABLE FINISH INTEGER;
DECLARE VARIABLE CURRENTCHAR CHAR(1);
DECLARE VARIABLE WHITESPACE CHAR(1);
begin
WHITESPACE = 'F';
TOKEN = null;
if ((TOKENS is null) or (TOKENS = '')) then
begin
TOKENS = ascii_char(9) || ascii_char(10) || ascii_char(11)
|| ascii_char(12) || ascii_char(13) || ' (),.";:[]?!|#{}''';
end
ORIGINALSTRING = cast(substr(TOKENS,1,1) as char(1)) ||
ORIGINALSTRING || cast(substr(TOKENS,1,1) as char(1)) ||
cast(substr(TOKENS,1,1) as char(1));
COUNTER = 1;
START = -1;
FINISH = -1;
while (COUNTER < strlen(ORIGINALSTRING)) do
begin
CURRENTCHAR = substr(ORIGINALSTRING,COUNTER,COUNTER);
if (TOKENS containing CURRENTCHAR) then
begin
if (WHITESPACE='F') then
begin
if (START > -1) then
begin
FINISH = COUNTER -1;
if ((FINISH - START) >1024) then
/* Don't return tokens which are too long! */
START = -1;
else
begin
TOKEN = substr(ORIGINALSTRING, START, FINISH);
START = -1;
suspend;
end
end
WHITESPACE = 'T';
end
end
else
begin
if (WHITESPACE = 'T') then
START = COUNTER;
WHITESPACE = 'F';
end
COUNTER = COUNTER + 1;
end
exit;
end;
Regards
Ian Newby