Saturday, June 21, 2008

Re: [PATCHES] Simplify formatting.c

Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.142
diff -c -c -r1.142 formatting.c
*** src/backend/utils/adt/formatting.c 17 Jun 2008 16:09:06 -0000 1.142
--- src/backend/utils/adt/formatting.c 21 Jun 2008 20:00:45 -0000
***************
*** 1499,1526 ****
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
wchar_t *workspace;
! text *in_text;
! text *out_text;
! int i;

! in_text = cstring_to_text(buff);
! workspace = texttowcs(in_text);

! for (i = 0; workspace[i] != 0; i++)
{
if (wasalnum)
! workspace[i] = towlower(workspace[i]);
else
! workspace[i] = towupper(workspace[i]);
! wasalnum = iswalnum(workspace[i]);
}

! out_text = wcstotext(workspace, i);
! result = text_to_cstring(out_text);

pfree(workspace);
- pfree(in_text);
- pfree(out_text);
}
else
#endif /* USE_WIDE_UPPER_LOWER */
--- 1499,1525 ----
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
wchar_t *workspace;
! int curr_char = 0;

! /* Output workspace cannot have more codes than input bytes */
! workspace = (wchar_t *) palloc((strlen(buff) + 1) * sizeof(wchar_t));

! char2wchar(workspace, strlen(buff) + 1, buff, strlen(buff) + 1);
!
! for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
if (wasalnum)
! workspace[curr_char] = towlower(workspace[curr_char]);
else
! workspace[curr_char] = towupper(workspace[curr_char]);
! wasalnum = iswalnum(workspace[curr_char]);
}

! /* Make result large enough; case change might change number of bytes */
! result = palloc(curr_char * MB_CUR_MAX + 1);

+ wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
pfree(workspace);
}
else
#endif /* USE_WIDE_UPPER_LOWER */
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.80
diff -c -c -r1.80 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c 17 Jun 2008 16:09:06 -0000 1.80
--- src/backend/utils/adt/oracle_compat.c 21 Jun 2008 20:00:45 -0000
***************
*** 467,530 ****
Datum
initcap(PG_FUNCTION_ARGS)
{
! #ifdef USE_WIDE_UPPER_LOWER

! /*
! * Use wide char code only when max encoding length > 1 and ctype != C.
! * Some operating systems fail with multi-byte encodings and a C locale.
! * Also, for a C locale there is no need to process as multibyte.
! */
! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! {
! text *string = PG_GETARG_TEXT_PP(0);
! text *result;
! wchar_t *workspace;
! int wasalnum = 0;
! int i;
!
! workspace = texttowcs(string);
!
! for (i = 0; workspace[i] != 0; i++)
! {
! if (wasalnum)
! workspace[i] = towlower(workspace[i]);
! else
! workspace[i] = towupper(workspace[i]);
! wasalnum = iswalnum(workspace[i]);
! }
!
! result = wcstotext(workspace, i);
!
! pfree(workspace);
!
! PG_RETURN_TEXT_P(result);
! }
! else
! #endif /* USE_WIDE_UPPER_LOWER */
! {
! text *string = PG_GETARG_TEXT_P_COPY(0);
! int wasalnum = 0;
! char *ptr;
! int m;
!
! /*
! * Since we copied the string, we can scribble directly on the value
! */
! ptr = VARDATA(string);
! m = VARSIZE(string) - VARHDRSZ;

! while (m-- > 0)
! {
! if (wasalnum)
! *ptr = tolower((unsigned char) *ptr);
! else
! *ptr = toupper((unsigned char) *ptr);
! wasalnum = isalnum((unsigned char) *ptr);
! ptr++;
! }
!
! PG_RETURN_TEXT_P(string);
! }
}


--- 467,482 ----
Datum
initcap(PG_FUNCTION_ARGS)
{
! char *in_string, *out_string;
! text *result;

! in_string = text_to_cstring(PG_GETARG_TEXT_PP(0));
! out_string = str_initcap(in_string);
! pfree(in_string);
! result = cstring_to_text(out_string);
! pfree(out_string);

! PG_RETURN_TEXT_P(result);
}


Bruce Momjian wrote:
> Bruce Momjian wrote:
> > Alvaro Herrera wrote:
> > > Bruce Momjian wrote:
> > >
> > > > I moved str_initcap() over into oracle_compat.c and then had initcap()
> > > > convert to/from TEXT to call it. The code is a little weird because
> > > > str_initcap() needs to convert to text to use texttowcs(), so in
> > > > multibyte encodings initcap converts the string to text, then to char,
> > > > then to text to call texttowcs(). I didn't see a cleaner way to do
> > > > this.
> > >
> > > Why not use wchar2char? It seems there's room for extra cleanup here.
> > >
> > > Also, the prototype of str_initcap in builtins.h looks out of place.
> >
> > I talked to Alvaro on IM, and there is certainly much more cleanup to do
> > in this area. I will work from the bottom up. First, is moving the
> > USE_WIDE_UPPER_LOWER define to c.h, and removing TS_USE_WIDE and using
> > USE_WIDE_UPPER_LOWER instead. Patch attached and applied.
>
> The second step is to move wchar2char() and char2wchar() from tsearch
> into /mb to be easier to use for other modules; also move pnstrdup().

The third step is for oracle_compat.c::initcap() to use
formatting.c::str_initcap(). You can see the result; patch attached
(not applied).

This greatly reduces the size of initcap(), with the downside that we
are making two extra copies of the string to convert it to/from char*.

Is this acceptable? If it is, I will do the same for upper()/lower()
with similar code size reduction and modularity.

If not, perhaps I should keep the non-multibyte code in initcap() and
have only the multibyte case use str_initcap().

--
Bruce Momjian <bruce@momjian.us>

http://momjian.us

EnterpriseDB

http://enterprisedb.com

+ If your life is a hard drive, Christ can be your backup. +

No comments: