Saturday, September 20, 2008

[HACKERS] Re: Patch for SQL-Standard Interval output and decoupling DateStyle from IntervalStyle

*** a/doc/src/sgml/config.sgml
--- b/doc/src/sgml/config.sgml
***************
*** 4090,4095 **** SET XML OPTION { DOCUMENT | CONTENT };
--- 4090,4117 ----
</listitem>
</varlistentry>

+ <varlistentry id="guc-intervalstyle" xreflabel="IntervalStyle">
+ <term><varname>IntervalStyle</varname> (<type>string</type>)</term>
+ <indexterm>
+ <primary><varname>IntervalStyle</> configuration parameter</primary>
+ </indexterm>
+ <listitem>
+ <para>
+ Sets the display format for interval values.
+ The value <literal>sql_standard</> will output SQL Standard
+ strings when given intervals that conform to the SQL
+ standard (either year-month only or date-time only; and no
+ mixing of positive and negative components).
+ The value <literal>postgres</> will output intervals in
+ a format that matches what old releases had output when
+ the DateStyle was set to <literal>'ISO'</>.
+ The value <literal>postgres_verbose</> will output intervals in
+ a format that matches what old releases had output when
+ the DateStyle was set to <literal>'SQL'</>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-timezone" xreflabel="timezone">
<term><varname>timezone</varname> (<type>string</type>)</term>
<indexterm>
*** a/doc/src/sgml/datatype.sgml
--- b/doc/src/sgml/datatype.sgml
***************
*** 2213,2218 **** January 8 04:05:06 1999 PST
--- 2213,2305 ----
</para>
</sect2>

+ <sect2 id="interval-output">
+ <title>Interval Output</title>
+
+ <indexterm>
+ <primary>interval</primary>
+ <secondary>output format</secondary>
+ <seealso>formatting</seealso>
+ </indexterm>
+
+ <para>
+ The output format of the interval types can be set to one of the four
+ styles <literal>sql_standard</>,
+ <literal>postgres</>, or <literal>postgres_verbose</>.The default
+ is the <literal>postgres</> format.
+ <xref
+ linkend="interval-style-output-table"> shows examples of each
+ output style.
+ </para>
+
+ <para>
+ The <literal>sql_standard</> style will output SQL standard
+ interval literal strings where the value of the interval
+ value consists of only a year-month component or a datetime
+ component (as required by the sql standard). For an interval
+ containing both a year-month and a datetime component, the
+ output will be a SQL Standard unquoted year-month literal
+ string joined to a SQL Standard unquoted datetime literal
+ string with a space in between.
+ </para>
+
+ <para>
+ The <literal>postgres</> style will output intervals that match
+ the style PostgreSQL 8.3 outputed when the <xref linkend="guc-datestyle">
+ parameter was set to <literal>ISO</>.
+ </para>
+
+ <para>
+ The <literal>postgres_verbose</> style will output intervals that match
+ the style PostgreSQL 8.3 outputed when the <xref linkend="guc-datestyle">
+ parameter was set to <literal>SQL</>.
+ </para>
+
+ <table id="interval-style-output-table">
+ <title>Interval Style Example</title>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>Style Specification</entry>
+ <entry>Year-Month Interval</entry>
+ <entry>DateTime Interval</entry>
+ <entry>Nonstandardrd Extended Interval</entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry>sql_standard</entry>
+ <entry>1-2</entry>
+ <entry>3 4:05:06</entry>
+ <entry>-1-2 +3 -4:05:06</entry>
+ </row>
+ <row>
+ <entry>postgres</entry>
+ <entry>1 year 2 mons</entry>
+ <entry>3 days 04:05:06</entry>
+ <entry> -1 years -2 mons +3 days -04:05:06</entry>
+ </row>
+ <row>
+ <entry>postgres_verbose</entry>
+ <entry>@ 1 year 2 mons</entry>
+ <entry>@ 3 days 4 hours 5 mins 6 secs</entry>
+ <entry>@ 1 year 2 mons -3 days 4 hours 5 mins 6 secs ago</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+ Note that <literal>sql_standard</> style will only produce strictly
+ standards-conforming string sliterals when given a strictly SQL-standard interval
+ value - meaning that it needs to be a pure year-month or datetime
+ interval and not mix positive and negative components.
+ </para>
+
+ </sect2>
+
+
+
<sect2 id="datatype-timezones">
<title>Time Zones</title>

*** a/src/backend/commands/variable.c
--- b/src/backend/commands/variable.c
***************
*** 229,234 **** assign_datestyle(const char *value, bool doit, GucSource source)
--- 229,271 ----


/*
+ * assign_intervalstyle: GUC assign_hook for datestyle
+ */
+ const char *
+ assign_intervalstyle(const char *value, bool doit, GucSource source)
+ {
+ int newIntervalStyle = IntervalStyle;
+ char * result = (char *) malloc(32);
+ if (pg_strcasecmp(value, "postgres") == 0)
+ {
+ newIntervalStyle = INTSTYLE_POSTGRES;
+ }
+ else if (pg_strcasecmp(value, "postgres_verbose") == 0)
+ {
+ newIntervalStyle = INTSTYLE_POSTGRES_VERBOSE;
+ }
+ else if (pg_strcasecmp(value, "sql_standard") == 0)
+ {
+ newIntervalStyle = INTSTYLE_SQL_STANDARD;
+ }
+ else
+ {
+ ereport(GUC_complaint_elevel(source),
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized \"intervalstyle\" key word: \"%s\"",
+ value)));
+ return NULL;
+ }
+ if (doit)
+ {
+ IntervalStyle = newIntervalStyle;
+ strcpy(result, value);
+ }
+ return result;
+ }
+
+
+ /*
* TIMEZONE
*/

*** a/src/backend/utils/adt/datetime.c
--- b/src/backend/utils/adt/datetime.c
***************
*** 3605,3610 **** EncodeDateTime(struct pg_tm * tm, fsec_t fsec, int *tzp, char **tzn, int style,
--- 3605,3624 ----
return TRUE;
}

+ /*
+ * small helper funciton to avoid copy&paste of this ifdef below
+ */
+ void
+ AppendFsec(char * cp,fsec_t fsec) {
+ if (fsec==0) return;
+ #ifdef HAVE_INT64_TIMESTAMP
+ sprintf(cp, ".%06d", Abs(fsec));
+ #else
+ sprintf(cp, ":%012.9f", fabs(fsec));
+ #endif
+ TrimTrailingZeros(cp);
+ }
+

/* EncodeInterval()
* Interpret time structure as a delta time and convert to string.
***************
*** 3613,3618 **** EncodeDateTime(struct pg_tm * tm, fsec_t fsec, int *tzp, char **tzn, int style,
--- 3627,3643 ----
* Actually, afaik ISO does not address time interval formatting,
* but this looks similar to the spec for absolute date/time.
* - thomas 1998-04-30
+ *
+ * Actually, afaik, ISO 8601 does specify formats for "time
+ * intervals...[of the]...format with time-unit designators", which
+ * are pretty ugly. The format looks something like
+ * P1Y1M1DT1H1M1.12345S
+ * but useful for exchanging data with computers instead of humans.
+ * - ron 2003-07-14
+ *
+ * And ISO's SQL 2008 standard specifies standards for
+ * "year-month literal"s (that look like '2-3') and
+ * "day-time literal"s (that look like ('4 5:6:7')
*/
int
EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str)
***************
*** 3621,3626 **** EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str)
--- 3646,3658 ----
bool is_nonzero = FALSE;
char *cp = str;

+ int year = tm->tm_year;
+ int mon = tm->tm_mon;
+ int mday = tm->tm_mday;
+ int hour = tm->tm_hour;
+ int min = tm->tm_min;
+ int sec = tm->tm_sec;
+
/*
* The sign of year and month are guaranteed to match, since they are
* stored internally as "month". But we'll need to check for is_before and
***************
*** 3628,3635 **** EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str)
*/
switch (style)
{
! /* compatible with ISO date formats */
! case USE_ISO_DATES:
if (tm->tm_year != 0)
{
sprintf(cp, "%d year%s",
--- 3660,3738 ----
*/
switch (style)
{
! /* SQL Standard interval literals */
! case INTSTYLE_SQL_STANDARD:
! {
! bool has_negative = (year < 0) || (mon < 0) ||
! (mday < 0) || (hour < 0) ||
! (min < 0) || (sec < 0) || (fsec<0);
! bool has_positive = (year > 0) || (mon > 0) ||
! (mday > 0) || (hour > 0) ||
! (min > 0) || (sec > 0) || (fsec>0);
! bool has_year_month = (year != 0) || (mon != 0);
! bool has_datetime = (hour != 0) || (min != 0) ||
! (sec != 0) || (fsec!= 0) || (mday != 0);
! bool has_day = (mday != 0);
! bool sql_standard_value = (!(has_negative && has_positive)) &&
! (!(has_year_month && has_datetime));
! /*
! * SQL Standard wants only 1 "<sign>" preceeding the whole
! * interval.
! */
! if (has_negative && sql_standard_value)
! {
! sprintf(cp,"-");
! cp++;
! year = -year;
! mon = -mon;
! mday = -mday;
! hour = -hour;
! min = -min;
! sec = -sec;
! fsec = -fsec;
! }
! if (!has_negative && !has_positive)
! {
! sprintf(cp,"0");
! }
! else if (!sql_standard_value)
! {
! /*
! * For non sql-standard interval values,
! * force outputting the signs to avoid
! * ambiguities with intervals with mixed
! * sign components.
! */
! char year_sign = (year<0 || mon<0) ? '-' : '+';
! char day_sign = (mday<0) ? '-' : '+';
! char sec_sign = (hour<0 || min<0 || sec<0 || fsec<0)
! ? '-' : '+';
! sprintf(cp,"%c%d-%d %c%d %c%d:%02d:%02d",
! year_sign,abs(year),abs(mon),
! day_sign,abs(mday),
! sec_sign,abs(hour),abs(min),abs(sec));
! AppendFsec(cp+strlen(cp),fsec);
! }
! else if (has_year_month)
! {
! sprintf(cp,"%d-%d",year,mon);
! }
! else if (has_day)
! {
! sprintf(cp,"%d %d:%02d:%02d",mday,hour,min,sec);
! AppendFsec(cp+strlen(cp),fsec);
! }
! else
! {
! sprintf(cp,"%d:%02d:%02d",hour,min,sec);
! AppendFsec(cp+strlen(cp),fsec);
! }
! cp += strlen(cp);
! break;
! }
!
! /* compatible with postgresql 8.3 when DateStyle = 'iso' */
! case INTSTYLE_POSTGRES:
if (tm->tm_year != 0)
{
sprintf(cp, "%d year%s",
***************
*** 3692,3700 **** EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str)
cp += strlen(cp);
}
}
break;

! case USE_POSTGRES_DATES:
default:
strcpy(cp, "@ ");
cp += strlen(cp);
--- 3795,3809 ----
cp += strlen(cp);
}
}
+ if (!is_nonzero)
+ {
+ strcat(cp, "0");
+ cp += strlen(cp);
+ }
break;

! /* compatible with postgresql 8.3 when DateStyle = 'sql' */
! case INTSTYLE_POSTGRES_VERBOSE:
default:
strcpy(cp, "@ ");
cp += strlen(cp);
***************
*** 3821,3842 **** EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str)
is_before = (tm->tm_sec < 0);
is_nonzero = TRUE;
}
break;
}

- /* identically zero? then put in a unitless zero... */
- if (!is_nonzero)
- {
- strcat(cp, "0");
- cp += strlen(cp);
- }
-
- if (is_before && (style != USE_ISO_DATES))
- {
- strcat(cp, " ago");
- cp += strlen(cp);
- }
-
return 0;
} /* EncodeInterval() */

--- 3930,3948 ----
is_before = (tm->tm_sec < 0);
is_nonzero = TRUE;
}
+ if (!is_nonzero)
+ {
+ strcat(cp, "0");
+ cp += strlen(cp);
+ }
+ if (is_before)
+ {
+ strcat(cp, " ago");
+ cp += strlen(cp);
+ }
break;
}

return 0;
} /* EncodeInterval() */

*** a/src/backend/utils/adt/nabstime.c
--- b/src/backend/utils/adt/nabstime.c
***************
*** 671,677 **** reltimeout(PG_FUNCTION_ARGS)
char buf[MAXDATELEN + 1];

reltime2tm(time, tm);
! EncodeInterval(tm, 0, DateStyle, buf);

result = pstrdup(buf);
PG_RETURN_CSTRING(result);
--- 671,677 ----
char buf[MAXDATELEN + 1];

reltime2tm(time, tm);
! EncodeInterval(tm, 0, IntervalStyle, buf);

result = pstrdup(buf);
PG_RETURN_CSTRING(result);
*** a/src/backend/utils/adt/timestamp.c
--- b/src/backend/utils/adt/timestamp.c
***************
*** 677,683 **** interval_out(PG_FUNCTION_ARGS)
if (interval2tm(*span, tm, &fsec) != 0)
elog(ERROR, "could not convert interval to tm");

! if (EncodeInterval(tm, fsec, DateStyle, buf) != 0)
elog(ERROR, "could not format interval");

result = pstrdup(buf);
--- 677,683 ----
if (interval2tm(*span, tm, &fsec) != 0)
elog(ERROR, "could not convert interval to tm");

! if (EncodeInterval(tm, fsec, IntervalStyle, buf) != 0)
elog(ERROR, "could not format interval");

result = pstrdup(buf);
*** a/src/backend/utils/init/globals.c
--- b/src/backend/utils/init/globals.c
***************
*** 88,93 **** bool ExitOnAnyError = false;
--- 88,94 ----

int DateStyle = USE_ISO_DATES;
int DateOrder = DATEORDER_MDY;
+ int IntervalStyle = INTSTYLE_POSTGRES;
bool HasCTZSet = false;
int CTimeZone = 0;

*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 367,372 **** static bool session_auth_is_superuser;
--- 367,373 ----
static double phony_random_seed;
static char *client_encoding_string;
static char *datestyle_string;
+ static char *intervalstyle_string;
static char *locale_collate;
static char *locale_ctype;
static char *server_encoding_string;
***************
*** 2098,2103 **** static struct config_string ConfigureNamesString[] =
--- 2099,2114 ----
"ISO, MDY", assign_datestyle, NULL
},

+ {
+ {"IntervalStyle", PGC_USERSET, CLIENT_CONN_LOCALE,
+ gettext_noop("Sets the display format for interval values."),
+ gettext_noop(""),
+ GUC_REPORT
+ },
+ &intervalstyle_string,
+ "postgres", assign_intervalstyle, NULL
+ },
+
{
{"default_tablespace", PGC_USERSET, CLIENT_CONN_STATEMENT,
gettext_noop("Sets the default tablespace to create tables and indexes in."),
*** a/src/backend/utils/misc/postgresql.conf.sample
--- b/src/backend/utils/misc/postgresql.conf.sample
***************
*** 426,431 ****
--- 426,432 ----
# - Locale and Formatting -

#datestyle = 'iso, mdy'
+ #intervalstyle = 'postgres'
#timezone = unknown # actually, defaults to TZ environment
# setting
#timezone_abbreviations = 'Default' # Select the set of available time zone
*** a/src/bin/psql/tab-complete.c
--- b/src/bin/psql/tab-complete.c
***************
*** 1953,1958 **** psql_completion(char *text, int start, int end)
--- 1953,1965 ----

COMPLETE_WITH_LIST(my_list);
}
+ else if (pg_strcasecmp(prev2_wd, "IntervalStyle") == 0)
+ {
+ static const char *const my_list[] =
+ {"postgres","postgres_verbose", "sql_standard", NULL};
+
+ COMPLETE_WITH_LIST(my_list);
+ }
else if (pg_strcasecmp(prev2_wd, "GEQO") == 0)
{
static const char *const my_list[] =
*** a/src/include/commands/variable.h
--- b/src/include/commands/variable.h
***************
*** 15,20 ****
--- 15,22 ----

extern const char *assign_datestyle(const char *value,
bool doit, GucSource source);
+ extern const char *assign_intervalstyle(const char *value,
+ bool doit, GucSource source);
extern const char *assign_timezone(const char *value,
bool doit, GucSource source);
extern const char *show_timezone(void);
*** a/src/include/miscadmin.h
--- b/src/include/miscadmin.h
***************
*** 191,196 **** extern PGDLLIMPORT Oid MyDatabaseTableSpace;
--- 191,208 ----

extern int DateStyle;
extern int DateOrder;
+
+ /*
+ * IntervalStyles
+ * INTSTYLE_POSTGRES Like Postgres8.3 when DateStyle = 'iso'
+ * INTSTYLE_POSTGRES_VERBOSE Like Postgres8.3 when DateStyle = 'sql'
+ * INTSTYLE_SQL_STANDARD SQL standard interals
+ */
+ #define INTSTYLE_POSTGRES 0
+ #define INTSTYLE_POSTGRES_VERBOSE 1
+ #define INTSTYLE_SQL_STANDARD 2
+
+ extern int IntervalStyle;

/*
* HasCTZSet is true if user has set timezone as a numeric offset from UTC.
*** a/src/test/regress/sql/alter_table.sql
--- b/src/test/regress/sql/alter_table.sql
***************
*** 60,65 **** ALTER TABLE tmp ADD COLUMN y float4[];
--- 60,67 ----

ALTER TABLE tmp ADD COLUMN z int2[];

+ SET IntervalStyle to postgres_verbose;
+
INSERT INTO tmp (a, b, c, d, e, f, g, h, i, j, k, l, m, n, p, q, r, s, t, u,
v, w, x, y, z)
VALUES (4, 'name', 'text', 4.1, 4.1, 2, '(4.1,4.1,3.1,3.1)',
*** a/src/test/regress/sql/arrays.sql
--- b/src/test/regress/sql/arrays.sql
***************
*** 283,288 **** select '{ }}'::text[];
--- 283,289 ----
select array[];
-- none of the above should be accepted

+ SET intervalstyle to postgres_verbose;
-- all of the following should be accepted
select '{}'::text[];
select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
*** a/src/test/regress/sql/horology.sql
--- b/src/test/regress/sql/horology.sql
***************
*** 73,79 **** SELECT date '1991-02-03' - time with time zone '04:05:06 UTC' AS "Subtract Time
--
-- timestamp, interval arithmetic
--
!
SELECT timestamp without time zone '1996-03-01' - interval '1 second' AS "Feb 29";
SELECT timestamp without time zone '1999-03-01' - interval '1 second' AS "Feb 28";
SELECT timestamp without time zone '2000-03-01' - interval '1 second' AS "Feb 29";
--- 73,79 ----
--
-- timestamp, interval arithmetic
--
! set IntervalStyle to postgres_verbose;
SELECT timestamp without time zone '1996-03-01' - interval '1 second' AS "Feb 29";
SELECT timestamp without time zone '1999-03-01' - interval '1 second' AS "Feb 28";
SELECT timestamp without time zone '2000-03-01' - interval '1 second' AS "Feb 29";
*** a/src/test/regress/sql/interval.sql
--- b/src/test/regress/sql/interval.sql
***************
*** 94,99 **** FROM INTERVAL_MULDIV_TBL;
--- 94,100 ----
DROP TABLE INTERVAL_MULDIV_TBL;

SET DATESTYLE = 'postgres';
+ SET INTERVALSTYLE = 'postgres_verbose';

SELECT '' AS ten, * FROM INTERVAL_TBL;

***************
*** 118,123 **** SELECT justify_interval(interval '1 month -1 hour') as "1 month -1 hour";
--- 119,126 ----

-- test fractional second input, and detection of duplicate units
SET DATESTYLE = 'ISO';
+ SET INTERVALSTYLE = 'postgres';
+
SELECT '1 millisecond'::interval, '1 microsecond'::interval,
'500 seconds 99 milliseconds 51 microseconds'::interval;
SELECT '3 days 5 milliseconds'::interval;
*** a/src/test/regress/sql/reltime.sql
--- b/src/test/regress/sql/reltime.sql
***************
*** 2,7 ****
--- 2,10 ----
-- RELTIME
--

+ -- DateStyle is 'Postgres, MDY' here...
+ SET intervalstyle to postgres_verbose;
+
CREATE TABLE RELTIME_TBL (f1 reltime);

INSERT INTO RELTIME_TBL (f1) VALUES ('@ 1 minute');
***************
*** 23,29 **** INSERT INTO RELTIME_TBL (f1) VALUES ('badly formatted reltime');
INSERT INTO RELTIME_TBL (f1) VALUES ('@ 30 eons ago');

-- test reltime operators
-
SELECT '' AS six, * FROM RELTIME_TBL;

SELECT '' AS five, * FROM RELTIME_TBL
--- 26,31 ----
*** a/src/test/regress/sql/timestamp.sql
--- b/src/test/regress/sql/timestamp.sql
***************
*** 163,168 **** SELECT '' AS "16", d1 FROM TIMESTAMP_TBL
--- 163,171 ----
SELECT '' AS "49", d1 FROM TIMESTAMP_TBL
WHERE d1 >= timestamp without time zone '1997-01-02';

+ -- DateStyle was 'Postgres, MDY' at this point.
+ SET intervalstyle to postgres_verbose;
+
SELECT '' AS "54", d1 - timestamp without time zone '1997-01-02' AS diff
FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01';

*** a/src/test/regress/sql/timestamptz.sql
--- b/src/test/regress/sql/timestamptz.sql
***************
*** 182,187 **** SELECT '' AS "16", d1 FROM TIMESTAMPTZ_TBL
--- 182,190 ----
SELECT '' AS "49", d1 FROM TIMESTAMPTZ_TBL
WHERE d1 >= timestamp with time zone '1997-01-02';

+ -- Datestyle was Postgres, MDY here
+ SET intervalstyle TO postgres_verbose;
+
SELECT '' AS "54", d1 - timestamp with time zone '1997-01-02' AS diff
FROM TIMESTAMPTZ_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01';

Ron Mayer wrote:
> Tom Lane wrote:
>> ...GUC that selected PG traditional, SQL-standard... interval output
>> format seems like it could be a good idea.
>

This is an update to the earlier SQL-standard-interval-literal output
patch that I submitted here:
http://archives.postgresql.org/message-id/48D15471.6080305@cheapcomplexdevices.com

This version fixes a couple bugs in my last patch related to reltime output and
with the new GUC variable, and updated the regression tests to adjust the
new IntervalStyle guc to match the output of the previous regression tests
where the interval output depended on DateStyle.

I've also added it to the Nov CommitFest wiki page.

No comments: