Adjust ts_debug's output as per my proposal of yesterday: show the
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 22 Oct 2007 20:13:37 +0000 (20:13 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 22 Oct 2007 20:13:37 +0000 (20:13 +0000)
active dictionary and its output lexemes as separate columns, instead
of smashing them into one text column, and lowercase the column names.
Also, define the output rowtype using OUT parameters instead of a
composite type, to be consistent with the other built-in functions.

doc/src/sgml/func.sgml
doc/src/sgml/textsearch.sgml
src/backend/catalog/system_views.sql
src/include/catalog/catversion.h

index b32e789cdd52459ead876479762bce02416de720..e7b8f10477ba0341b6f5f59b22440b55c5475b7d 100644 (file)
@@ -7857,11 +7857,11 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
       </thead>
       <tbody>
        <row>
-        <entry><literal><function>ts_debug</function>(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>)</literal></entry>
-        <entry><type>setof ts_debug</type></entry>
+        <entry><literal><function>ts_debug</function>(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>, OUT <replaceable class="PARAMETER">alias</> <type>text</>, OUT <replaceable class="PARAMETER">description</> <type>text</>, OUT <replaceable class="PARAMETER">token</> <type>text</>, OUT <replaceable class="PARAMETER">dictionaries</> <type>regdictionary[]</>, OUT <replaceable class="PARAMETER">dictionary</> <type>regdictionary</>, OUT <replaceable class="PARAMETER">lexemes</> <type>text[]</>)</literal></entry>
+        <entry><type>setof record</type></entry>
         <entry>test a configuration</entry>
         <entry><literal>ts_debug('english', 'The Brightest supernovaes')</literal></entry>
-        <entry><literal>(lword,"Latin word",The,{english_stem},"english_stem: {}") ...</literal></entry>
+        <entry><literal>(lword,"Latin word",The,{english_stem},english_stem,{}) ...</literal></entry>
        </row>
        <row>
         <entry><literal><function>ts_lexize</function>(<replaceable class="PARAMETER">dict</replaceable> <type>regdictionary</>, <replaceable class="PARAMETER">token</replaceable> <type>text</>)</literal></entry>
index f59724d1232b20cfd44779157d38d338a5434cf5..cc5dd6dd60e51546283e1775738430fae0ca31d9 100644 (file)
@@ -1699,18 +1699,18 @@ ON messages FOR EACH ROW EXECUTE PROCEDURE messages_trigger();
     <itemizedlist  spacing="compact" mark="bullet">
      <listitem>
       <para>
-       <structname>word</> <type>text</> &mdash; the value of a lexeme
+       <replaceable>word</> <type>text</> &mdash; the value of a lexeme
       </para>
      </listitem>
      <listitem>
       <para>
-       <structname>ndoc</> <type>integer</> &mdash; number of documents
+       <replaceable>ndoc</> <type>integer</> &mdash; number of documents
        (<type>tsvector</>s) the word occurred in
       </para>
      </listitem>
      <listitem>
       <para>
-       <structname>nentry</> <type>integer</> &mdash; total number of
+       <replaceable>nentry</> <type>integer</> &mdash; total number of
        occurrences of the word
       </para>
      </listitem>
@@ -1901,8 +1901,8 @@ LIMIT 10;
    as the entire word and as each component:
 
 <programlisting>
-SELECT "Alias", "Description", "Token" FROM ts_debug('foo-bar-beta1');
-    Alias    |          Description          |     Token     
+SELECT alias, description, token FROM ts_debug('foo-bar-beta1');
+    alias    |          description          |     token     
 -------------+-------------------------------+---------------
  hword       | Hyphenated word               | foo-bar-beta1
  lpart_hword | Latin part of hyphenated word | foo
@@ -1917,8 +1917,8 @@ SELECT "Alias", "Description", "Token" FROM ts_debug('foo-bar-beta1');
    instructive example:
 
 <programlisting>
-SELECT "Alias", "Description", "Token" FROM ts_debug('http://foo.com/stuff/index.html');
-  Alias   |  Description  |          Token           
+SELECT alias, description, token FROM ts_debug('http://foo.com/stuff/index.html');
+  alias   |  description  |          token           
 ----------+---------------+--------------------------
  protocol | Protocol head | http://
  url      | URL           | foo.com/stuff/index.html
@@ -2186,25 +2186,23 @@ SELECT ts_lexize('public.simple_dict','The');
     synonym dictionary and put it before the <literal>english_stem</> dictionary:
 
 <programlisting>
-SELECT * FROM ts_debug('english','Paris');
- Alias | Description | Token |  Dictionaries  |    Lexized token
--------+-------------+-------+----------------+----------------------
- lword | Latin word  | Paris | {english_stem} | english_stem: {pari}
-(1 row)
+SELECT * FROM ts_debug('english', 'Paris');
+ alias | description | token |  dictionaries  |  dictionary  | lexemes 
+-------+-------------+-------+----------------+--------------+---------
+ lword | Latin word  | Paris | {english_stem} | english_stem | {pari}
 
-CREATE TEXT SEARCH DICTIONARY synonym (
+CREATE TEXT SEARCH DICTIONARY my_synonym (
     TEMPLATE = synonym,
     SYNONYMS = my_synonyms
 );
 
 ALTER TEXT SEARCH CONFIGURATION english
-    ALTER MAPPING FOR lword WITH synonym, english_stem;
+    ALTER MAPPING FOR lword WITH my_synonym, english_stem;
 
-SELECT * FROM ts_debug('english','Paris');
- Alias | Description | Token |      Dictionaries      |  Lexized token
--------+-------------+-------+------------------------+------------------
- lword | Latin word  | Paris | {synonym,english_stem} | synonym: {paris}
-(1 row)
+SELECT * FROM ts_debug('english', 'Paris');
+ alias | description | token |       dictionaries        | dictionary | lexemes 
+-------+-------------+-------+---------------------------+------------+---------
+ lword | Latin word  | Paris | {my_synonym,english_stem} | my_synonym | {paris}
 </programlisting>
    </para>
 
@@ -2711,7 +2709,14 @@ SHOW default_text_search_config;
   </indexterm>
 
   <synopsis>
-   ts_debug(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>) returns <type>setof ts_debug</>
+   ts_debug(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>,
+            OUT <replaceable class="PARAMETER">alias</> <type>text</>,
+            OUT <replaceable class="PARAMETER">description</> <type>text</>,
+            OUT <replaceable class="PARAMETER">token</> <type>text</>,
+            OUT <replaceable class="PARAMETER">dictionaries</> <type>regdictionary[]</>,
+            OUT <replaceable class="PARAMETER">dictionary</> <type>regdictionary</>,
+            OUT <replaceable class="PARAMETER">lexemes</> <type>text[]</>)
+            returns setof record
   </synopsis>
 
   <para>
@@ -2725,23 +2730,47 @@ SHOW default_text_search_config;
   </para>
 
   <para>
-   <function>ts_debug</>'s result row type is defined as:
+   <function>ts_debug</> returns one row for each token identified in the text
+   by the parser.  The columns returned are
 
-<programlisting>
-CREATE TYPE ts_debug AS (
-    "Alias" text,
-    "Description" text,
-    "Token" text,
-    "Dictionaries" regdictionary[],
-    "Lexized token" text
-);
-</programlisting>
-
-   One row is produced for each token identified by the parser.
-   The first three columns describe the token, and the fourth lists
-   the dictionaries selected by the configuration for that token's type.
-   The last column shows the result of dictionary processing: which
-   dictionary (if any) recognized the token, and what it produced.
+    <itemizedlist  spacing="compact" mark="bullet">
+     <listitem>
+      <para>
+       <replaceable>alias</> <type>text</> &mdash; short name of the token type
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>description</> <type>text</> &mdash; description of the
+       token type
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>token</> <type>text</> &mdash; text of the token
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>dictionaries</> <type>regdictionary[]</> &mdash; the
+       dictionaries selected by the configuration for this token type
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>dictionary</> <type>regdictionary</> &mdash; the dictionary
+       that recognized the token, or <literal>NULL</> if none did
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>lexemes</> <type>text[]</> &mdash; the lexeme(s) produced
+       by the dictionary that recognized the token, or <literal>NULL</> if
+       none did; an empty array (<literal>{}</>) means it was recognized as a
+       stop word
+      </para>
+     </listitem>
+    </itemizedlist>
   </para>
 
   <para>
@@ -2749,33 +2778,32 @@ CREATE TYPE ts_debug AS (
 
 <programlisting>
 SELECT * FROM ts_debug('english','a fat  cat sat on a mat - it ate a fat rats');
- Alias |  Description  | Token | Dictionaries | Lexized token
--------+---------------+-------+--------------+----------------
- lword | Latin word    | a     | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | fat   | {english}    | english: {fat}
- blank | Space symbols |       |              |
- lword | Latin word    | cat   | {english}    | english: {cat}
- blank | Space symbols |       |              |
- lword | Latin word    | sat   | {english}    | english: {sat}
- blank | Space symbols |       |              |
- lword | Latin word    | on    | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | a     | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | mat   | {english}    | english: {mat}
- blank | Space symbols |       |              |
- blank | Space symbols | -     |              |
- lword | Latin word    | it    | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | ate   | {english}    | english: {ate}
- blank | Space symbols |       |              |
- lword | Latin word    | a     | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | fat   | {english}    | english: {fat}
- blank | Space symbols |       |              |
- lword | Latin word    | rats  | {english}    | english: {rat}
-   (24 rows)
+ alias |  description  | token |  dictionaries  |  dictionary  | lexemes 
+-------+---------------+-------+----------------+--------------+---------
+ lword | Latin word    | a     | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | fat   | {english_stem} | english_stem | {fat}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | cat   | {english_stem} | english_stem | {cat}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | sat   | {english_stem} | english_stem | {sat}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | on    | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | a     | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | mat   | {english_stem} | english_stem | {mat}
+ blank | Space symbols |       | {}             |              | 
+ blank | Space symbols | -     | {}             |              | 
+ lword | Latin word    | it    | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | ate   | {english_stem} | english_stem | {ate}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | a     | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | fat   | {english_stem} | english_stem | {fat}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | rats  | {english_stem} | english_stem | {rat}
 </programlisting>
   </para>
 
@@ -2801,34 +2829,33 @@ ALTER TEXT SEARCH CONFIGURATION public.english
 
 <programlisting>
 SELECT * FROM ts_debug('public.english','The Brightest supernovaes');
- Alias |  Description  |    Token    |                   Dictionaries                  |          Lexized token
--------+---------------+-------------+-------------------------------------------------+-------------------------------------
- lword | Latin word    | The         | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {}
- blank | Space symbols |             |                                                 |
- lword | Latin word    | Brightest   | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {bright}
- blank | Space symbols |             |                                                 |
- lword | Latin word    | supernovaes | {public.english_ispell,pg_catalog.english_stem} | pg_catalog.english_stem: {supernova}
-(5 rows)
+ alias |  description  |    token    |         dictionaries          |   dictionary   |   lexemes   
+-------+---------------+-------------+-------------------------------+----------------+-------------
+ lword | Latin word    | The         | {english_ispell,english_stem} | english_ispell | {}
+ blank | Space symbols |             | {}                            |                | 
+ lword | Latin word    | Brightest   | {english_ispell,english_stem} | english_ispell | {bright}
+ blank | Space symbols |             | {}                            |                | 
+ lword | Latin word    | supernovaes | {english_ispell,english_stem} | english_stem   | {supernova}
 </programlisting>
 
   <para>
    In this example, the word <literal>Brightest</> was recognized by the
    parser as a <literal>Latin word</literal> (alias <literal>lword</literal>).
    For this token type the dictionary list is
-   <literal>public.english_ispell</> and
-   <literal>pg_catalog.english_stem</literal>. The word was recognized by
-   <literal>public.english_ispell</literal>, which reduced it to the noun
+   <literal>english_ispell</> and
+   <literal>english_stem</literal>. The word was recognized by
+   <literal>english_ispell</literal>, which reduced it to the noun
    <literal>bright</literal>. The word <literal>supernovaes</literal> is
-   unknown to the <literal>public.english_ispell</literal> dictionary so it
+   unknown to the <literal>english_ispell</literal> dictionary so it
    was passed to the next dictionary, and, fortunately, was recognized (in
-   fact, <literal>public.english_stem</literal> is a Snowball dictionary which
+   fact, <literal>english_stem</literal> is a Snowball dictionary which
    recognizes everything; that is why it was placed at the end of the
    dictionary list).
   </para>
 
   <para>
    The word <literal>The</literal> was recognized by the
-   <literal>public.english_ispell</literal> dictionary as a stop word (<xref
+   <literal>english_ispell</literal> dictionary as a stop word (<xref
    linkend="textsearch-stopwords">) and will not be indexed.
    The spaces are discarded too, since the configuration provides no
    dictionaries at all for them.
@@ -2839,16 +2866,15 @@ SELECT * FROM ts_debug('public.english','The Brightest supernovaes');
    you want to see:
 
 <programlisting>
-SELECT "Alias", "Token", "Lexized token"
+SELECT alias, token, dictionary, lexemes
 FROM ts_debug('public.english','The Brightest supernovaes');
- Alias |    Token    |          Lexized token
--------+-------------+--------------------------------------
- lword | The         | public.english_ispell: {}
- blank |             |
- lword | Brightest   | public.english_ispell: {bright}
- blank |             |
- lword | supernovaes | pg_catalog.english_stem: {supernova}
-(5 rows)
+ alias |    token    |   dictionary   |   lexemes   
+-------+-------------+----------------+-------------
+ lword | The         | english_ispell | {}
+ blank |             |                | 
+ lword | Brightest   | english_ispell | {bright}
+ blank |             |                | 
+ lword | supernovaes | english_stem   | {supernova}
 </programlisting>
   </para>
 
index d1be35ad5713699aee82c1fc9a15002fc279a6cf..bdaceee7baba7c6d0b6fa2a234a1b13571330847 100644 (file)
@@ -386,41 +386,39 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_written_backend() AS buffers_backend,
         pg_stat_get_buf_alloc() AS buffers_alloc;
 
--- Tsearch debug function. Defined here because it'd be pretty unwieldy
+-- Tsearch debug function.  Defined here because it'd be pretty unwieldy
 -- to put it into pg_proc.h
 
-CREATE TYPE ts_debug AS (
-    "Alias" text,
-    "Description" text,
-    "Token" text,
-    "Dictionaries" regdictionary[],
-    "Lexized token" text
-);
-
-COMMENT ON TYPE ts_debug IS 'type returned from ts_debug() function';
-
-CREATE FUNCTION ts_debug(regconfig, text)
-RETURNS SETOF ts_debug AS
+CREATE FUNCTION ts_debug(IN config regconfig, IN document text,
+    OUT alias text,
+    OUT description text,
+    OUT token text,
+    OUT dictionaries regdictionary[],
+    OUT dictionary regdictionary,
+    OUT lexemes text[])
+RETURNS SETOF record AS
 $$
 SELECT 
-    tt.alias AS "Alias",
-    tt.description AS "Description",
-    parse.token AS "Token",
+    tt.alias AS alias,
+    tt.description AS description,
+    parse.token AS token,
     ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary
             FROM pg_catalog.pg_ts_config_map AS m
             WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
             ORDER BY m.mapseqno )
-    AS "Dictionaries",
-    (     
-        SELECT
-            dl.mapdict::pg_catalog.regdictionary  || ': ' || dl.lex::pg_catalog.text
-        FROM
-            ( SELECT mapdict, pg_catalog.ts_lexize(mapdict, parse.token) AS lex
-              FROM pg_catalog.pg_ts_config_map AS m
-              WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
-              ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno ) dl
-        LIMIT 1
-    ) AS "Lexized token"
+    AS dictionaries,
+    ( SELECT mapdict::pg_catalog.regdictionary
+      FROM pg_catalog.pg_ts_config_map AS m
+      WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
+      ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno
+      LIMIT 1
+    ) AS dictionary,
+    ( SELECT pg_catalog.ts_lexize(mapdict, parse.token)
+      FROM pg_catalog.pg_ts_config_map AS m
+      WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
+      ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno
+      LIMIT 1
+    ) AS lexemes
 FROM pg_catalog.ts_parse(
         (SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ), $2 
     ) AS parse,
@@ -434,8 +432,14 @@ LANGUAGE SQL STRICT STABLE;
 COMMENT ON FUNCTION ts_debug(regconfig,text) IS
     'debug function for text search configuration';
 
-CREATE FUNCTION ts_debug(text)
-RETURNS SETOF ts_debug AS
+CREATE FUNCTION ts_debug(IN document text,
+    OUT alias text,
+    OUT description text,
+    OUT token text,
+    OUT dictionaries regdictionary[],
+    OUT dictionary regdictionary,
+    OUT lexemes text[])
+RETURNS SETOF record AS
 $$
     SELECT * FROM pg_catalog.ts_debug( pg_catalog.get_current_ts_config(), $1);
 $$
index 58a9af7cd201401f0f405d3eaaa587dfd5b3d20d..01a3c0800df57fd5263e53c04a3568566e2e4d3f 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200710192
+#define CATALOG_VERSION_NO     200710221
 
 #endif