Wednesday, December 19, 2007

PostgreSQL GROUP_CONCAT

CREATE TABLE produk
(
seq_id serial NOT NULL,
name character varying(100) NOT NULL
) ;


INSERT INTO produk (seq_id, name) VALUES (1, 'beer');
INSERT INTO produk (seq_id, name) VALUES (2, 'in');
INSERT INTO produk (seq_id, name) VALUES (3, 'beer');
INSERT INTO produk (seq_id, name) VALUES (4, 'can');
INSERT INTO produk (seq_id, name) VALUES (6, 'goods');
INSERT INTO produk (seq_id, name) VALUES (7, 'goods');
INSERT INTO produk (seq_id, name) VALUES (8, 'can');
INSERT INTO produk (seq_id, name) VALUES (5, 'goods');


name | id_of_duplicates
----------+------------------
beer | 1,3
can | 4,8
goods | 5,6,7
in | 2



MySQL:

select name, group_concat(cast(seq_id as char)) as id_of_duplicates
from produk
group by name
order by name;


PostgreSQL:

create aggregate array_accum (
sfunc = array_append,
basetype = anyelement,
stype = anyarray,
initcond = '{}'
);

CREATE OR REPLACE FUNCTION _group_concat(text, text)
RETURNS text AS $$
SELECT CASE
WHEN $2 IS NULL THEN $1
WHEN $1 IS NULL THEN $2
ELSE $1 operator(pg_catalog.||) ',' operator(pg_catalog.||) $2
END
$$ IMMUTABLE LANGUAGE SQL;

CREATE AGGREGATE group_concat (
BASETYPE = text,
SFUNC = _group_concat,
STYPE = text
);




first approach:


select name, array_accum(seq_id)
from produk
group by name



select name, array_to_string(array_accum(seq_id), ',')
from produk
group by name;



second approach (mysql-compatible approach):

select name, group_concat(seq_id) as id_of_duplicates
from produk
group by name
order by name;


MySQL GROUP_CONCAT with ordering:

select name, group_concat(cast(seq_id as char) order by seq_id) as id_of_duplicates
from produk
group by name
order by name;

PostgreSQL equivalent:

select name, group_concat(distinct seq_id) as id_of_duplicates
from produk
group by name
order by name;

using customized sort:

select name, group_concat(seq_id) as id_of_duplicates
from
(
select name, seq_id
from produk
order by name, seq_id
) as x
group by name


PostgreSQL is a lot more nicer, it allows you to define your own aggregate function. GROUP_CONCAT is not built-in, with user-defined aggregate, you can create one.

Labels: , , ,

delete duplicate rows (and leaving one copy) in PostgreSQL, MS SQL, MySQL

MS SQL: OK, PostgreSQL: OK, MySQL: NOT OK


delete from product where
seq_id not in (select min(seq_id) from product group by name)

to make this work with MySQL:

delete from product where
seq_id not in (select x from (select min(seq_id) as x from product group by name))




MS SQL: OK, PostgreSQL: OK, MySQL: NOT OK

delete from product where
exists(select * from product x where x.name = product.name and product.seq_id > x.seq_id)



MS SQL: OK, PostgreSQL: OK, MySQL: NOT OK

/* advantage: can re-use existing query's logic for finding the first record of each duplicate */

delete from product where
exists(
select x.name, min(x.seq_id)
from product x
where x.name = product.name
group by x.name
having product.seq_id > min(x.seq_id)
);


MS SQL: OK, PostgreSQL: NOT OK, MySQL: OK

delete product from product
inner join product b
on b.name = product.name and product.seq_id > b.seq_id;

MS SQL: OK, PostgreSQL: OK, MySQL: NOT OK


delete from product where
seq_id in
(select a.seq_id from product a inner join product b on a.name = b.name and a.seq_id > b.seq_id);

to make this work with MySQL:

delete from product where
seq_id in
(select x from (select a.seq_id as x from product a inner join product b on a.name = b.name and a.seq_id > b.seq_id));

/* MySQL is pretty lame in this last case */

Labels: , , , , ,