Setting up a always up-to-date generated TSVECTOR column
Although using the built-in DataTypes.TSVECTOR type mentioned by Anatoly is tempting, we ideally also want to make that column be auto-generated from the column it indexes to keep it always up-to-date. I don't think there's an alternative to a raw query there so I'm using something like:
const col = 'mycol'
await sequelize.query(`ALTER TABLE "${MyTable.tableName}"
ADD COLUMN IF NOT EXISTS "${col}_tsvector" TSVECTOR
GENERATED ALWAYS AS (to_tsvector('english', "${col}")) STORED`)
await sequelize.query(`CREATE INDEX "${MyTable.tableName}_${col}_gin_idx"
ON "${Strings.tableName}" USING GIN ("${col}_tsvector")`)
Escaping user-provided queries so to_tsquery doesn't blow up
Another thing you likely will want to think about is that to_tsquery can lead to errors if you just give it user queries directly.
Solutions and workarounds can be found on this thread: PSQLException: ERROR: syntax error in tsquery e.g. you might want to use plainto_tsquery instead of to_tsquery.
Minimal runable example with asserts
Here I put everything together:
main.js
#!/usr/bin/env node
const assert = require('assert')
const { DataTypes, Op, Sequelize } = require('sequelize')
function assertEqual(rows, rowsExpect) {
assert.strictEqual(rows.length, rowsExpect.length)
for (let i = 0; i < rows.length; i++) {
let row = rows[i]
let rowExpect = rowsExpect[i]
for (let key in rowExpect) {
assert.strictEqual(row[key], rowExpect[key])
}
}
}
/** Safely consume a user provided query string to a prefix search tsquery Sequelize literal.
* For example, 'rabbit bee' gets converted to 'rabbit & bee:*' and therefore matches strings
* that contain both the full word "rabbit" and the prefix bee.*. */
function sequelizePostgresqlUserQueryToTsqueryPrefixLiteral(q) {
return sequelize.literal(
`regexp_replace(plainto_tsquery('english', ${sequelize.escape(q)})::text || ':*', '^..$', '')::tsquery`
)
}
async function reset() {
await sequelize.truncate({ cascade: true })
await Strings.create({ mycol: 'chicken elephant' })
await Strings.create({ mycol: 'beetle rabbit' })
await Strings.create({ mycol: 'elephant beetle' })
if (sequelize.options.dialect === 'postgres') {
const col = 'mycol'
await sequelize.query(`ALTER TABLE "${Strings.tableName}" ADD COLUMN "${col}_tsvector" tsvector
GENERATED ALWAYS AS (to_tsvector('english', "${col}")) STORED`)
await sequelize.query(`CREATE INDEX "${Strings.tableName}_${col}_gin_idx" ON "${Strings.tableName}" USING GIN ("${col}_tsvector")`)
}
}
const sequelize = new Sequelize('tmp', undefined, undefined, {
dialect: 'postgres',
host: '/var/run/postgresql',
})
const Strings = sequelize.define('Strings', {
mycol: { type: DataTypes.STRING },
})
;(async () => {
await sequelize.sync({ force: true })
await reset()
let rows
if (sequelize.options.dialect === 'postgres') {
rows = await Strings.findAll({
//where: { mycol_tsvector: { [Op.match]: Sequelize.fn('to_tsquery', 'beetle') } },
where: { mycol_tsvector: { [Op.match]: Sequelize.fn('plainto_tsquery', 'beetle') } },
})
assertEqual(rows, [
{ mycol: 'beetle rabbit' },
{ mycol: 'elephant beetle' },
])
// Prefix search on the last term. Does not blow up for arbitrary user input I believe.
rows = await Strings.findAll({
where: { mycol_tsvector: { [Op.match]: sequelizePostgresqlUserQueryToTsqueryPrefixLiteral('rabbit bee') } },
order: [['mycol', 'ASC']]
})
assertEqual(rows, [
{ mycol: 'beetle rabbit' },
])
// Mostly to check that these cases don't blow up due to bad to_tsquery.
rows = await Strings.findAll({
where: { mycol_tsvector: { [Op.match]: sequelizePostgresqlUserQueryToTsqueryPrefixLiteral('') } },
order: [['mycol', 'ASC']]
})
assertEqual(rows, [])
rows = await Strings.findAll({
where: { mycol_tsvector: { [Op.match]: sequelizePostgresqlUserQueryToTsqueryPrefixLiteral(',') } },
order: [['mycol', 'ASC']]
})
assertEqual(rows, [])
}
})().finally(() => { return sequelize.close() })
package.json
{
"name": "tmp",
"private": true,
"version": "1.0.0",
"dependencies": {
"pg": "8.5.1",
"pg-hstore": "2.3.3",
"sequelize": "6.14.0"
}
}
Tested on PostgreSQL 16.6, Node.js v20.10.0, Ubuntu 24.10.