Pregunta

I have implemented full-text search in my Rails app using PostgreSQL's built-in mechanism. I've created the following migration in order to ensure that my index is updated after each insert/update:

class AddFulltextIndexToTracks < ActiveRecord::Migration
  def up
    add_column :tracks, :search_vector, 'tsvector'

    execute <<-SQL
      CREATE INDEX tracks_search_idx
      ON tracks
      USING gin(search_vector);
    SQL

    execute <<-SQL
      DROP TRIGGER IF EXISTS tracks_search_vector_update ON tracks;
    SQL

    execute <<-SQL
      CREATE OR REPLACE FUNCTION tracks_search_vector_update_callback() RETURNS TRIGGER AS $$
      DECLARE 
        new_tsvector tsvector;

      BEGIN
          IF TG_OP = 'INSERT' THEN
            SELECT INTO new_tsvector
              p_search.document
            FROM (SELECT 
              NEW.id AS track_id,
              to_tsvector('simple', coalesce(unaccent(NEW.title), '')) ||
              to_tsvector('simple', coalesce(unaccent(NEW.original_file), '')) ||
              to_tsvector('simple', coalesce(unaccent(NEW.metadata->'title'), '')) ||
              to_tsvector('simple', coalesce(unaccent(NEW.metadata->'artist'), '')) ||
              to_tsvector('simple', coalesce(unaccent(NEW.metadata->'album'), '')) ||
              to_tsvector('simple', coalesce(string_agg(unaccent(track_tags.name), ' '), '')) as document
              FROM tracks
              LEFT OUTER JOIN track_tag_associations ON NEW.id = track_tag_associations.track_id 
              LEFT OUTER JOIN track_tags ON track_tag_associations.track_tag_id = track_tags.id 
              GROUP BY NEW.id) p_search            
              WHERE track_id = NEW.id;

              new.search_vector = new_tsvector;
          END IF;

          IF TG_OP = 'UPDATE' THEN
            SELECT INTO new_tsvector
              p_search.document
            FROM (SELECT 
              NEW.id AS track_id,
              to_tsvector('simple', coalesce(unaccent(NEW.title), '')) ||
              to_tsvector('simple', coalesce(unaccent(NEW.original_file), '')) ||
              to_tsvector('simple', coalesce(unaccent(NEW.metadata->'title'), '')) ||
              to_tsvector('simple', coalesce(unaccent(NEW.metadata->'artist'), '')) ||
              to_tsvector('simple', coalesce(unaccent(NEW.metadata->'album'), '')) ||
              to_tsvector('simple', coalesce(string_agg(unaccent(track_tags.name), ' '), '')) as document
              FROM tracks 
              LEFT OUTER JOIN track_tag_associations ON NEW.id = track_tag_associations.track_id 
              LEFT OUTER JOIN track_tags ON track_tag_associations.track_tag_id = track_tags.id 
              GROUP BY NEW.id) p_search            
              WHERE track_id = NEW.id;

              new.search_vector = new_tsvector;
          END IF;
          RETURN NEW;
      END
      $$ LANGUAGE 'plpgsql';
    SQL

    execute <<-SQL
      CREATE TRIGGER tracks_search_vector_update
      BEFORE INSERT OR UPDATE
      ON tracks
      FOR EACH ROW EXECUTE PROCEDURE
      tracks_search_vector_update_callback();
    SQL

    Track.find_each { |r| r.touch }
  end

  def down
    execute <<-SQL
      DROP TRIGGER IF EXISTS tracks_search_vector_update on tracks;
    SQL

    execute <<-SQL
      DROP FUNCTION IF EXISTS tracks_search_vector_update_callback();
    SQL

    remove_column :tracks, :search_vector, 'tsvector'
  end
end

It works well in development/production.

The problem is that the trigger is not called when I am running specs (RSpec-based, Database Cleaner is used to cleanup the DB).

This is a full log of spec that makes an attempt to use full-text search index:

  ActiveRecord::SchemaMigration Load (0.5ms)  SELECT "schema_migrations".* FROM "schema_migrations"
   (3.2ms)  ALTER TABLE "schema_migrations" DISABLE TRIGGER ALL;ALTER TABLE "editors" DISABLE TRIGGER ALL;ALTER TABLE "editor_authentications" DISABLE TRIGGER ALL;ALTER TABLE "dashboard_wallpapers" DISABLE TRIGGER ALL;ALTER TABLE "plans" DISABLE TRIGGER ALL;ALTER TABLE "editor_channel_roles" DISABLE TRIGGER ALL;ALTER TABLE "track_tags_hierarchies" DISABLE TRIGGER ALL;ALTER TABLE "track_tags" DISABLE TRIGGER ALL;ALTER TABLE "channels" DISABLE TRIGGER ALL;ALTER TABLE "servers" DISABLE TRIGGER ALL;ALTER TABLE "track_tag_associations" DISABLE TRIGGER ALL;ALTER TABLE "track_broadcasts" DISABLE TRIGGER ALL;ALTER TABLE "track_uploads" DISABLE TRIGGER ALL;ALTER TABLE "tracks" DISABLE TRIGGER ALL
   (2.0ms)  select table_name from information_schema.views where table_schema = 'radiokitwebapp_test'

   (28.2ms)  TRUNCATE TABLE "editors", "editor_authentications", "dashboard_wallpapers", "plans", "editor_channel_roles", "track_tags_hierarchies", "track_tags", "channels", "servers", "track_tag_associations", "track_broadcasts", "track_uploads", "tracks" RESTART IDENTITY CASCADE;
   (2.9ms)  ALTER TABLE "schema_migrations" ENABLE TRIGGER ALL;ALTER TABLE "editors" ENABLE TRIGGER ALL;ALTER TABLE "editor_authentications" ENABLE TRIGGER ALL;ALTER TABLE "dashboard_wallpapers" ENABLE TRIGGER ALL;ALTER TABLE "plans" ENABLE TRIGGER ALL;ALTER TABLE "editor_channel_roles" ENABLE TRIGGER ALL;ALTER TABLE "track_tags_hierarchies" ENABLE TRIGGER ALL;ALTER TABLE "track_tags" ENABLE TRIGGER ALL;ALTER TABLE "channels" ENABLE TRIGGER ALL;ALTER TABLE "servers" ENABLE TRIGGER ALL;ALTER TABLE "track_tag_associations" ENABLE TRIGGER ALL;ALTER TABLE "track_broadcasts" ENABLE TRIGGER ALL;ALTER TABLE "track_uploads" ENABLE TRIGGER ALL;ALTER TABLE "tracks" ENABLE TRIGGER ALL
   (0.1ms)  BEGIN

  Plan Exists (0.7ms)  SELECT  1 AS one FROM "plans"  WHERE "plans"."identifier" = 'small' LIMIT 1
  SQL (0.6ms)  INSERT INTO "plans" ("archive_capacity", "default", "drive_capacity", "identifier") VALUES ($1, $2, $3, $4) RETURNING "id"  [["archive_capacity", 26843545600], ["default", "t"], ["drive_capacity", 10737418240], ["identifier", "small"]]
   (0.1ms)  COMMIT


   (0.1ms)  BEGIN
  SQL (0.6ms)  INSERT INTO "servers" ("address", "created_at", "name", "updated_at") VALUES ($1, $2, $3, $4) RETURNING "id"  [["address", "254.199.64.7"], ["created_at", "2014-05-08 23:14:21.580465"], ["name", "server1"], ["updated_at", "2014-05-08 23:14:21.580465"]]
   (0.1ms)  COMMIT
   (0.1ms)  BEGIN
  Plan Load (0.3ms)  SELECT  "plans".* FROM "plans"  WHERE "plans"."id" = $1 LIMIT 1  [["id", 0]]
  Plan Load (0.6ms)  SELECT  "plans".* FROM "plans"  WHERE "plans"."default" = 't'  ORDER BY "plans"."id" ASC LIMIT 1
  SQL (1.0ms)  INSERT INTO "channels" ("created_at", "name", "plan_id", "server_id", "updated_at") VALUES ($1, $2, $3, $4, $5) RETURNING "id"  [["created_at", "2014-05-08 23:14:21.602073"], ["name", "eum"], ["plan_id", 1], ["server_id", 1], ["updated_at", "2014-05-08 23:14:21.602073"]]
  SQL (0.3ms)  UPDATE "servers" SET "channels_count" = COALESCE("channels_count", 0) + 1 WHERE "servers"."id" = 1
  SQL (1.1ms)  INSERT INTO "track_tags" ("channel_id", "created_at", "updated_at") VALUES ($1, $2, $3) RETURNING "id"  [["channel_id", 1], ["created_at", "2014-05-08 23:14:21.641002"], ["updated_at", "2014-05-08 23:14:21.641002"]]
   (0.3ms)  SELECT pg_try_advisory_lock(2085799232), 1399590861.6440396
  SQL (0.5ms)  INSERT INTO "track_tags_hierarchies" ("ancestor_id", "descendant_id", "generations") VALUES ($1, $2, $3)  [["ancestor_id", 1], ["descendant_id", 1], ["generations", 0]]
  TrackTag Load (0.5ms)  SELECT "track_tags".* FROM "track_tags"  WHERE "track_tags"."parent_id" = $1  ORDER BY name  [["parent_id", 1]]
   (0.4ms)  SELECT pg_advisory_unlock(2085799232), 1399590861.6652837
   (0.3ms)  COMMIT
  TrackTag Load (0.4ms)  SELECT  "track_tags".* FROM "track_tags"  WHERE "track_tags"."channel_id" = $1  ORDER BY "track_tags"."id" ASC LIMIT 1  [["channel_id", 1]]
   (0.1ms)  BEGIN
  Channel Load (0.4ms)  SELECT  "channels".* FROM "channels"  WHERE "channels"."id" = $1 LIMIT 1  [["id", 1]]
  TrackTag Exists (0.3ms)  SELECT  1 AS one FROM "track_tags"  WHERE ("track_tags"."name" = 'taggy' AND "track_tags"."parent_id" = 1) LIMIT 1
  SQL (0.3ms)  INSERT INTO "track_tags" ("channel_id", "created_at", "name", "parent_id", "updated_at") VALUES ($1, $2, $3, $4, $5) RETURNING "id"  [["channel_id", 1], ["created_at", "2014-05-08 23:14:21.678220"], ["name", "taggy"], ["parent_id", 1], ["updated_at", "2014-05-08 23:14:21.678220"]]
   (0.2ms)  SELECT pg_try_advisory_lock(2085799232), 1399590861.6800814
  SQL (0.2ms)  INSERT INTO "track_tags_hierarchies" ("ancestor_id", "descendant_id", "generations") VALUES ($1, $2, $3)  [["ancestor_id", 2], ["descendant_id", 2], ["generations", 0]]
   (0.4ms)              INSERT INTO "track_tags_hierarchies"
              (ancestor_id, descendant_id, generations)
            SELECT x.ancestor_id, 2, x.generations + 1
            FROM "track_tags_hierarchies" x
            WHERE x.descendant_id = 1

  TrackTag Load (0.3ms)  SELECT "track_tags".* FROM "track_tags"  WHERE "track_tags"."parent_id" = $1  ORDER BY name  [["parent_id", 2]]
   (0.1ms)  SELECT pg_advisory_unlock(2085799232), 1399590861.6850104
   (0.1ms)  COMMIT
   (0.2ms)  BEGIN
  SQL (4.8ms)  INSERT INTO "tracks" ("channel_id", "created_at", "metadata", "original_file", "original_file_mime_type", "original_file_size", "title", "updated_at") VALUES ($1, $2, $3, $4, $5, $6, $7, $8) RETURNING "id"  [["channel_id", 1], ["created_at", "2014-05-08 23:14:21.710895"], ["metadata", ""], ["original_file", "sine-stereo-440hz-0point8-1sec-128kbit-cbr.mp3"], ["original_file_mime_type", "audio/mpeg"], ["original_file_size", 17135], ["title", "sine-stereo-440hz-0point8-1sec-128kbit-cbr"], ["updated_at", "2014-05-08 23:14:21.710895"]]
  SQL (0.8ms)  UPDATE "channels" SET drive_usage = drive_usage + 17135 WHERE "channels"."id" = 1
   (0.2ms)  COMMIT
   (0.1ms)  BEGIN
  SQL (1.1ms)  INSERT INTO "track_tag_associations" ("created_at", "track_id", "track_tag_id", "updated_at") VALUES ($1, $2, $3, $4) RETURNING "id"  [["created_at", "2014-05-08 23:14:21.733305"], ["track_id", 1], ["track_tag_id", 2], ["updated_at", "2014-05-08 23:14:21.733305"]]
  SQL (2.3ms)  UPDATE "tracks" SET "updated_at" = '2014-05-08 23:14:21.739704' WHERE "tracks"."id" = 1
   (0.1ms)  COMMIT
  SQL (1.1ms)  UPDATE "tracks" SET "duration" = NULL, "metadata" = '', "analysis_state" = 0 WHERE "tracks"."id" = 1
   (0.6ms)  SELECT COUNT(*) FROM "tracks"  WHERE (tracks.search_vector @@ to_tsquery('simple', 'abc+def') )
   (1.8ms)  ALTER TABLE "schema_migrations" DISABLE TRIGGER ALL;ALTER TABLE "editors" DISABLE TRIGGER ALL;ALTER TABLE "editor_authentications" DISABLE TRIGGER ALL;ALTER TABLE "dashboard_wallpapers" DISABLE TRIGGER ALL;ALTER TABLE "plans" DISABLE TRIGGER ALL;ALTER TABLE "editor_channel_roles" DISABLE TRIGGER ALL;ALTER TABLE "track_tags_hierarchies" DISABLE TRIGGER ALL;ALTER TABLE "track_tags" DISABLE TRIGGER ALL;ALTER TABLE "channels" DISABLE TRIGGER ALL;ALTER TABLE "servers" DISABLE TRIGGER ALL;ALTER TABLE "track_tag_associations" DISABLE TRIGGER ALL;ALTER TABLE "track_broadcasts" DISABLE TRIGGER ALL;ALTER TABLE "track_uploads" DISABLE TRIGGER ALL;ALTER TABLE "tracks" DISABLE TRIGGER ALL
   (0.6ms)  DELETE FROM "editors";
   (0.3ms)  DELETE FROM "editor_authentications";
   (0.2ms)  DELETE FROM "dashboard_wallpapers";
   (0.2ms)  DELETE FROM "plans";
   (0.3ms)  DELETE FROM "editor_channel_roles";
   (0.1ms)  DELETE FROM "track_tags_hierarchies";
   (0.2ms)  DELETE FROM "track_tags";
   (0.2ms)  DELETE FROM "channels";
   (0.2ms)  DELETE FROM "servers";
   (0.2ms)  DELETE FROM "track_tag_associations";
   (0.5ms)  DELETE FROM "track_broadcasts";
   (0.4ms)  DELETE FROM "track_uploads";
   (0.2ms)  DELETE FROM "tracks";
   (1.5ms)  ALTER TABLE "schema_migrations" ENABLE TRIGGER ALL;ALTER TABLE "editors" ENABLE TRIGGER ALL;ALTER TABLE "editor_authentications" ENABLE TRIGGER ALL;ALTER TABLE "dashboard_wallpapers" ENABLE TRIGGER ALL;ALTER TABLE "plans" ENABLE TRIGGER ALL;ALTER TABLE "editor_channel_roles" ENABLE TRIGGER ALL;ALTER TABLE "track_tags_hierarchies" ENABLE TRIGGER ALL;ALTER TABLE "track_tags" ENABLE TRIGGER ALL;ALTER TABLE "channels" ENABLE TRIGGER ALL;ALTER TABLE "servers" ENABLE TRIGGER ALL;ALTER TABLE "track_tag_associations" ENABLE TRIGGER ALL;ALTER TABLE "track_broadcasts" ENABLE TRIGGER ALL;ALTER TABLE "track_uploads" ENABLE TRIGGER ALL;ALTER TABLE "tracks" ENABLE TRIGGER ALL

While triggers are initially disabled, due to Database Cleaner activity, they are enabled again before the actual test runs.

In development/production after creating/updating a record, Track#search_vector contains data. In test it is always nil.

What am I doing wrong?

¿Fue útil?

Solución

Be sure to use a :deletion strategy (DatabaseCleaner.clean_with(:deletion) and DatabaseCleaner.strategy = :deletion). If you use :truncate, my guess is that it will basically recreate the table, and in doing so won't re-build the table trigger.

See Postgresql Truncation speed

A TRUNCATE gives you a completely new table and indexes as if they were just CREATEed. It's like you deleted all the records, reindexed the table and did a vacuum full.

I hope this works for you - works for me.

Licenciado bajo: CC-BY-SA con atribución
No afiliado a StackOverflow
scroll top