Fix lock order when dropping index
When an index is dropped, it is necessary to lock the heap table (of
the index) before the index, since all normal operations take the locks
in this order. When dropping an index, we did not take all the
necessary locks in the right order before calling
`performMultipleDeletions`, which can cause deadlocks when dropping an
index on a hypertable at the same time as running a utility statement
that takes heavy locks, e.g., VACUUM or ANALYZE.

Also add an isolation test that generates a deadlock if the index and
table locks are not taken in the correct order.
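
As a rough sketch of the lock order the fix establishes (illustrative only, not the patch itself; IndexGetRelation() and LockRelationOid() are the standard PostgreSQL server functions the change below also uses, and lock_index_for_drop is a made-up name):

#include "postgres.h"
#include "catalog/index.h"  /* IndexGetRelation() */
#include "storage/lmgr.h"   /* LockRelationOid() */

/*
 * Illustrative only: lock the heap table of an index before the index
 * itself, mirroring the table-first, index-second order that normal
 * operations such as VACUUM and ANALYZE use.
 */
static void
lock_index_for_drop(Oid indexOid)
{
	/* missing_ok = true: the index may have been dropped concurrently */
	Oid heapOid = IndexGetRelation(indexOid, true);

	if (OidIsValid(heapOid))
		LockRelationOid(heapOid, AccessExclusiveLock); /* table first */

	LockRelationOid(indexOid, AccessExclusiveLock); /* index second */
}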
mkindahl committed Jan 30, 2025
1 parent 06ccc50 commit e584632
Showing 5 changed files with 240 additions and 22 deletions.
1 change: 1 addition & 0 deletions .unreleased/pr_7600
@@ -0,0 +1 @@
Fixes: #7600 Fix lock order when dropping index
106 changes: 84 additions & 22 deletions src/chunk_index.c
@@ -573,17 +573,70 @@ typedef struct ChunkIndexDeleteData
bool drop_index;
} ChunkIndexDeleteData;

/* Find all internal dependencies to be able to delete all the objects in one
/*
* Lock object.
*
* In particular, we need to ensure that we lock the table of an index before
* locking the index, or run the risk of ending up in a deadlock since the
* normal locking order is table first, index second. Since this is not
* a concurrent delete, we take a strong lock.
*
* It is also necessary that the parent table is locked first, but we have
* already done that at this stage, so it does not need to be done explicitly.
*/
static bool
chunk_lock_object_for_deletion(const ObjectAddress *obj)
{
/*
* If we're locking an index, we need to lock the table first. See
* RangeVarCallbackForDropRelation() in tablecmds.c. We can ignore
partitioned indexes since we do not use them.
*/
char relkind = get_rel_relkind(obj->objectId);

/*
* If we cannot find the object, it might have been concurrently deleted
* (we do not have locks on objects yet).
*/
if (relkind == '\0')
return false;

if (relkind == RELKIND_INDEX)
{
Oid heapOid = IndexGetRelation(obj->objectId, true);
if (OidIsValid(heapOid))
LockRelationOid(heapOid, AccessExclusiveLock);
}

LockRelationOid(obj->objectId, AccessExclusiveLock);
return true;
}

/*
* Find all internal dependencies to be able to delete all the objects in one
* go. We do this by scanning the dependency table and keeping all the tables
* in our internal schema. */
static void
chunk_collect_objects_for_deletion(const ObjectAddress *relobj, ObjectAddresses *objects)
* in our internal schema.
*
* We also lock the objects in the correct order (meaning table first, index
* second) here to make sure that we do not end up with deadlocks.
*
* We return 'true' if we added any objects, and 'false' otherwise.
*/
static bool
chunk_collect_and_lock_objects_for_deletion(const ObjectAddress *relobj, ObjectAddresses *objects)
{
Relation deprel = table_open(DependRelationId, RowExclusiveLock);
ScanKeyData scankey[2];
SysScanDesc scan;
HeapTuple tup;

/*
* If the object disappeared before we managed to get a lock on it, there
* is nothing more to do so just return early and indicate that there are
* no objects to delete.
*/
if (!chunk_lock_object_for_deletion(relobj))
return false;

add_exact_object_address(relobj, objects);

ScanKeyInit(&scankey[0],
@@ -608,18 +661,13 @@ chunk_collect_objects_for_deletion(const ObjectAddress *relobj, ObjectAddresses
{
Form_pg_depend record = (Form_pg_depend) GETSTRUCT(tup);
ObjectAddress refobj = { .classId = record->refclassid, .objectId = record->refobjid };

switch (record->deptype)
{
case DEPENDENCY_INTERNAL:
add_exact_object_address(&refobj, objects);
break;
default:
continue; /* Do nothing */
}
if (record->deptype == DEPENDENCY_INTERNAL && chunk_lock_object_for_deletion(&refobj))
add_exact_object_address(&refobj, objects);

}

systable_endscan(scan);
table_close(deprel, RowExclusiveLock);
return true;
}

static ScanTupleResult
@@ -642,7 +690,8 @@ chunk_index_tuple_delete(TupleInfo *ti, void *data)

if (OidIsValid(idxobj.objectId))
{
/* If we use performDeletion here it will fail if there are
/*
* If we use performDeletion() here it will fail if there are
* internal dependencies on the object since we are restricting
* the cascade.
*
@@ -651,15 +700,28 @@ chunk_index_tuple_delete(TupleInfo *ti, void *data)
* internal dependencies and use the function
* performMultipleDeletions.
*
* The function performMultipleDeletions accept a list of objects
* and if there are dependencies between any of the objects given
* to the function, it will not generate an error for that but
* rather proceed with the deletion. If there are any dependencies
* (internal or not) outside this set of objects, it will still
* abort the deletion and print an error. */
* We lock the objects to delete first to make sure that the lock
* order is correct. This is how it is done inside RemoveRelations(),
* and performMultipleDeletions() expects these locks to already have
* been taken. If they have not, it will take only very rudimentary
* locks, which can cause deadlocks in some cases because the lock
* order is not correct.
*
* Since we do not have any locks on any objects at this point,
* the relations might have disappeared before we had a chance to
* lock them. In this case it is not necessary to do an explicit
* call to performMultipleDeletions().
*
* The function performMultipleDeletions() accepts a list of
* objects and if there are dependencies between any of the
* objects given to the function, it will not generate an error
* for that but rather proceed with the deletion. If there are any
* dependencies (internal or not) outside this set of objects, it
* will still abort the deletion and print an error.
*/
ObjectAddresses *objects = new_object_addresses();
chunk_collect_objects_for_deletion(&idxobj, objects);
performMultipleDeletions(objects, DROP_RESTRICT, 0);
if (chunk_collect_and_lock_objects_for_deletion(&idxobj, objects))
performMultipleDeletions(objects, DROP_RESTRICT, 0);
free_object_addresses(objects);
}
}
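
Stripped of the chunk-specific pg_depend scan, the pattern used above can be sketched roughly as follows (a simplified illustration, not the actual code; drop_index_with_dependencies is a made-up name, while new_object_addresses(), add_exact_object_address(), performMultipleDeletions(), and free_object_addresses() are the PostgreSQL dependency APIs used in the patch):

#include "postgres.h"
#include "catalog/dependency.h" /* performMultipleDeletions() and friends */
#include "catalog/index.h"      /* IndexGetRelation() */
#include "catalog/pg_class.h"   /* RelationRelationId */
#include "nodes/parsenodes.h"   /* DropBehavior */
#include "storage/lmgr.h"       /* LockRelationOid() */

/*
 * Simplified sketch: lock the heap table, then the index, and only then
 * hand the whole object set to performMultipleDeletions() so that
 * internal dependencies between the listed objects do not abort the drop.
 */
static void
drop_index_with_dependencies(Oid indexOid)
{
	ObjectAddress idxobj = {
		.classId = RelationRelationId,
		.objectId = indexOid,
	};
	ObjectAddresses *objects = new_object_addresses();
	Oid heapOid = IndexGetRelation(indexOid, true);

	/* Table first, index second, matching the normal locking order */
	if (OidIsValid(heapOid))
		LockRelationOid(heapOid, AccessExclusiveLock);
	LockRelationOid(indexOid, AccessExclusiveLock);

	add_exact_object_address(&idxobj, objects);
	performMultipleDeletions(objects, DROP_RESTRICT, 0);
	free_object_addresses(objects);
}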
24 changes: 24 additions & 0 deletions tsl/test/isolation/expected/deadlock_drop_index_vacuum.out
@@ -0,0 +1,24 @@
Parsed test spec with 4 sessions

starting permutation: S1_lock S3_vacuum S2_lock S1_commit S4_drop_index S2_commit
step S1_lock:
LOCK TABLE _timescaledb_internal.metrics_chunk_2 IN ACCESS EXCLUSIVE MODE;

step S3_vacuum:
VACUUM ANALYZE _timescaledb_internal.metrics_chunk_2;
<waiting ...>
step S2_lock:
LOCK TABLE _timescaledb_internal.metrics_chunk_2 IN ACCESS EXCLUSIVE MODE;
<waiting ...>
step S1_commit:
COMMIT;

step S2_lock: <... completed>
step S4_drop_index:
DROP INDEX metrics_device_time_idx;
<waiting ...>
step S2_commit:
COMMIT;

step S3_vacuum: <... completed>
step S4_drop_index: <... completed>
1 change: 1 addition & 0 deletions tsl/test/isolation/specs/CMakeLists.txt
@@ -20,6 +20,7 @@ list(
cagg_multi_iso.spec
cagg_concurrent_refresh.spec
deadlock_drop_chunks_compress.spec
deadlock_drop_index_vacuum.spec
parallel_compression.spec
osm_range_updates_iso.spec)

130 changes: 130 additions & 0 deletions tsl/test/isolation/specs/deadlock_drop_index_vacuum.spec
@@ -0,0 +1,130 @@
# This file and its contents are licensed under the Timescale License.
# Please see the included NOTICE for copyright information and
# LICENSE-TIMESCALE for a copy of the license.

# The order of locks in DROP INDEX and VACUUM ANALYZE was wrong: DROP
# INDEX took the locks in the order index, then table, while VACUUM
# ANALYZE took them in the order table, then index.
#
# This test creates a deadlock if the locking order is wrong. The
# problem here is that VACUUM takes two locks: first one to read the
# table and then one to do the actual vacuum. For this reason we need
# two sessions that end up in the deadlock (VACUUM ANALYZE and DROP
# INDEX, respectively) and two extra sessions working in lock-step to
# create the deadlock.
#
# It can be illustrated with this sequence, where we have a chunk
# table and a chunk index. The sessions between brackets ([]) are the
# lock queue, and the session in angle brackets (<>) is the one that
# holds the lock on the object in question:
#
# S1: Lock chunk from hypertable
# index: []
# table: <S1> []
# S3: Start VACUUM ANALYZE; it will attempt to lock the chunk table but gets queued.
# index: []
# table: <S1> [S3]
# S2: Lock chunk table from hypertable, which will be queued
# index: []
# table: <S1> [S3 S2]
# S1: Unlock chunk table
# index: []
# table: [S3 S2]
# S3: VACUUM ANALYZE continues and takes the lock on the chunk table.
# index: []
# table: <S3> [S2]
# S3: VACUUM ANALYZE will release the lock on the chunk table.
# index: []
# table: [S2]
# S3: VACUUM ANALYZE will attempt to lock the chunk table again
# index: []
# table: [S2 S3]
# S2: The LOCK statement gets the lock and VACUUM will wait
# index: []
# table: <S2> [S3]
# S4: DROP INDEX starts and takes the lock on the index first and then
# is queued for the chunk table
# index: <S4> []
# table: <S2> [S3 S4]
# S2: Release lock on chunk table
# index: <S4> []
# table: [S3 S4]
# S3: VACUUM continues and takes table lock and then tries index lock
# index: <S4> [S3]
# table: <S3> [S4]
# Deadlock

setup {
CREATE TABLE metrics (time timestamptz, device_id integer, temp float);
SELECT create_hypertable('metrics', 'time', chunk_time_interval => interval '1 day');
INSERT INTO metrics
SELECT generate_series('2018-12-01 00:00'::timestamp,
'2018-12-03 00:00',
'1 hour'),
(random()*30)::int,
random()*80 - 40;

CREATE INDEX metrics_device_time_idx ON metrics(device_id, time NULLS FIRST);

-- Rename chunks so that we have known names. We cannot execute
-- VACUUM in a function block.
DO $$
DECLARE
chunk regclass;
count int = 1;
BEGIN
FOR chunk IN SELECT ch FROM show_chunks('metrics') ch
LOOP
EXECUTE format('ALTER TABLE %s RENAME TO metrics_chunk_%s', chunk, count);
count = count + 1;
END LOOP;
END
$$;
}

teardown {
DROP TABLE metrics;
}

session "S1"
setup {
START TRANSACTION;
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET LOCAL lock_timeout = '500ms';
SET LOCAL deadlock_timeout = '300ms';
}

step "S1_lock" {
LOCK TABLE _timescaledb_internal.metrics_chunk_2 IN ACCESS EXCLUSIVE MODE;
}

step "S1_commit" {
COMMIT;
}

session "S2"
setup {
START TRANSACTION;
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET LOCAL lock_timeout = '500ms';
SET LOCAL deadlock_timeout = '300ms';
}

step "S2_lock" {
LOCK TABLE _timescaledb_internal.metrics_chunk_2 IN ACCESS EXCLUSIVE MODE;
}

step "S2_commit" {
COMMIT;
}

session "S3"
step "S3_vacuum" {
VACUUM ANALYZE _timescaledb_internal.metrics_chunk_2;
}

session "S4"
step "S4_drop_index" {
DROP INDEX metrics_device_time_idx;
}

permutation S1_lock S3_vacuum S2_lock S1_commit S4_drop_index S2_commit
