From cc2253f09c7353cef5156a536ed20b75bad0e589 Mon Sep 17 00:00:00 2001 From: Paul Gofman Date: Mon, 4 Jul 2022 15:11:23 -0500 Subject: [PATCH] fsync: Implement reference counting for sync objects shared memory. CW-Bug-Id: #20826 --- dlls/ntdll/unix/fsync.c | 153 ++++++++++++++++++++++++++++++++++++---- server/fsync.c | 64 ++++++++++++++++- server/fsync.h | 1 + server/process.c | 6 +- server/protocol.def | 5 ++ 5 files changed, 213 insertions(+), 16 deletions(-) diff --git a/dlls/ntdll/unix/fsync.c b/dlls/ntdll/unix/fsync.c index 38b0f040de9..951488129e9 100644 --- a/dlls/ntdll/unix/fsync.c +++ b/dlls/ntdll/unix/fsync.c @@ -200,22 +200,28 @@ struct semaphore { int count; int max; + int ref; + int last_pid; }; -C_ASSERT(sizeof(struct semaphore) == 8); +C_ASSERT(sizeof(struct semaphore) == 16); struct event { int signaled; int unused; + int ref; + int last_pid; }; -C_ASSERT(sizeof(struct event) == 8); +C_ASSERT(sizeof(struct event) == 16); struct mutex { int tid; int count; /* recursion count */ + int ref; + int last_pid; }; -C_ASSERT(sizeof(struct mutex) == 8); +C_ASSERT(sizeof(struct mutex) == 16); static char shm_name[29]; static int shm_fd; @@ -227,8 +233,8 @@ static pthread_mutex_t shm_addrs_mutex = PTHREAD_MUTEX_INITIALIZER; static void *get_shm( unsigned int idx ) { - int entry = (idx * 8) / pagesize; - int offset = (idx * 8) % pagesize; + int entry = (idx * 16) / pagesize; + int offset = (idx * 16) % pagesize; void *ret; pthread_mutex_lock( &shm_addrs_mutex ); @@ -328,6 +334,59 @@ static void add_to_list( HANDLE handle, enum fsync_type type, void *shm ) unlock_obj_cache(); } +static void grab_object( struct fsync *obj ) +{ + int *shm = obj->shm; + + __atomic_add_fetch( &shm[2], 1, __ATOMIC_SEQ_CST ); +} + +static unsigned int shm_index_from_shm( char *shm ) +{ + unsigned int count = shm_addrs_size; + unsigned int i, idx_offset; + + for (i = 0; i < count; ++i) + { + if (shm >= (char *)shm_addrs[i] && shm < (char *)shm_addrs[i] + pagesize) + { + idx_offset = (shm - (char *)shm_addrs[i]) / 16; + return i * (pagesize / 16) + idx_offset; + } + } + + ERR( "Index for shm %p not found.\n", shm ); + return ~0u; +} + +static void put_object( struct fsync *obj ) +{ + int *shm = obj->shm; + + if (__atomic_load_n( &shm[2], __ATOMIC_SEQ_CST ) == 1) + { + /* We are holding the last reference, it should be released on server so shm idx get freed. */ + SERVER_START_REQ( fsync_free_shm_idx ) + { + req->shm_idx = shm_index_from_shm( obj->shm ); + wine_server_call( req ); + } + SERVER_END_REQ; + } + else + { + __atomic_sub_fetch( &shm[2], 1, __ATOMIC_SEQ_CST ); + } +} + +static void put_object_from_wait( struct fsync *obj ) +{ + int *shm = obj->shm; + + __sync_val_compare_and_swap( &shm[3], GetCurrentProcessId(), 0 ); + put_object( obj ); +} + static BOOL get_cached_object( HANDLE handle, struct fsync *obj ) { BOOL ret = TRUE; @@ -337,7 +396,13 @@ static BOOL get_cached_object( HANDLE handle, struct fsync *obj ) lock_obj_cache(); if (!fsync_list[entry][idx].type) ret = FALSE; - else *obj = fsync_list[entry][idx]; + else + { + *obj = fsync_list[entry][idx]; + grab_object( obj ); + /* Here inside cache lock we should have at least one reference previously held by our handle. */ + assert( ((int *)obj->shm)[2] > 1 ); + } unlock_obj_cache(); return ret; } @@ -383,9 +448,24 @@ static NTSTATUS get_object( HANDLE handle, struct fsync *obj ) obj->type = type; obj->shm = get_shm( shm_idx ); add_to_list( handle, type, obj->shm ); + /* get_fsync_idx server request increments shared mem refcount, so not grabbing object here. */ return ret; } +static NTSTATUS get_object_for_wait( HANDLE handle, struct fsync *obj ) +{ + NTSTATUS ret; + int *shm; + + if ((ret = get_object( handle, obj ))) return ret; + + shm = obj->shm; + /* Give wineserver a chance to cleanup shm index if the process + * is killed while we are waiting on the object. */ + __atomic_store_n( &shm[3], GetCurrentProcessId(), __ATOMIC_SEQ_CST ); + return STATUS_SUCCESS; +} + NTSTATUS fsync_close( HANDLE handle ) { UINT_PTR entry, idx = handle_to_index( handle, &entry ); @@ -556,13 +636,17 @@ NTSTATUS fsync_release_semaphore( HANDLE handle, ULONG count, ULONG *prev ) { current = semaphore->count; if (count + current > semaphore->max) + { + put_object( &obj ); return STATUS_SEMAPHORE_LIMIT_EXCEEDED; + } } while (__sync_val_compare_and_swap( &semaphore->count, current, count + current ) != current); if (prev) *prev = current; futex_wake( &semaphore->count, INT_MAX ); + put_object( &obj ); return STATUS_SUCCESS; } @@ -582,6 +666,7 @@ NTSTATUS fsync_query_semaphore( HANDLE handle, void *info, ULONG *ret_len ) out->MaximumCount = semaphore->max; if (ret_len) *ret_len = sizeof(*out); + put_object( &obj ); return STATUS_SUCCESS; } @@ -618,13 +703,17 @@ NTSTATUS fsync_set_event( HANDLE handle, LONG *prev ) event = obj.shm; if (obj.type != FSYNC_MANUAL_EVENT && obj.type != FSYNC_AUTO_EVENT) + { + put_object( &obj ); return STATUS_OBJECT_TYPE_MISMATCH; + } if (!(current = __atomic_exchange_n( &event->signaled, 1, __ATOMIC_SEQ_CST ))) futex_wake( &event->signaled, INT_MAX ); if (prev) *prev = current; + put_object( &obj ); return STATUS_SUCCESS; } @@ -644,6 +733,7 @@ NTSTATUS fsync_reset_event( HANDLE handle, LONG *prev ) if (prev) *prev = current; + put_object( &obj ); return STATUS_SUCCESS; } @@ -673,6 +763,7 @@ NTSTATUS fsync_pulse_event( HANDLE handle, LONG *prev ) if (prev) *prev = current; + put_object( &obj ); return STATUS_SUCCESS; } @@ -692,6 +783,7 @@ NTSTATUS fsync_query_event( HANDLE handle, void *info, ULONG *ret_len ) out->EventType = (obj.type == FSYNC_AUTO_EVENT ? SynchronizationEvent : NotificationEvent); if (ret_len) *ret_len = sizeof(*out); + put_object( &obj ); return STATUS_SUCCESS; } @@ -724,7 +816,11 @@ NTSTATUS fsync_release_mutex( HANDLE handle, LONG *prev ) if ((ret = get_object( handle, &obj ))) return ret; mutex = obj.shm; - if (mutex->tid != GetCurrentThreadId()) return STATUS_MUTANT_NOT_OWNED; + if (mutex->tid != GetCurrentThreadId()) + { + put_object( &obj ); + return STATUS_MUTANT_NOT_OWNED; + } if (prev) *prev = mutex->count; @@ -734,6 +830,7 @@ NTSTATUS fsync_release_mutex( HANDLE handle, LONG *prev ) futex_wake( &mutex->tid, INT_MAX ); } + put_object( &obj ); return STATUS_SUCCESS; } @@ -754,6 +851,7 @@ NTSTATUS fsync_query_mutex( HANDLE handle, void *info, ULONG *ret_len ) out->AbandonedState = (mutex->tid == ~0); if (ret_len) *ret_len = sizeof(*out); + put_object( &obj ); return STATUS_SUCCESS; } @@ -792,6 +890,14 @@ static NTSTATUS do_single_wait( int *addr, int val, const struct timespec64 *end return STATUS_PENDING; } +static void put_objects( struct fsync *objs, unsigned int count ) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + if (objs[i].type) put_object_from_wait( &objs[i] ); +} + static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, BOOLEAN wait_any, BOOLEAN alertable, const LARGE_INTEGER *timeout ) { @@ -830,7 +936,7 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, for (i = 0; i < count; i++) { - ret = get_object( handles[i], &objs[i] ); + ret = get_object_for_wait( handles[i], &objs[i] ); if (ret == STATUS_SUCCESS) { assert( objs[i].type ); @@ -844,6 +950,7 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, } else { + put_objects( objs, i ); return ret; } } @@ -854,7 +961,10 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, if (has_fsync && has_server) FIXME("Can't wait on fsync and server objects at the same time!\n"); else if (has_server) + { + put_objects( objs, count ); return STATUS_NOT_IMPLEMENTED; + } if (TRACE_ON(fsync)) { @@ -909,6 +1019,7 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, { TRACE("Woken up by handle %p [%d].\n", handles[i], i); if (waited) simulate_sched_quantum(); + put_objects( objs, count ); return i; } futex_vector_set( &futexes[i], &semaphore->count, 0 ); @@ -924,6 +1035,7 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, TRACE("Woken up by handle %p [%d].\n", handles[i], i); mutex->count++; if (waited) simulate_sched_quantum(); + put_objects( objs, count ); return i; } @@ -932,12 +1044,14 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, TRACE("Woken up by handle %p [%d].\n", handles[i], i); mutex->count = 1; if (waited) simulate_sched_quantum(); + put_objects( objs, count ); return i; } else if (tid == ~0 && (tid = __sync_val_compare_and_swap( &mutex->tid, ~0, GetCurrentThreadId() )) == ~0) { TRACE("Woken up by abandoned mutex %p [%d].\n", handles[i], i); mutex->count = 1; + put_objects( objs, count ); return STATUS_ABANDONED_WAIT_0 + i; } @@ -956,6 +1070,7 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, TRACE("Woken up by handle %p [%d].\n", handles[i], i); if (waited) simulate_sched_quantum(); + put_objects( objs, count ); return i; } futex_vector_set( &futexes[i], &event->signaled, 0 ); @@ -974,6 +1089,7 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, TRACE("Woken up by handle %p [%d].\n", handles[i], i); if (waited) simulate_sched_quantum(); + put_objects( objs, count ); return i; } futex_vector_set( &futexes[i], &event->signaled, 0 ); @@ -1008,6 +1124,7 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, /* Unlike esync, we already know that we've timed out, so we * can avoid a syscall. */ TRACE("Wait timed out.\n"); + put_objects( objs, count ); return STATUS_TIMEOUT; } @@ -1020,6 +1137,7 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, if (ret == -1 && errno == ETIMEDOUT) { TRACE("Wait timed out.\n"); + put_objects( objs, count ); return STATUS_TIMEOUT; } else waited = TRUE; @@ -1094,6 +1212,7 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, if (status == STATUS_TIMEOUT) { TRACE("Wait timed out.\n"); + put_objects( objs, count ); return status; } else if (status == STATUS_USER_APC) @@ -1183,9 +1302,11 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, if (abandoned) { TRACE("Wait successful, but some object(s) were abandoned.\n"); + put_objects( objs, count ); return STATUS_ABANDONED; } TRACE("Wait successful.\n"); + put_objects( objs, count ); return STATUS_SUCCESS; tooslow: @@ -1230,6 +1351,8 @@ static NTSTATUS __fsync_wait_objects( DWORD count, const HANDLE *handles, userapc: TRACE("Woken up by user APC.\n"); + put_objects( objs, count ); + /* We have to make a server call anyway to get the APC to execute, so just * delegate down to server_wait(). */ ret = server_wait( NULL, 0, SELECT_INTERRUPTIBLE | SELECT_ALERTABLE, &zero ); @@ -1269,10 +1392,14 @@ NTSTATUS fsync_wait_objects( DWORD count, const HANDLE *handles, BOOLEAN wait_an struct fsync obj; NTSTATUS ret; - if (count && !get_object( handles[count - 1], &obj ) && obj.type == FSYNC_QUEUE) + if (count && !get_object( handles[count - 1], &obj )) { - msgwait = TRUE; - server_set_msgwait( 1 ); + if (obj.type == FSYNC_QUEUE) + { + msgwait = TRUE; + server_set_msgwait( 1 ); + } + put_object( &obj ); } ret = __fsync_wait_objects( count, handles, wait_any, alertable, timeout ); @@ -1304,8 +1431,10 @@ NTSTATUS fsync_signal_and_wait( HANDLE signal, HANDLE wait, BOOLEAN alertable, ret = fsync_release_mutex( signal, NULL ); break; default: - return STATUS_OBJECT_TYPE_MISMATCH; + ret = STATUS_OBJECT_TYPE_MISMATCH; + break; } + put_object( &obj ); if (ret) return ret; return fsync_wait_objects( 1, &wait, TRUE, alertable, timeout ); diff --git a/server/fsync.c b/server/fsync.c index 4d477e3aa1e..ada3c217629 100644 --- a/server/fsync.c +++ b/server/fsync.c @@ -214,8 +214,8 @@ static void fsync_destroy( struct object *obj ) static void *get_shm( unsigned int idx ) { - int entry = (idx * 8) / pagesize; - int offset = (idx * 8) % pagesize; + int entry = (idx * 16) / pagesize; + int offset = (idx * 16) % pagesize; if (entry >= shm_addrs_size) { @@ -296,7 +296,7 @@ unsigned int fsync_alloc_shm( int low, int high ) shm_idx = alloc_shm_idx_from_word( old_size ); } - while (shm_idx * 8 >= shm_size) + while (shm_idx * 16 >= shm_size) { /* Better expand the shm section. */ shm_size += pagesize; @@ -312,6 +312,8 @@ unsigned int fsync_alloc_shm( int low, int high ) assert(shm); shm[0] = low; shm[1] = high; + shm[2] = 1; /* Reference count. */ + shm[3] = 0; /* Last reference process id. */ return shm_idx; #else @@ -323,9 +325,24 @@ void fsync_free_shm_idx( int shm_idx ) { unsigned int idx; uint64_t mask; + int *shm; assert( shm_idx ); assert( shm_idx < shm_idx_free_map_size * BITS_IN_FREE_MAP_WORD ); + + shm = get_shm( shm_idx ); + if (shm[2] <= 0) + { + fprintf( stderr, "wineserver: fsync err: shm refcount is %d.\n", shm[2] ); + return; + } + + if (__atomic_sub_fetch( &shm[2], 1, __ATOMIC_SEQ_CST )) + { + /* Sync object is still referenced in a process. */ + return; + } + idx = shm_idx / BITS_IN_FREE_MAP_WORD; mask = (uint64_t)1 << (shm_idx % BITS_IN_FREE_MAP_WORD); assert( !(shm_idx_free_map[idx] & mask) ); @@ -334,6 +351,31 @@ void fsync_free_shm_idx( int shm_idx ) shm_idx_free_search_start_hint = idx; } +/* Try to cleanup the shared mem indices locked by the wait on the killed processes. + * This is not fully reliable but should avoid leaking the majority of indices on + * process kill. */ +void fsync_cleanup_process_shm_indices( process_id_t id ) +{ + uint64_t free_word; + unsigned int i, j; + void *shmbase; + int *shm; + + for (i = 0; i < shm_idx_free_map_size; ++i) + { + free_word = shm_idx_free_map[i]; + if (free_word == ~(uint64_t)0) continue; + shmbase = get_shm( i * BITS_IN_FREE_MAP_WORD ); + for (j = !i; j < BITS_IN_FREE_MAP_WORD; ++j) + { + shm = (int *)((char *)shmbase + j * 16); + if (!(free_word & ((uint64_t)1 << j)) && shm[3] == id + && __atomic_load_n( &shm[2], __ATOMIC_SEQ_CST ) == 1) + fsync_free_shm_idx( i * BITS_IN_FREE_MAP_WORD + j ); + } + } +} + static int type_matches( enum fsync_type type1, enum fsync_type type2 ) { return (type1 == type2) || @@ -393,6 +435,8 @@ struct fsync_event { int signaled; int unused; + int ref; + int last_pid; }; void fsync_wake_futex( unsigned int shm_idx ) @@ -560,8 +604,12 @@ DECL_HANDLER(get_fsync_idx) if (obj->ops->get_fsync_idx) { + int *shm; + reply->shm_idx = obj->ops->get_fsync_idx( obj, &type ); reply->type = type; + shm = get_shm( reply->shm_idx ); + __atomic_add_fetch( &shm[2], 1, __ATOMIC_SEQ_CST ); } else { @@ -580,3 +628,13 @@ DECL_HANDLER(get_fsync_apc_idx) { reply->shm_idx = current->fsync_apc_idx; } + +DECL_HANDLER(fsync_free_shm_idx) +{ + if (!req->shm_idx || req->shm_idx >= shm_idx_free_map_size * BITS_IN_FREE_MAP_WORD) + { + set_error( STATUS_INVALID_PARAMETER ); + return; + } + fsync_free_shm_idx( req->shm_idx ); +} diff --git a/server/fsync.h b/server/fsync.h index ee1a729e77e..d4bd889a7f8 100644 --- a/server/fsync.h +++ b/server/fsync.h @@ -33,3 +33,4 @@ extern const struct object_ops fsync_ops; extern void fsync_set_event( struct fsync *fsync ); extern void fsync_reset_event( struct fsync *fsync ); extern void fsync_abandon_mutexes( struct thread *thread ); +extern void fsync_cleanup_process_shm_indices( process_id_t id ); diff --git a/server/process.c b/server/process.c index fcdb5f3bd84..59430f82808 100644 --- a/server/process.c +++ b/server/process.c @@ -805,7 +805,11 @@ static void process_destroy( struct object *obj ) free( process->dir_cache ); free( process->image ); if (do_esync()) close( process->esync_fd ); - if (process->fsync_idx) fsync_free_shm_idx( process->fsync_idx ); + if (process->fsync_idx) + { + fsync_cleanup_process_shm_indices( process->id ); + fsync_free_shm_idx( process->fsync_idx ); + } } /* dump a process on stdout for debugging purposes */ diff --git a/server/protocol.def b/server/protocol.def index e68c85209bb..5b930b06ef1 100644 --- a/server/protocol.def +++ b/server/protocol.def @@ -3928,3 +3928,8 @@ enum fsync_type @REPLY unsigned int shm_idx; @END + +@REQ(fsync_free_shm_idx) + unsigned int shm_idx; +@REPLY +@END