Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RHELC-1124] Refactor applock to have a more robust API. #979

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 70 additions & 57 deletions convert2rhel/applock.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import errno
import logging
import os
import stat
import tempfile


Expand All @@ -45,11 +46,14 @@ class ApplicationLock:

The implementation uses a standard Linux PID file. When a program
that uses ApplicationLock starts, it writes its process ID to a
file in /var/run/lock. If the file already exists, it reads the
PID and checks to see if the process is still running. If the
process is still running, it raises ApplicationLockedError. If the
process is not running, it removes the file and tries to lock it
again.
file in /var/run/lock. If the file already exists, it reads and
reports the PID that it finds.

While it would be nice to try to clean up a stale process ID file
(i.e., a file created by a process that has exited) that opens up
a race condition that would require us to leave the process ID
file pointer open across the invocation of several methods, which
would complicate the code more than the feature is worth.

For safety, unexpected conditions, like garbage in the file or
bad permissions, are treated as if the application is locked,
Expand All @@ -59,24 +63,24 @@ class ApplicationLock:
def __init__(self, name):
# Our application name
self._name = name
# Do we think we locked the pid file?
self._locked = False
# Our process ID
# Our process ID. We save this when the lock is created so it will be
# consistent even if we check from inside a fork.
self._pid = os.getpid()
# Path to the file that contains the process id
self._pidfile = os.path.join(_DEFAULT_LOCK_DIR, self._name + ".pid")

def __str__(self):
if self._locked:
if self.is_locked:
status = "locked"
else:
status = "unlocked"
return "%s PID %d %s" % (self._pidfile, self._pid, status)

def _try_create(self):
"""Try to create the lock file. If this succeeds, the lock file
exists and we created it. If an exception other than the one
we expect is raised, re-raises it.
"""Try to create the lock file. If this succeeds, the lock
file exists and we created it, so we hold the lock. If an
exception other than the one we expect is raised, re-raises
it.

:returns: True if we created the lock, False if we didn't.
"""
Expand All @@ -89,6 +93,9 @@ def _try_create(self):
# Note that NamedTemporaryFile will clean up the file it created,
# but the lockfile we created by doing the link will stay around.
with tempfile.NamedTemporaryFile(mode="w", suffix=".pid", prefix=self._name, dir=_DEFAULT_LOCK_DIR) as f:
# stat.S_IRUSR = Owner has read permission
# stat.S_IWUSR = Owner has write permission
os.chmod(f.name, stat.S_IRUSR | stat.S_IWUSR)
f.write(str(self._pid) + "\n")
f.flush()
try:
Expand All @@ -97,75 +104,81 @@ def _try_create(self):
except OSError as exc:
if exc.errno == errno.EEXIST:
return False
raise exc
Venefilyn marked this conversation as resolved.
Show resolved Hide resolved
raise
loggerinst.debug("%s." % self)
return True

r0x0d marked this conversation as resolved.
Show resolved Hide resolved
def _get_pid_from_file(self):
"""If the lock file exists, return the process ID stored in
the file. No check is made whether the process exists. Because
of the way we create the lock file, the case where it has bad
data or is empty is a pathological case.

:returns: The process ID, or None if the file does not exist.
:raises ApplicationLockedError: Bad data in the file.
"""
try:
with open(self._pidfile, "r") as filep:
file_contents = filep.read()
pid = int(file_contents.rstrip())
if pid:
return pid
except ValueError:
raise ApplicationLockedError("%s has invalid contents" % (self._pidfile))
except (IOError, OSError) as exc:
if exc.errno == errno.ENOENT:
return None
raise
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, we should be raising exception again

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have to leave the file open and close it later explicitly because the file lock is tied to the file descriptor. The file lock is required to plug a race condition in which the pid file is overwritten by another process between the point where we read its contents and unlink it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bare raise statement reraises the current exception; @abadger requested that I use this idiom.

return None

@property
def is_locked(self):
"""Test whether this object is locked."""
return self._locked

@staticmethod
def _pid_exists(pid):
"""Test whether a particular process exists."""
"""Test whether this object was locked by this instance of
the application."""
pid = self._get_pid_from_file()
return pid == self._pid

def _safe_unlink(self):
"""Unlink the lock file. If the unlink fails because the file
doesn't exist, swallow the exception; this avoids spurious
errors due to race conditions.
"""
try:
# Bulletproofing: avoid killing init or all processes.
if pid > 1:
os.kill(pid, 0)
os.unlink(self._pidfile)
except OSError as exc:
# The only other (theoretical) possibility is EPERM, which
# would mean the process exists.
if exc.errno == errno.ESRCH:
return False
return True
# In Python 3 this could be changed to FileNotFoundError.
if exc.errno == errno.ENOENT:
return
raise

def try_to_lock(self, _recursive=False):
"""Try to get a lock on this application. If successful,
the application will be locked; the lock should be released
with unlock().
def try_to_lock(self):
"""Try to get a lock on this application. If this method does
not raise an Exception, the application will be locked and we
hold the lock; the lock should be released with unlock().

If the file has unexpected contents, for safety we treat the
application as locked, since it is probably the result of
manual meddling, intentional or otherwise.

:keyword _recursive: True if we are being called recursively
and should not try to clean up the lockfile
again.
:raises ApplicationLockedError: the application is locked
"""
if self._try_create():
self._locked = True
return
if _recursive:
raise ApplicationLockedError("Cannot lock %s" % self._name)

with open(self._pidfile, "r") as f:
file_contents = f.read()
try:
pid = int(file_contents.rstrip())
except ValueError:
raise ApplicationLockedError("Lock file %s is corrupt" % self._pidfile)

if self._pid_exists(pid):
raise ApplicationLockedError("%s locked by process %d" % (self._pidfile, pid))
# The lock file was created by a process that has exited;
# remove it and try again.
loggerinst.info("Cleaning up lock held by exited process %d." % pid)
os.unlink(self._pidfile)
self.try_to_lock(_recursive=True)
pid = self._get_pid_from_file()
if pid == self._pid:
return
raise ApplicationLockedError("%s locked by process %d" % (self._pidfile, pid))

def unlock(self):
"""Release the lock on this application.

Note that if the unlink fails (a pathological failure) the
object will stay locked and the OSError or other
Note that if the safe unlink fails (a pathological failure)
the object will stay locked and the OSError or other
system-generated exception will be raised.
"""
if not self._locked:
if not self.is_locked:
return
os.unlink(self._pidfile)
self._locked = False
self._safe_unlink()
loggerinst.debug("%s." % self)

def __enter__(self):
Expand Down
7 changes: 5 additions & 2 deletions convert2rhel/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,11 @@ def main():
try:
with applock.ApplicationLock("convert2rhel"):
return main_locked()
except applock.ApplicationLockedError:
loggerinst.warning("Another copy of convert2rhel is running.\n")
except applock.ApplicationLockedError as exc:
loggerinst.warning(str(exc))
loggerinst.warning("Another copy of convert2rhel may be running.")
loggerinst.warning("If you are certain that no other convert2rhel process")
loggerinst.warning("is running, remove the lock file and try again.\n")
loggerinst.warning("\nNo changes were made to the system.\n")
return ConversionExitCodes.FAILURE

Expand Down
13 changes: 6 additions & 7 deletions convert2rhel/unit_tests/applock_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,18 @@ def test_applock_basic(tmp_lock):

def test_applock_basic_islocked(tmp_lock):
with open(tmp_lock._pidfile, "w") as f:
pid = os.getpid()
f.write(str(pid) + "\n")
# Our parent process will be running and have a different pid
ppid = os.getppid()
f.write(str(ppid) + "\n")
with pytest.raises(applock.ApplicationLockedError):
tmp_lock.try_to_lock()
os.unlink(tmp_lock._pidfile)


def test_applock_basic_reap(tmp_lock):
"""Test the case where the lockfile was held by a process
that has exited."""
old_pid = subprocess.check_output("/bin/echo $$", shell=True, universal_newlines=True)
def test_applock_basic_lock_idempotent(tmp_lock):
with open(tmp_lock._pidfile, "w") as f:
f.write(old_pid)
pid = os.getpid()
f.write(str(pid) + "\n")
tmp_lock.try_to_lock()
os.unlink(tmp_lock._pidfile)

Expand Down
2 changes: 1 addition & 1 deletion convert2rhel/unit_tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,5 +707,5 @@ def test_main_already_running_conversion(monkeypatch, caplog, tmpdir):
monkeypatch.setattr(main, "main_locked", mock.Mock(side_effect=applock.ApplicationLockedError("failed")))

assert main.main() == 1
assert "Another copy of convert2rhel is running.\n" in caplog.records[-2].message
assert "Another copy of convert2rhel may be running.\n" in caplog.records[-4].message
assert "\nNo changes were made to the system.\n" in caplog.records[-1].message
Loading