From a5d9a09750c9b9246d8c8d2617280b010a087c92 Mon Sep 17 00:00:00 2001 From: Joris Gillis Date: Wed, 13 Oct 2021 14:03:01 +0200 Subject: [PATCH 1/3] Updating README Adding a paragraph on schemas in PostgreSQL. --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index d6cfa80..8037971 100644 --- a/README.rst +++ b/README.rst @@ -11,6 +11,9 @@ Getting Started - Write access to an empty `PostgreSQL `_ database. - A Python installation with `Jupyter Notebook `_ >= 5.0. +PGContents will put its table in the `pgcontents` namespace. When you log onto the PostgreSQL database server, make +sure the `pgcontents` schema is in the search path (e.g., `set search_path to 'pgcontents'`; see `PostgreSQL documentation`_). + **Installation:** 0. Install ``pgcontents`` from PyPI via ``pip install pgcontents``. @@ -23,3 +26,4 @@ Demo Video You can see a demo of PGContents in action in `this presentation from JupyterCon 2017`_. .. _`this presentation from JupyterCon 2017` : https://youtu.be/TtsbspKHJGo?t=917 +.. _`PostgreSQL documentation` : https://www.postgresql.org/docs/14/ddl-schemas.html#DDL-SCHEMAS-PATH \ No newline at end of file From 58194b6c5ccb01793466a9dddabd1e5b15d9ad28 Mon Sep 17 00:00:00 2001 From: Joris Gillis Date: Wed, 13 Oct 2021 15:15:15 +0200 Subject: [PATCH 2/3] Fixing upsert of file First, a query is executed to check whether the file already exists. If not, it is inserted. If it does exist, the file is updated. The current code was causing issues because the transaction was closed after the failure. In the future it might be better to use a SQLAlchemy Session which handles upserts under the hood. 
--- pgcontents/query.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/pgcontents/query.py b/pgcontents/query.py index 2969f64..6ed3ada 100644 --- a/pgcontents/query.py +++ b/pgcontents/query.py @@ -510,7 +510,7 @@ def save_file(db, user_id, path, content, encrypt_func, max_size_bytes): ) directory, name = split_api_filepath(path) with db.begin_nested() as savepoint: - try: + if not file_exists(db, user_id, path): res = db.execute( files.insert().values( name=name, @@ -519,22 +519,16 @@ def save_file(db, user_id, path, content, encrypt_func, max_size_bytes): content=content, ) ) - except IntegrityError as error: - # The file already exists, so overwrite its content with the newer - # version. - if is_unique_violation(error): - savepoint.rollback() - res = db.execute( - files.update().where( - _file_where(user_id, path), - ).values( - content=content, - created_at=func.now(), - ) + else: + # The file already exists, so overwrite its content with the newer version. + res = db.execute( + files.update().where( + _file_where(user_id, path), + ).values( + content=content, + created_at=func.now(), ) - else: - # Unknown error. Reraise - raise + ) return res From f28f9098bee43b5626546e9b05bbbc6f8e1d8b4d Mon Sep 17 00:00:00 2001 From: Joris Gillis Date: Thu, 14 Oct 2021 11:22:59 +0200 Subject: [PATCH 3/3] Upserting a file Using the upsert feature (on_conflict_do_update) of SQLAlchemy and PostgreSQL, instead of relying on exception handling of an insert statement. 
Upsert statements are supported since PostgreSQL 9.5 https://www.postgresql.org/about/press/presskit95/en/ https://www.postgresql.org/docs/current/sql-insert.html --- pgcontents/query.py | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/pgcontents/query.py b/pgcontents/query.py index 6ed3ada..749d0af 100644 --- a/pgcontents/query.py +++ b/pgcontents/query.py @@ -10,6 +10,7 @@ select, Unicode, ) +from sqlalchemy.dialects.postgresql import insert from sqlalchemy.exc import IntegrityError @@ -500,8 +501,6 @@ def rename_directory(db, user_id, old_api_path, new_api_path): def save_file(db, user_id, path, content, encrypt_func, max_size_bytes): """ Save a file. - - TODO: Update-then-insert is probably cheaper than insert-then-update. """ content = preprocess_incoming_content( content, @@ -509,26 +508,20 @@ def save_file(db, user_id, path, content, encrypt_func, max_size_bytes): max_size_bytes, ) directory, name = split_api_filepath(path) - with db.begin_nested() as savepoint: - if not file_exists(db, user_id, path): - res = db.execute( - files.insert().values( - name=name, - user_id=user_id, - parent_name=directory, - content=content, - ) - ) - else: - # The file already exists, so overwrite its content with the newer version. - res = db.execute( - files.update().where( - _file_where(user_id, path), - ).values( - content=content, - created_at=func.now(), - ) + with db.begin_nested(): + res = db.execute( + insert(files) + .values( + name=name, + user_id=user_id, + parent_name=directory, + content=content, ) + .on_conflict_do_update(constraint="uix_filepath_username", set_={ + "content": content, + "created_at": func.now() + }) + ) return res