diff --git a/README.rst b/README.rst
index 7bb76660..e6aebc4d 100644
--- a/README.rst
+++ b/README.rst
@@ -1,4 +1,3 @@
-
 .. image:: https://github.com/linusjoonho/ipie/blob/main/logo.png
     :width: 200
 
@@ -6,8 +5,8 @@ ipie stands for **I**\ntelligent **P**\ython-based **I**\maginary-time **E**\vol
 
 ipie inherits a lot of QMC features from pauxy.
 
-.. image:: https://github.com/linusjoonho/ipie/workflows/CI/badge.svg
-    :target: https://github.com/linusjoonho/ipie/workflows/CI/badge.svg
+.. image:: https://github.com/JoonhoLee-Group/ipie/actions/workflows/ci.yml/badge.svg
+    :target: https://github.com/JoonhoLee-Group/ipie/actions/workflows/ci.yml
 
 .. image:: http://readthedocs.org/projects/ipie/badge/?version=latest
     :target: http://ipie.readthedocs.io/en/latest/?badge=latest
@@ -21,33 +20,44 @@ ipie inherits a lot of QMC features from pauxy.
 .. image:: https://img.shields.io/badge/paper%20%28v0%29-arXiv%3A2209.04015-B31B1B
     :target: https://arxiv.org/abs/2209.04015
 
+.. image:: https://img.shields.io/badge/paper%20%28v1%29-arXiv%3A2406.16238-B31B1B
+    :target: https://arxiv.org/abs/2406.16238
+
 Copyright by Joonho Lee (joonholee@g.harvard.edu)
-Our first release paper can be found at https://pubs.acs.org/doi/10.1021/acs.jctc.2c00934
 
-Features
+ipie is a Python-based auxiliary-field quantum Monte Carlo (AFQMC) package, designed for simplicity and computational efficiency. The package has seen substantial improvements in modularity, functionality, and compatibility since its first release.
+
+Features
 --------
-ipie currently supports:
 
-- estimation of the ground state energy of ab-initio systems using phaseless AFQMC with support for CPUs and GPUs.
-- simple data analysis.
-- other legacy functionalities available in pauxy such as the ground state and finite-temperature energies and properties (via backpropagation) of the ab initio, UEG, Hubbard, and Hubbard-Holstein models.
+- **Ground State Energy Estimation**: Calculate ground state energies of ab-initio systems with phaseless AFQMC.
+- **Distributed Hamiltonian Simulations**: Run large-scale simulations distributed across multiple CPUs or GPUs, enabling calculations on systems too large for a single node or GPU card.
+- **GPU Acceleration**: Support both CPU and GPU calculations, with GPU acceleration provided by CuPy/CUDA and CUDA-aware MPI.
+- **Extended AFQMC Algorithms**: Includes free projection AFQMC, finite temperature AFQMC, AFQMC for electron-phonon systems, and automatic differentiation for property calculation.
+- **Simple Data Analysis**
+- **Other legacy features from pauxy**
+
+For technical details, see our latest release papers:
+
+- `J. Chem. Theory Comput., 2023, 19(1): 109-121 <https://pubs.acs.org/doi/10.1021/acs.jctc.2c00934>`_
+- `J. Chem. Phys. 161, 162502 (2024) <https://doi.org/10.1063/5.0225596>`_
 
 Installation
 ------------
 
-Linux and Mac OS wheels are available for installation via pip
+Linux and Mac OS wheels are available for installation via pip:
 
 ::
 
     $ pip install ipie
 
-For develpment you can instead clone the repository
+For development, clone the repository:
 
 ::
 
     $ git clone https://github.com/linusjoonho/ipie.git
 
-and run the following in the top-level ipie directory
+Navigate to the top-level ``ipie`` directory and run:
 
 ::
 
@@ -57,57 +67,47 @@ and run the following in the top-level ipie directory
 Requirements
 ------------
 
-To build ipie with MPI support (via mpi4py) do:
+To build ipie with MPI support (via ``mpi4py``), install with:
 
 ::
 
     $ pip install -e .[mpi]
 
-Note that mpi4py requires a working MPI installation to be built on your
-machine. This  it is often the trickiest dependency to setup correctly.
-
-One of the easiest ways (if you are using pip to install ipie wheels) is via
-conda:
+This requires a working MPI installation on your machine, which can be installed via ``conda``:
 
 ::
 
     conda install openmpi
 
-which will just install the OpenMPI library. 
-We refer users to the mpi4py
-`documentation <https://mpi4py.readthedocs.io/en/stable/install.html>`_ for
-alternative ways of building mpi4py and the required MPI library.
-
-Further requirements are listed in requirements.txt.
-
-GPU Support
------------
-Cupy is is required when running calculations on GPUs which
-can be install following the instructions `here <https://cupy.dev/>`_ .
+Refer to the ``mpi4py`` `documentation <https://mpi4py.readthedocs.io/en/stable/install.html>`_ for alternative installation options.
 
-Cuda aware MPI may be installed via conda-forge.
+For GPU support, ``cupy`` is required and can be installed as outlined on the `CuPy website <https://cupy.dev/>`_. For CUDA-aware MPI, consider ``conda-forge``.
 
 Running the Test Suite
 ----------------------
 
-ipie contains unit tests and some longer driver tests that can be run using pytest by
-running:
+Unit tests and driver tests are included. To run all tests:
 
 ::
 
     $ pytest -v
 
-in the base of the repo. Some longer parallel tests are also run through the CI. See
-`.github/workflows/ci.yml` for more details.
+More extensive parallel tests are executed in the CI; see ``.github/workflows/ci.yml`` for details.
 
-.. image:: https://github.com/linusjoonho/ipie/workflows/CI/badge.svg
-    :target: https://github.com/linusjoonho/ipie/workflows/CI/badge.svg
+.. image:: https://github.com/JoonhoLee-Group/ipie/actions/workflows/ci.yml/badge.svg
+    :target: https://github.com/JoonhoLee-Group/ipie/actions/workflows/ci.yml
+
+Building Optimized C/C++ Code
+-----------------------------
+
+ipie also provides optimized implementations for certain functions involving multiple Slater determinant trials using C/C++ code, which can be built for enhanced performance. To compile these functions into a shared library, navigate to the ``ipie/lib/wicks`` folder and use ``CMake`` and ``Make``:
+
+::
+
+    $ cmake .
+    $ make
 
 Documentation
 -------------
 
-Documentation and tutorials are available at
-`readthedocs <https://ipie.readthedocs.org>`_.
-
-.. image:: http://readthedocs.org/projects/ipie/badge/?version=latest
-    :target: http://ipie.readthedocs.io/en/latest/?badge=latest
+Documentation and tutorials are available at `ReadTheDocs <https://ipie.readthedocs.org>`_.
diff --git a/ipie/trial_wavefunction/single_det.py b/ipie/trial_wavefunction/single_det.py
index f2fb7b58..648de937 100644
--- a/ipie/trial_wavefunction/single_det.py
+++ b/ipie/trial_wavefunction/single_det.py
@@ -1,10 +1,11 @@
 import time
 from typing import Optional
 
+from typing import Union
 import numpy
 import plum
 
-from ipie.config import CommType, config, MPI
+from ipie.config import CommType, config
 from ipie.estimators.generic import half_rotated_cholesky_jk_uhf
 from ipie.estimators.greens_function_single_det import (
     greens_function_single_det,
@@ -23,7 +24,6 @@
 from ipie.utils.backend import arraylib as xp
 from ipie.utils.mpi import MPIHandler
 from ipie.walkers.uhf_walkers import UHFWalkers
-from typing import Union
 
 
 # class for UHF trial
@@ -89,7 +89,7 @@ def calculate_energy(self, system, hamiltonian) -> numpy.ndarray:
     def half_rotate(
         self: "SingleDet",
         hamiltonian: GenericRealChol,
-        comm: Optional[CommType] = MPI.COMM_WORLD,
+        comm: Optional[CommType] = MPIHandler().scomm,
     ):
         num_dets = 1
         orbsa = self.psi0a.reshape((num_dets, self.nbasis, self.nalpha))
@@ -115,7 +115,7 @@ def half_rotate(
     def half_rotate(
         self: "SingleDet",
         hamiltonian: GenericRealCholChunked,
-        comm: Optional[CommType] = MPI.COMM_WORLD,
+        comm: Optional[CommType] = MPIHandler().scomm,
     ):
         num_dets = 1
         orbsa = self.psi0a.reshape((num_dets, self.nbasis, self.nalpha))
@@ -137,16 +137,11 @@ def half_rotate(
         self._rcholb_chunk = rot_chol[1][0]
         self.half_rotated = True
 
-        # rot_1body_1 = numpy.load('../Test_Disk_nochunk/rot_1body.npy')
-        # rot_chol_1 = numpy.load('../Test_Disk_nochunk/rot_chol.npy')
-
-        # print('compare', [numpy.allclose(rot_1body, rot_1body_1), numpy.allclose(rot_chol, rot_chol_1)])
-
     @plum.dispatch
     def half_rotate(
         self: "SingleDet",
         hamiltonian: GenericComplexChol,
-        comm: Optional[CommType] = MPI.COMM_WORLD,
+        comm: Optional[CommType] = MPIHandler().scomm,
     ):
         num_dets = 1
         orbsa = self.psi0a.reshape((num_dets, self.nbasis, self.nalpha))
@@ -180,8 +175,7 @@ def calc_overlap(self, walkers) -> numpy.ndarray:
     def calc_greens_function(self, walkers, build_full: bool = False) -> numpy.ndarray:
         if config.get_option("use_gpu"):
             return greens_function_single_det_batch(walkers, self, build_full=build_full)
-        else:
-            return greens_function_single_det(walkers, self, build_full=build_full)
+        return greens_function_single_det(walkers, self, build_full=build_full)
 
     @plum.dispatch
     def calc_force_bias(
@@ -194,10 +188,9 @@ def calc_force_bias(
             return construct_force_bias_batch_single_det_chunked(
                 hamiltonian, walkers, self, mpi_handler
             )
-        else:
-            return construct_force_bias_batch_single_det(
-                hamiltonian, walkers, self._rchola, self._rcholb
-            )
+        return construct_force_bias_batch_single_det(
+            hamiltonian, walkers, self._rchola, self._rcholb
+        )
 
     @plum.dispatch
     def calc_force_bias(
diff --git a/ipie/utils/hamiltonian_converter.py b/ipie/utils/hamiltonian_converter.py
index 3f7e3c66..b0adaa66 100644
--- a/ipie/utils/hamiltonian_converter.py
+++ b/ipie/utils/hamiltonian_converter.py
@@ -259,8 +259,7 @@ def read_cholesky(filename, full=True, ichunk=None, real_ints=False):
                 s += bs
             chol_vecs = scipy.sparse.csr_matrix((vals, (row_ix, col_ix)), shape=(nmo * nmo, nchol))
             return chol_vecs
-        else:
-            return get_chunk(fh5, ichunk, real_ints)
+        return get_chunk(fh5, ichunk, real_ints)
 
 
 def get_chunk(fh5, ichunk, real_ints):
@@ -291,20 +290,17 @@ def check_sym(ikjl, nmo, sym):
     """
     if sym == 1:
         return True
-    else:
-        i, k, j, l = ikjl
-        if sym == 4:
-            kilj = (k, i, l, j)
-            jlik = (j, l, i, k)
-            ljki = (l, j, k, i)
-            if (ikjl > jlik) or (ikjl > kilj) or (ikjl > ljki):
-                return False
-            else:
-                return True
-        else:
-            ik = i + k * nmo
-            jl = j + l * nmo
-            return (i >= k and j >= l) and ik >= jl
+    i, k, j, l = ikjl
+    if sym == 4:
+        kilj = (k, i, l, j)
+        jlik = (j, l, i, k)
+        ljki = (l, j, k, i)
+        if (ikjl > jlik) or (ikjl > kilj) or (ikjl > ljki):
+            return False
+        return True
+    ik = i + k * nmo
+    jl = j + l * nmo
+    return (i >= k and j >= l) and ik >= jl
 
 
 def fmt_integral(intg, i, k, j, l, cplx, paren=False):
@@ -417,8 +413,9 @@ def read_qmcpack_cholesky_kpoint(filename, get_chol=True):
         nbeta = dims[5]
         for i in range(0, nkp):
             hk = fh5[f"Hamiltonian/H1_kp{i}"][:]
+            hk = hk.view(numpy.complex128)
             nmo = nmo_pk[i]
-            hcore.append(hk.view(numpy.complex128).reshape(nmo, nmo))
+            hcore.append(hk.reshape(nmo, nmo))
         chol_vecs = []
     if get_chol:
         for i in range(0, nkp):
@@ -443,10 +440,12 @@ def get_kpoint_chol(filename, nchol_pk, minus_k, i):
     with h5py.File(filename, "r") as fh5:
         try:
             Lk = fh5[f"Hamiltonian/KPFactorized/L{i}"][:]
-            Lk = Lk.view(numpy.complex128)[:, :, 0]
+            Lk = Lk.view(numpy.complex128)
+            Lk = Lk[:, :, 0]
         except KeyError:
             Lk = fh5[f"Hamiltonian/KPFactorized/L{minus_k[i]}"][:]
-            Lk = Lk.view(numpy.complex128).conj()[:, :, 0]
+            Lk = Lk.view(numpy.complex128)
+            Lk = Lk[:, :, 0].conj()
     return Lk