diff --git a/qtensor/compression/Compressor.py b/qtensor/compression/Compressor.py
index fecfc45d..520f7c5a 100644
--- a/qtensor/compression/Compressor.py
+++ b/qtensor/compression/Compressor.py
@@ -5,8 +5,8 @@
 print(Path(__file__).parent/'szx/src/')
 sys.path.append(str(Path(__file__).parent/'szx/src/'))
 sys.path.append('./szx/src')
-# sys.path.append(str(Path(__file__).parent/'szp/src/'))
-# sys.path.append('./szp/src')
+sys.path.append(str(Path(__file__).parent/'szp/src/'))
+sys.path.append('./szp/src')
 
 sys.path.append(str(Path(__file__).parent/'cusz/src'))
 sys.path.append('./cusz/src')
@@ -19,7 +19,7 @@
 import torch
 try:
     from cuszx_wrapper import cuszx_host_compress, cuszx_host_decompress, cuszx_device_compress, cuszx_device_decompress
-    # from cuSZp_wrapper import cuszp_device_compress, cuszp_device_decompress
+    from cuSZp_wrapper import cuszp_device_compress, cuszp_device_decompress
     from cusz_wrapper import cusz_device_compress, cusz_device_decompress
     from torch_quant_perchannel import quant_device_compress, quant_device_decompress
     from newsz_wrapper import newsz_device_compress, newsz_device_decompress
@@ -166,14 +166,24 @@ def free_decompressed(self):
         self.decompressed_own = []
 
     def free_compressed(self, ptr):
+        import ctypes, cupy
         cmp_bytes, num_elements_eff, shape, dtype, _ = ptr
+        p_decompressed_ptr = ctypes.addressof(cmp_bytes[0])
+        # cast to int64 pointer
+        # (effectively converting pointer to pointer to addr to pointer to int64)
+        p_decompressed_int= ctypes.cast(p_decompressed_ptr, ctypes.POINTER(ctypes.c_uint64))
+        decompressed_int = p_decompressed_int.contents
+        cupy.cuda.runtime.free(decompressed_int.value)
+        cupy.get_default_memory_pool().free_all_blocks()
         del cmp_bytes
 
     def compress(self, data):
         isCupy, num_elements_eff = _get_data_info(data)
         dtype = data.dtype
-        cmp_bytes, outSize_ptr = cuszp_device_compress(data, self.r2r_error,self.r2r_threshold)
-        return (cmp_bytes, num_elements_eff, data.shape, dtype, outSize_ptr)
+        print("Compressing")
+        print(type(data), type(num_elements_eff))
+        cmp_bytes, outSize_ptr = cuszp_device_compress(data, self.r2r_error,num_elements_eff, self.r2r_threshold)
+        return (cmp_bytes, num_elements_eff, data.shape, dtype, outSize_ptr.contents.value)
 
         # return (cmp_bytes, num_elements_eff, isCuPy, data.shape, dtype, outSize_ptr.contents.value)
     def compress_size(self, ptr):
@@ -182,7 +192,7 @@ def compress_size(self, ptr):
     def decompress(self, obj):
         import cupy
         cmp_bytes, num_elements_eff, shape, dtype, cmpsize = obj
-        decompressed_ptr = cuszp_device_decompress(num_elements_eff, cmp_bytes)
+        decompressed_ptr = cuszp_device_decompress(num_elements_eff, cmp_bytes, cmpsize, self, dtype)
         arr_cp = decompressed_ptr[0]
 
         arr = cupy.reshape(arr_cp, shape)
diff --git a/qtensor/compression/szp/src/cuSZp_wrapper.py b/qtensor/compression/szp/src/cuSZp_wrapper.py
index 9abe1fb1..4e887a3b 100644
--- a/qtensor/compression/szp/src/cuSZp_wrapper.py
+++ b/qtensor/compression/szp/src/cuSZp_wrapper.py
@@ -7,8 +7,8 @@
 import torch
 
 from pathlib import Path
-#LIB_PATH = str(Path(__file__).parent/'libcuszp_wrapper.so')
-LIB_PATH = '/home/mkshah5/QTensor/qtensor/compression/szp/src/libcuszp_wrapper.so'
+LIB_PATH = str(Path(__file__).parent/'libcuszp_wrapper.so')
+#LIB_PATH = '/home/mkshah5/QTensor/qtensor/compression/szp/src/libcuszp_wrapper.so'
 # unsigned char* cuSZp_device_compress(float *oriData, size_t *outSize, float absErrBound, size_t nbEle){
 
 def get_device_compress():