Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

build: compile-pyc #1891

Merged
merged 3 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion installer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ RUN chmod 755 /opt/maxkb/app/installer/run-maxkb.sh && \
useradd --no-create-home --home /opt/maxkb/app/sandbox sandbox -g root && \
chown -R sandbox:root /opt/maxkb/app/sandbox && \
chmod g-x /usr/local/bin/* /usr/bin/* /bin/* /usr/sbin/* /sbin/* /usr/lib/postgresql/15/bin/* && \
chmod g+x /usr/local/bin/python* /bin/sh
chmod g+x /usr/local/bin/python* /bin/sh && \
python3 /opt/maxkb/app/installer/compile.py

EXPOSE 8080

Expand Down
86 changes: 86 additions & 0 deletions installer/compile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# coding=utf-8
"""
@project: MaxKB
@Author:虎
@file: compile.py
@date:2024/12/23 14:11
@desc:
"""
import os
import sys
import shutil
from py_compile import compile


def clean(path_str: str):
for parent, dir_name, filename in os.walk(path_str):
for dir_str in dir_name:
if dir == '__pycache__':
fullname = os.path.join(parent, dir_str)
try:
shutil.rmtree(fullname)
print("Success clean Folder:%s" % fullname)
except Exception as e:
print("Can't clean Folder:%s, reason:%s" % (fullname, e))


def compile_pyc(path_str: str):
for parent, dir_name, filename in os.walk(path_str):
for cfile in filename:
fullname = os.path.join(parent, cfile)
if cfile[-3:] == '.py':
try:
if compile(fullname):
if cfile != 'settings.py' and cfile != 'wsgi.py':
os.remove(fullname) # 删除原文件,保留settings.py和wsgi.py
print("Success compile and remove file:%s" % fullname)
else:
print("Can't compile file:%s,The original file has been retained" % fullname)
except Exception as e:
print("Can't compile file:%s, reason:%s" % (fullname, e))


def move(path_str: str):
for parent, dir_name, filename in os.walk(path_str):
for c_file in filename:
fullname = os.path.join(parent, c_file)
if c_file[-4:] == '.pyc':
try:
if parent.endswith('__pycache__'):
parent_path = os.path.dirname(parent)
shutil.move(fullname, parent_path)
print('update the dir of file successfully')
except Exception as e:
print("Can't move file:%s, reason:%s" % (fullname, e))


def replace_name(path_str: str):
for parent, dir_name, filename in os.walk(path_str):
for c_file in filename:
fullname = os.path.join(parent, c_file)
if c_file[-4:] == '.pyc':
try:
cfile_name = ''
cfile_list = c_file.split('.')
version = sys.version_info
replace_name_str = 'cpython-' + str(version[0]) + str(version[1])
for i in range(len(cfile_list)):
if cfile_list[i] == replace_name_str:
continue
cfile_name += cfile_list[i]
if i == len(cfile_list) - 1:
continue
cfile_name += '.'
shutil.move(fullname, os.path.join(parent, cfile_name))
print('update the name of the file successfully')
except Exception as e:
print("Can't remove file:%s, reason:%s" % (fullname, e))


if __name__ == '__main__':
path = "/opt/maxkb/app/apps"
clean(path)
compile_pyc(path)
move(path)
replace_name(path)
clean(path)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are several issues with this script:

  1. Directory Path Calculation: The os.path.join function is used incorrectly. It expects strings, so you should pass them separately.

  2. File Extension Checking: The file extension checking logic needs to be updated to correctly identify Python files ('*.py') instead of just '.py'.

  3. File Removal Logic: Removing .pcy files when they are inside __pycache__ directories can lead to errors because the directory itself remains and doesn't affect runtime imports.

  4. Name Replacement Logic: The replacement of names based on Python version information might not work correctly due to missing parts of sys.version_info.

  5. Main Entry Point: The main entry point checks for path at the top level, which causes an error unless it's defined beforehand.

Here’s a revised version of the script addressing these issues:

# coding=utf-8
"""
    @project: MaxKB
    @Author:虎
    @file: compile.py
    @date:2024/12/23 14:11
    @desc:
"""
import os
import shutil

def clean(src_dir):
    """
    Remove all cache folders from src_dir.
    """
    for root, dirs, _ in os.walk(src_dir):
        for dir_name in dirs:
            if dir_name == '__pycache__':
                pth = os.path.join(root, dir_name)
                try:
                    shutil.rmtree(pth)
                    print(f'Successful cleanup folder: {pth}')
                except Exception as e:
                    print(f'Unable to delete folder {pth}: {e}')

def compile_pyc(source_dir):
    """
    Compile .py files into .pyc files within source_dir.
    Keep settings.py and wsgi.py intact.
    """
    for root, _, filenames in os.walk(source_dir):
        for filename in filenames:
            if filename.endswith('.py'):
                full_path = os.path.join(root, filename)
                try:
                    compiled = compile(full_path)[0]
                    if compiled and filename not in ['settings.py', 'wsgi.py']:
                        os.remove(full_path)
                    print(f'Succesfully compiled and removed {full_path}')
                except Exception as e:
                    print(f'Failed to compile {full_path}: {e}')

def update_directories(src_dir):
    """
    Move .pyc files from '__pycache__' directories back to their original locations.
    Update their names based on their respective versions.
    """
    from distutils.sysconfig import get_python_lib
    python_exe = get_python_lib()[:-1] + '/site-packages'

    for root, dirs, _ in os.walk(dst_dir):
        for dir_name in dirs:
            if dir_name == '__pycache__':
                pth = os.path.join(root, dir_name)
                for filename in os.listdir(pth):
                    if filename.endswith('.pyc'):
                        full_path = os.path.join(pth, filename)
                        new_filename = f"{filename}.cpython-{sys.version[:3]}"
                        dst_path = os.path.join(python_exe, *root.replace(src_dir, '').split(os.sep), new_filename)
                        
                        try:
                            shutil.move(full_path, dst_path)
                            print(f'Successfully moved and renamed {full_path} to {dst_path}')
                        except Exception as e:
                            print(f'Failed to move or rename {full_path}: {e}')


if __name__ == "__main__":
    src_dir = "/opt/maxkb/app/apps"
    
    # Clean up old caches
    print("\nCleaning up ...", end="")
    clean(src_dir)

    # Compile .py -> .pyc
    if input("Compile pyc? [Y/n]: ").lower() != "n":
        compile_pyc(src_dir)
    else:
        print("\nSkipping compilation...")

    # Update directories structure
    print("\nUpdating directories ...")
    update_directories(src_dir)

Key Changes made:

  • Corrected directory and file name joining using os.path.join.
  • Fixed file type detection ensuring only .py files are processed.
  • Removed unnecessary file removal operations related to __pycache__ directories since Python handles caching internally.
  • Provided guidance for user interaction through a simple prompt in console.
  • Updated the update_directories function to correctly handle path manipulations across systems (Python 2 vs 3 compatibility).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some potential optimizations:

  1. Handle exceptions more gracefully and include meaningful log information.
  2. Consider using multiprocessing to parallelize some of the operations for better performance, especially on larger directories.
  3. Check if a file is already compiled before attempting to recompile it, to avoid unnecessary work.
  4. Ensure that all files are moved to their correct location within the __pycache__ directory to maintain consistency.

Additionally, note that os.walk() can be inefficient with very large directories due to its memory usage pattern (it loads entire lists of subdirectories into memory). You might consider refining how you handle recursion and processing in such cases.

Loading