#---
# name: auto_gptq
# group: llm
# config: config.py
# depends: [transformers]
# requires: '>=34.1.0'
# test: test.py
# docs: docs.md
#---
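# the block above is package metadata parsed by the jetson-containers build system:
# `depends` orders this build after the transformers container, and `requires`
# gates it to L4T releases >= R34.1.0 (JetPack 5)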
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
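# AUTOGPTQ_BRANCH selects the git ref to build; TORCH_CUDA_ARCH_LIST scopes the
# CUDA kernel build (e.g. "7.2;8.7" covers Xavier/Orin-class Jetson GPUs)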
ARG AUTOGPTQ_BRANCH
ARG TORCH_CUDA_ARCH_LIST
WORKDIR /opt
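# pin the build to the branch HEAD: this ref metadata changes whenever the branch
# moves, which invalidates the Docker layer cache for the build steps below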
ADD https://api.github.com/repos/PanQiWei/AutoGPTQ/git/refs/heads/${AUTOGPTQ_BRANCH} /tmp/autogptq_version.json
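# pre-install gekko (a dependency of AutoGPTQ) in its own cached layer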
RUN pip3 install --no-cache-dir gekko
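# build AutoGPTQ from source; the first sed swaps a subprocess-based CPU-count
# lookup in setup.py for os.cpu_count(), and the second makes the
# `platform.system() != 'Windows'` branch unreachable (presumably to skip a
# code path that misbehaves on aarch64/Jetson)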
RUN git clone --branch=${AUTOGPTQ_BRANCH} --depth=1 https://github.com/PanQiWei/AutoGPTQ.git && \
    cd AutoGPTQ && \
    sed 's|p = int(subprocess.*|p = os.cpu_count()|g' -i setup.py && \
    sed "s|if platform.system() != 'Windows':|if platform.system() == 'disabled':|" -i setup.py && \
    python3 setup.py --verbose bdist_wheel && \
    cp dist/auto_gptq*.whl /opt
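# install the wheel built above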
RUN pip3 install --no-cache-dir --verbose /opt/auto_gptq*.whl
# patch the benchmark to log memory stats: append a torch.cuda.max_memory_allocated()
# line after the device-map log and after each benchmark_generation_speed() call,
# then cat the patched script so the change is visible in the build log
RUN sed '/ logger.info(f"model device map.*/a\ \ \ \ logger.info(f"torch.cuda.max_memory_allocated: {torch.cuda.max_memory_allocated()}")' -i /opt/AutoGPTQ/examples/benchmark/generation_speed.py && \
    sed '/ benchmark_generation_speed(model.*/a\ \ \ \ logger.info(f"torch.cuda.max_memory_allocated: {torch.cuda.max_memory_allocated()}")' -i /opt/AutoGPTQ/examples/benchmark/generation_speed.py && \
    cat AutoGPTQ/examples/benchmark/generation_speed.py
WORKDIR /
# make sure it loads
RUN pip3 show auto-gptq && python3 -c 'import auto_gptq'
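# usage sketch (inside the built container; assumes the example script's CLI,
# which may differ across AutoGPTQ versions):
#   python3 /opt/AutoGPTQ/examples/benchmark/generation_speed.py --model_name_or_path <model>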