-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
76 lines (63 loc) · 2.79 KB
/
Dockerfile
File metadata and controls
76 lines (63 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# ---------- builder ----------
# Compiles the release binary. Dependencies are compiled first against a stub
# main.rs so that this expensive layer is only rebuilt when Cargo.toml or
# Cargo.lock change, not on every source edit.
FROM rust:1.90-bookworm AS builder
# "-C strip=symbols" asks rustc to strip symbols from the linked binary.
ENV CARGO_TERM_COLOR=always RUSTFLAGS="-C strip=symbols"
WORKDIR /src

# Prebuild deps
COPY Cargo.toml Cargo.lock ./
# --locked makes cargo fail instead of silently re-resolving dependencies when
# Cargo.lock does not match Cargo.toml, so the image is built from exactly the
# committed lockfile.
# The stub's own artifacts in target/release/deps are removed afterwards so the
# next build recompiles the crate from the real sources.
RUN mkdir -p src && echo "fn main(){}" > src/main.rs && \
    cargo build --release --locked && \
    rm -rf target/release/deps/qwen_embed_8_rs* && rm -rf src

# Build real binary
COPY src ./src
RUN cargo build --release --locked
# ---------- fetch onnxruntime ----------
# Downloads the prebuilt linux-x64 ONNX Runtime release tarball and unpacks it
# into /opt/onnxruntime. Override the release with --build-arg ORT_VERSION=...
FROM debian:bookworm-slim AS ort
ARG ORT_VERSION=1.23.2
WORKDIR /opt/onnxruntime
# - The apt package index is removed in the same layer, matching the other
#   stages in this file.
# - curl retries transient failures (--retry) instead of failing the build on
#   the first network hiccup; -f still turns HTTP errors into a non-zero exit.
# - --strip-components=1 drops the archive's top-level directory so its
#   contents land directly in the WORKDIR.
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl tar && \
    rm -rf /var/lib/apt/lists/* && \
    curl -fsSL --retry 5 --retry-delay 5 -o onnxruntime.tgz \
        "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    tar -xzf onnxruntime.tgz --strip-components=1 && rm -f onnxruntime.tgz
# ---------- runtime dep ----------
# Throwaway stage whose only job is to have the libgomp1 package (GNU OpenMP
# runtime) installed so that later stages can copy files out of it.
FROM debian:bookworm-slim AS syslibs
RUN set -e; \
    apt-get update; \
    apt-get install --yes --no-install-recommends \
        libgomp1; \
    rm -rf /var/lib/apt/lists/*
# ---------- model (robust downloader) ----------
# Downloads the ONNX graph, its external-data file and the tokenizer from
# Hugging Face into /download/models/Qwen3-Embedding-8B-onnx.
FROM debian:bookworm-slim AS model
WORKDIR /download
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates aria2 && \
    rm -rf /var/lib/apt/lists/*

# Hugging Face 8B ONNX with external data + tokenizer
ARG HF_REPO="Maxi-Lein/Qwen3-Embedding-8B-onnx"
# Branch, tag or commit hash to download from. The default keeps the previous
# behaviour (tip of "main"); pass --build-arg HF_REVISION=<commit> to pin the
# exact model files and get a reproducible image.
ARG HF_REVISION="main"
ENV HF_BASE_URL="https://huggingface.co/${HF_REPO}/resolve/${HF_REVISION}"

# fetch <target dir> <output file name> <path inside the repo>
#   -c              resume a partially downloaded file
#   -x16 -s16       up to 16 connections to the server, file split in 16 parts
#   --max-tries=0   aria2 treats 0 as "retry without limit", waiting
#                   --retry-wait seconds between attempts
# `set -e` aborts the RUN as soon as any download fails.
RUN set -eu; \
    model_dir="models/Qwen3-Embedding-8B-onnx"; \
    fetch() { \
        aria2c -c -x16 -s16 \
            --retry-wait=5 --max-tries=0 \
            --dir="$1" --out="$2" \
            "${HF_BASE_URL}/$3?download=1"; \
    }; \
    mkdir -p "${model_dir}/onnx"; \
    fetch "${model_dir}/onnx" model.onnx      onnx/model.onnx; \
    fetch "${model_dir}/onnx" model.onnx_data onnx/model.onnx_data; \
    fetch "${model_dir}"      tokenizer.json  tokenizer.json
# ---------- runtime ----------
# Final image: distroless base plus the artifacts produced by the stages above.
# COPY steps are ordered from least to most frequently changing so that a code
# change only invalidates the small binary layer, not the large model layer.
FROM gcr.io/distroless/cc-debian12:nonroot
WORKDIR /app

# ORT shared libs + OpenMP
COPY --from=ort /opt/onnxruntime/lib /opt/onnxruntime/lib
COPY --from=syslibs /usr/lib/x86_64-linux-gnu/libgomp.so.1 /usr/lib/x86_64-linux-gnu/libgomp.so.1

# Model + tokenizer in layout expected by main.rs
COPY --from=model /download/models /app/models

# Rust binary (crate name qwen_embed_8_rs)
COPY --from=builder /src/target/release/qwen_embed_8_rs /app/qwen_embed_8_rs

# ORT_DYLIB_PATH points at the ONNX Runtime shared library copied above
# (presumably read by the binary's ONNX Runtime bindings to load it at
# startup - confirm against main.rs); LD_LIBRARY_PATH lets the dynamic loader
# find the copied ORT and libgomp libraries.
ENV ORT_DYLIB_PATH=/opt/onnxruntime/lib/libonnxruntime.so \
    LD_LIBRARY_PATH=/opt/onnxruntime/lib:/usr/lib/x86_64-linux-gnu

# EXPOSE is documentation only; presumably the port the service listens on.
EXPOSE 8981
USER nonroot:nonroot
ENTRYPOINT ["/app/qwen_embed_8_rs"]