Files
rknn-inference-server/docker-compose.yml
2026-02-24 21:22:12 -05:00

49 lines
1.5 KiB
YAML

services:
  # One-shot model downloader. Runs only when the "init" profile is
  # activated (docker compose --profile init up whisper-models-init) and
  # writes the model files into the shared `whisper-models` volume.
  whisper-models-init:
    build:
      context: .
    image: ${OPENAI_WHISPER_IMAGE:-rk-whisper-stt-api:latest}
    command: ["python", "/app/app/download_models.py", "--target", "/models"]
    volumes:
      # Read-write: this job populates the volume consumed by whisper-stt.
      - whisper-models:/models
    profiles: ["init"]
whisper-stt:
build:
context: .
image: ${OPENAI_WHISPER_IMAGE:-rk-whisper-stt-api:latest}
restart: unless-stopped
ports:
- "${STT_PORT:-9000}:9000"
environment:
MODEL_NAME: ${MODEL_NAME:-whisper-base-onnx}
STT_API_KEY: ${STT_API_KEY:-}
ENCODER_MODEL_PATH: /models/whisper_encoder_base_20s.onnx
DECODER_MODEL_PATH: /models/whisper_decoder_base_20s.onnx
MEL_FILTERS_PATH: /models/mel_80_filters.txt
VOCAB_EN_PATH: /models/vocab_en.txt
VOCAB_ZH_PATH: /models/vocab_zh.txt
MAX_DECODE_TOKENS: ${MAX_DECODE_TOKENS:-128}
VLM_ENABLED: ${VLM_ENABLED:-false}
VLM_MODEL_NAME: ${VLM_MODEL_NAME:-qwen3-vl-2b-rkllm}
VLM_CORE_NUM: ${VLM_CORE_NUM:-3}
VLM_MAX_NEW_TOKENS: ${VLM_MAX_NEW_TOKENS:-256}
VLM_MAX_CONTEXT_LEN: ${VLM_MAX_CONTEXT_LEN:-4096}
VLM_TIMEOUT_SEC: ${VLM_TIMEOUT_SEC:-300}
volumes:
- whisper-models:/models:ro
- rkllm-root:/opt/rkllm-root:ro
devices:
- /dev/dri:/dev/dri
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:9000/health')"]
interval: 20s
timeout: 5s
retries: 3
# Named volumes shared across services. Explicit `name:` pins the volume
# name so it is not prefixed with the Compose project name.
volumes:
  # Populated by whisper-models-init; mounted read-only by whisper-stt.
  whisper-models:
    name: whisper-models
  # NOTE(review): nothing in this file populates rkllm-root — presumably
  # filled by an external process or another compose file; verify.
  rkllm-root:
    name: rkllm-root