# Docker Compose stack for a Whisper speech-to-text API on a Rockchip (RK) board.
#
# Two services share one image:
#   - whisper-models-init: one-shot model downloader, gated behind the "init"
#     profile (run with: docker compose --profile init up whisper-models-init).
#   - whisper-stt: the long-running STT API server, reading the downloaded
#     models read-only from the shared "whisper-models" volume.

services:
  # One-shot job that populates the shared model volume.
  # Only started when the "init" profile is explicitly enabled.
  whisper-models-init:
    build:
      context: .
    image: "${OPENAI_WHISPER_IMAGE:-rk-whisper-stt-api:latest}"
    command: ["python", "/app/app/download_models.py", "--target", "/models"]
    volumes:
      - whisper-models:/models
    profiles: ["init"]

  # The STT API server. Expects the model volume to already be populated
  # by whisper-models-init (models are mounted read-only below).
  whisper-stt:
    build:
      context: .
    image: "${OPENAI_WHISPER_IMAGE:-rk-whisper-stt-api:latest}"
    restart: unless-stopped
    ports:
      # Host port is configurable; container always listens on 9000.
      # Quoted to avoid YAML's sexagesimal port-mapping trap.
      - "${STT_PORT:-9000}:9000"
    environment:
      # All values are quoted so interpolation defaults (false, 128, 4096, …)
      # are always passed to the app as strings, never retyped by YAML.
      MODEL_NAME: "${MODEL_NAME:-whisper-base-onnx}"
      # Optional API key; empty default means auth is presumably disabled —
      # NOTE(review): confirm against the app's auth handling.
      STT_API_KEY: "${STT_API_KEY:-}"
      # Fixed model-artifact paths inside the read-only /models mount.
      ENCODER_MODEL_PATH: /models/whisper_encoder_base_20s.onnx
      DECODER_MODEL_PATH: /models/whisper_decoder_base_20s.onnx
      MEL_FILTERS_PATH: /models/mel_80_filters.txt
      VOCAB_EN_PATH: /models/vocab_en.txt
      VOCAB_ZH_PATH: /models/vocab_zh.txt
      MAX_DECODE_TOKENS: "${MAX_DECODE_TOKENS:-128}"
      # Optional vision-language-model sidecar features (RKLLM runtime).
      VLM_ENABLED: "${VLM_ENABLED:-false}"
      VLM_MODEL_NAME: "${VLM_MODEL_NAME:-qwen3-vl-2b-rkllm}"
      VLM_CORE_NUM: "${VLM_CORE_NUM:-3}"
      VLM_MAX_NEW_TOKENS: "${VLM_MAX_NEW_TOKENS:-256}"
      VLM_MAX_CONTEXT_LEN: "${VLM_MAX_CONTEXT_LEN:-4096}"
      VLM_TIMEOUT_SEC: "${VLM_TIMEOUT_SEC:-300}"
    volumes:
      # Models are read-only at runtime; only the init job writes them.
      - whisper-models:/models:ro
      - rkllm-root:/opt/rkllm-root:ro
    devices:
      # DRM device passthrough — presumably required for the Rockchip
      # NPU/GPU runtime; confirm against the board's driver setup.
      - /dev/dri:/dev/dri
    healthcheck:
      # Uses the image's own Python instead of curl/wget, which may be
      # absent from a slim base image.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:9000/health')"]
      interval: 20s
      timeout: 5s
      retries: 3
      # Grace period so slow model loading at startup does not flip the
      # container to "unhealthy" before the server is ready.
      start_period: 30s

volumes:
  # Shared model store: written by whisper-models-init, read by whisper-stt.
  whisper-models:
  # RKLLM runtime root, mounted read-only into the STT container.
  rkllm-root: