# Files
# rknn-inference-server/stack.yml
# 2026-02-24 21:22:12 -05:00
#
# 49 lines
# 1.4 KiB
# YAML

# Swarm stack for the RKNN inference server: a single whisper-stt service that
# serves Whisper STT (ONNX encoder/decoder) and, optionally, an RKLLM-backed
# VLM. Indentation reconstructed per the Compose schema; all ${VAR:-default}
# substitutions are quoted so the values stay plain strings after interpolation.
services:
  whisper-stt:
    image: "${OPENAI_WHISPER_IMAGE:-registry.lan/openai-whisper-stt:latest}"
    ports:
      # Long syntax; host mode publishes directly on the node that runs the task.
      - target: 9000
        published: "${STT_PORT:-9000}"
        protocol: tcp
        mode: host
    environment:
      MODEL_NAME: "${MODEL_NAME:-whisper-base-onnx}"
      # Empty default: API-key auth is disabled unless STT_API_KEY is set.
      STT_API_KEY: "${STT_API_KEY:-}"
      # Model assets are read from the whisper-models volume mounted at /models.
      ENCODER_MODEL_PATH: /models/whisper_encoder_base_20s.onnx
      DECODER_MODEL_PATH: /models/whisper_decoder_base_20s.onnx
      MEL_FILTERS_PATH: /models/mel_80_filters.txt
      VOCAB_EN_PATH: /models/vocab_en.txt
      VOCAB_ZH_PATH: /models/vocab_zh.txt
      MAX_DECODE_TOKENS: "${MAX_DECODE_TOKENS:-128}"
      # Quoted so the boolean-looking default reaches the container as the
      # string "true" rather than being retyped by YAML tooling.
      VLM_ENABLED: "${VLM_ENABLED:-true}"
      VLM_MODEL_NAME: "${VLM_MODEL_NAME:-qwen3-vl-2b-rkllm}"
      VLM_CORE_NUM: "${VLM_CORE_NUM:-3}"
      VLM_MAX_NEW_TOKENS: "${VLM_MAX_NEW_TOKENS:-256}"
      VLM_MAX_CONTEXT_LEN: "${VLM_MAX_CONTEXT_LEN:-4096}"
      VLM_TIMEOUT_SEC: "${VLM_TIMEOUT_SEC:-300}"
    volumes:
      # Read-only model/runtime assets from named volumes.
      - whisper-models:/models:ro
      - rkllm-root:/opt/rkllm-root:ro
      # Bind-mount the DRM/NPU device nodes from the host.
      - type: bind
        source: /dev/dri
        target: /dev/dri
    deploy:
      replicas: 1
      placement:
        constraints:
          # Pin to the node with the accelerator hardware (default: tpi-n2).
          - "node.hostname == ${STT_NODE_HOSTNAME:-tpi-n2}"
      restart_policy:
        condition: on-failure
    networks:
      - dokploy-network

volumes:
  # Explicit names so the volumes are shared across stack redeploys
  # instead of being prefixed with the stack name.
  whisper-models:
    name: whisper-models
  rkllm-root:
    name: rkllm-root

networks:
  # Pre-existing overlay network managed outside this stack (by Dokploy).
  dokploy-network:
    external: true