# rknn-inference-server/stack.yml

# Service definitions. Values written as "${VAR:-default}" are substituted
# at deploy time; the default applies when VAR is unset or empty.
services:
  # One container whose environment carries both the Whisper/STT settings
  # and the VLM_* settings.
  whisper-stt:
    image: "${OPENAI_WHISPER_IMAGE:-registry.lan/openai-whisper-stt:latest}"

    # Exactly one replica, pinned by hostname to a single node
    # (tpi-n1 unless STT_NODE_HOSTNAME overrides it).
    deploy:
      replicas: 1
      placement:
        constraints:
          - "node.hostname == ${STT_NODE_HOSTNAME:-tpi-n1}"
      restart_policy:
        condition: on-failure

    networks:
      - dokploy-network

    # Container port 9000 is published in host mode, i.e. bound on the node
    # that runs the task rather than through the ingress routing mesh.
    ports:
      - target: 9000
        published: "${STT_PORT:-9000}"
        protocol: tcp
        mode: host

    volumes:
      # Backs the /models/* paths referenced in the environment below.
      - type: volume
        source: whisper-models
        target: /models
        read_only: true
      - type: volume
        source: rkllm-root
        target: /opt/rkllm-root
        read_only: true
      # Host device directory passed through as a bind mount.
      # NOTE(review): presumably needed for accelerator access - confirm.
      - type: bind
        source: /dev/dri
        target: /dev/dri

    environment:
      # Speech-to-text settings. STT_API_KEY defaults to an empty string.
      MODEL_NAME: "${MODEL_NAME:-whisper-base-onnx}"
      STT_API_KEY: "${STT_API_KEY:-}"
      # Fixed asset paths inside the read-only /models mount.
      ENCODER_MODEL_PATH: /models/whisper_encoder_base_20s.onnx
      DECODER_MODEL_PATH: /models/whisper_decoder_base_20s.onnx
      MEL_FILTERS_PATH: /models/mel_80_filters.txt
      VOCAB_EN_PATH: /models/vocab_en.txt
      VOCAB_ZH_PATH: /models/vocab_zh.txt
      MAX_DECODE_TOKENS: "${MAX_DECODE_TOKENS:-128}"
      # VLM settings, each overridable from the deploy environment.
      VLM_ENABLED: "${VLM_ENABLED:-true}"
      VLM_MODEL_NAME: "${VLM_MODEL_NAME:-qwen3-vl-2b-rkllm}"
      VLM_CORE_NUM: "${VLM_CORE_NUM:-3}"
      VLM_MAX_NEW_TOKENS: "${VLM_MAX_NEW_TOKENS:-256}"
      VLM_MAX_CONTEXT_LEN: "${VLM_MAX_CONTEXT_LEN:-4096}"
      VLM_TIMEOUT_SEC: "${VLM_TIMEOUT_SEC:-300}"

# Named volumes mounted by the whisper-stt service, declared with default
# settings. The service mounts both read-only, so their contents are
# presumably provisioned outside this file - confirm.
volumes:
  whisper-models: {}
  rkllm-root: {}

# Network the stack joins but does not create: "external: true" means it
# must already exist before this file is deployed.
networks:
  dokploy-network:
    external: true