update logs

This commit is contained in:
2026-02-25 10:06:41 -05:00
parent c187c3398f
commit aa74ef0a36
3 changed files with 56 additions and 3 deletions

View File

@@ -1,4 +1,5 @@
import base64 import base64
import logging
import os import os
import re import re
import subprocess import subprocess
@@ -75,6 +76,7 @@ STATE: dict[str, Any] = {
} }
ANSI_RE = re.compile(r"\x1B\[[0-9;]*[A-Za-z]") ANSI_RE = re.compile(r"\x1B\[[0-9;]*[A-Za-z]")
LOGGER = logging.getLogger("uvicorn.error")
def read_vocab(path: str) -> dict[str, str]: def read_vocab(path: str) -> dict[str, str]:
@@ -154,16 +156,21 @@ def decode_tokens(vocab: dict[str, str], token_ids: list[int], language: str) ->
return text return text
def transcribe_file(path: str, language: str) -> str: def transcribe_file(path: str, language: str) -> tuple[str, dict[str, float | int]]:
t0 = time.perf_counter()
waveform, sr = sf.read(path) waveform, sr = sf.read(path)
waveform = to_mono(np.asarray(waveform, dtype=np.float32)) waveform = to_mono(np.asarray(waveform, dtype=np.float32))
waveform = ensure_sample_rate(waveform, sr) waveform = ensure_sample_rate(waveform, sr)
mel = log_mel_spectrogram(waveform, STATE["mel_filters"]) mel = log_mel_spectrogram(waveform, STATE["mel_filters"])
encoder_input = pad_or_trim(mel) encoder_input = pad_or_trim(mel)
t_pre = time.perf_counter()
encoded = STATE["encoder"].run(None, {"x": encoder_input})[0] encoded = STATE["encoder"].run(None, {"x": encoder_input})[0]
t_enc = time.perf_counter()
tokens = [50258, TASK_CODE[language], 50359, 50363] tokens = [50258, TASK_CODE[language], 50359, 50363]
tokens = tokens * 3
pop_index = 12
emitted: list[int] = [] emitted: list[int] = []
for _ in range(MAX_DECODE_TOKENS): for _ in range(MAX_DECODE_TOKENS):
@@ -178,11 +185,26 @@ def transcribe_file(path: str, language: str) -> str:
if next_token == END_TOKEN: if next_token == END_TOKEN:
break break
tokens.append(next_token) tokens.append(next_token)
if pop_index > 4:
pop_index -= 1
tokens.pop(pop_index)
if next_token <= TIMESTAMP_BEGIN: if next_token <= TIMESTAMP_BEGIN:
emitted.append(next_token) emitted.append(next_token)
vocab = STATE["vocab_en"] if language == "en" else STATE["vocab_zh"] vocab = STATE["vocab_en"] if language == "en" else STATE["vocab_zh"]
return decode_tokens(vocab, emitted, language) text = decode_tokens(vocab, emitted, language)
t_end = time.perf_counter()
stats: dict[str, float | int] = {
"audio_sec": float(len(waveform) / SAMPLE_RATE),
"pre_ms": float((t_pre - t0) * 1000.0),
"enc_ms": float((t_enc - t_pre) * 1000.0),
"dec_ms": float((t_end - t_enc) * 1000.0),
"total_ms": float((t_end - t0) * 1000.0),
"tokens": int(len(emitted)),
}
return text, stats
def convert_to_wav(src_path: str) -> str: def convert_to_wav(src_path: str) -> str:
@@ -413,13 +435,28 @@ async def transcriptions(
with open(input_path, "wb") as f: with open(input_path, "wb") as f:
f.write(payload) f.write(payload)
wav_path = convert_to_wav(input_path) wav_path = convert_to_wav(input_path)
text = transcribe_file(wav_path, language) text, perf = transcribe_file(wav_path, language)
finally: finally:
if os.path.exists(input_path): if os.path.exists(input_path):
os.unlink(input_path) os.unlink(input_path)
if wav_path and os.path.exists(wav_path): if wav_path and os.path.exists(wav_path):
os.unlink(wav_path) os.unlink(wav_path)
total_s = perf["total_ms"] / 1000.0 if perf["total_ms"] else 0.0
rtf = (total_s / perf["audio_sec"]) if perf["audio_sec"] else 0.0
LOGGER.info(
"stt_perf model=%s lang=%s audio_sec=%.3f pre_ms=%.1f enc_ms=%.1f dec_ms=%.1f total_ms=%.1f rtf=%.3f tokens=%d",
model,
language,
perf["audio_sec"],
perf["pre_ms"],
perf["enc_ms"],
perf["dec_ms"],
perf["total_ms"],
rtf,
perf["tokens"],
)
if response_format == "text": if response_format == "text":
return PlainTextResponse(text) return PlainTextResponse(text)
if response_format == "verbose_json": if response_format == "verbose_json":

View File

@@ -35,6 +35,12 @@ services:
- rkllm-root:/opt/rkllm-root:ro - rkllm-root:/opt/rkllm-root:ro
devices: devices:
- /dev/dri:/dev/dri - /dev/dri:/dev/dri
- /dev/mali0:/dev/mali0
- /dev/dma_heap:/dev/dma_heap
cap_add:
- SYS_ADMIN
security_opt:
- seccomp=unconfined
healthcheck: healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:9000/health')"] test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:9000/health')"]
interval: 20s interval: 20s

View File

@@ -27,6 +27,16 @@ services:
- type: bind - type: bind
source: /dev/dri source: /dev/dri
target: /dev/dri target: /dev/dri
- type: bind
source: /dev/mali0
target: /dev/mali0
- type: bind
source: /dev/dma_heap
target: /dev/dma_heap
cap_add:
- SYS_ADMIN
security_opt:
- seccomp=unconfined
deploy: deploy:
replicas: 1 replicas: 1
placement: placement: