使用Lua脚本实现FreeSWITCH通话录音自动转写与存储

1. 业务场景与目标

客服中心通话录音后，需要自动转写为文本，存储到数据库供质检分析。目标：在FreeSWITCH通话结束时，自动触发录音转写，将录音文件路径、转写文本、时间戳等存入MySQL，实现端到端自动化。

2. 环境准备（uv + 依赖）

使用uv管理Python环境，并确保FreeSWITCH已启用Lua支持（mod_lua）；下文的Lua脚本还依赖LuaSocket库（socket.http、ltn12），需提前安装。

# Install uv (skip if it is already installed)
curl -LsSf https://astral.sh/uv/install.sh | sh
# Create the project directory
mkdir freeswitch_transcription && cd freeswitch_transcription
# Initialise a uv project; `uv add` below needs the pyproject.toml this creates
uv init
# Create and activate the virtual environment
uv venv
source .venv/bin/activate  # Linux/Mac
# Install the dependencies
uv add fastapi uvicorn pydantic requests pymysql openai-whisper torch
# FreeSWITCH must have Lua support; check the /usr/local/freeswitch/scripts/ directory

3. 数据说明（真实数据口径或模拟数据生成逻辑）

录音数据：FreeSWITCH生成的.wav文件，通常位于/var/lib/freeswitch/recordings/，文件名如${uuid}.wav。
转写文本：使用Whisper模型将.wav文件转写为中文或英文文本。
存储字段：录音文件路径、转写文本、通话唯一标识（UUID）、开始时间、结束时间、转写状态（成功/失败）。
模拟数据：可用ffmpeg生成测试.wav文件：ffmpeg -f lavfi -i sine=frequency=1000:duration=5 -ac 2 test.wav。

4. 训练/实现步骤（完整代码）

任务类型： 语音转写（序列到序列任务，但本文聚焦工程集成，不涉及模型训练）。

步骤1：创建Python转写服务（app.py）

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import subprocess
import os
import pymysql
from datetime import datetime
import logging

# Configure the root logger at INFO level so the error messages logged by the
# request handler are emitted.
logging.basicConfig(level=logging.INFO)
# The FastAPI application object; the /transcribe route is registered on it.
app = FastAPI()

# Database connection settings, passed as keyword arguments to pymysql.connect.
# Each credential can be overridden through an environment variable so real
# secrets do not have to live in the source file; the fallbacks are the sample
# values and must be replaced for a real deployment.
DB_CONFIG = {
    'host': os.environ.get('DB_HOST', 'localhost'),
    'user': os.environ.get('DB_USER', 'root'),
    'password': os.environ.get('DB_PASSWORD', 'password'),
    'database': os.environ.get('DB_NAME', 'call_records'),
    'charset': 'utf8mb4'
}

class TranscriptionRequest(BaseModel):
    """JSON body accepted by the POST /transcribe endpoint."""
    # Path of the recording; the handler checks it with os.path.exists and
    # passes it to the whisper command.
    file_path: str
    # Call identifier; stored in the call_uuid column and echoed in the response.
    call_uuid: str
    # Stored as-is in the start_time column.
    start_time: str
    # Stored as-is in the end_time column.
    end_time: str

def _extract_transcript(stdout):
    """Collect the text of every non-empty line the Whisper CLI printed.

    A leading "[start --> end]" timestamp prefix, if present, is removed from
    each line. The remaining segments are joined with newlines so the whole
    recording is kept rather than only the first line.
    """
    import re

    segments = []
    for line in (stdout or "").splitlines():
        text = re.sub(r"^\s*\[[^\]]*-->[^\]]*\]\s*", "", line).strip()
        if text:
            segments.append(text)
    return "\n".join(segments)


def _run_whisper(file_path):
    """Run the Whisper CLI on one file (blocking).

    Returns:
        A (text, status) tuple. On success ``text`` is the transcript and
        ``status`` is "success"; on timeout or any other error ``text`` is a
        human-readable failure message and ``status`` is "failed".
    """
    try:
        # Chinese transcription with the "base" model; other models can be used.
        result = subprocess.run(
            ["whisper", file_path, "--language", "Chinese", "--model", "base"],
            capture_output=True,
            text=True,
            timeout=30,  # keep a large file from hanging the request forever
        )
        if result.returncode != 0:
            raise Exception(f"Whisper failed: {result.stderr}")
    except subprocess.TimeoutExpired:
        logging.error(f"Transcription timeout for {file_path}")
        return "转写超时", "failed"
    except Exception as e:
        logging.error(f"Transcription error: {e}")
        return f"转写失败: {str(e)}", "failed"
    return _extract_transcript(result.stdout), "success"


def _store_transcription(request, transcription_text, status):
    """Insert one row into the transcriptions table (blocking).

    Returns:
        True when the row was committed, False when the insert failed (the
        error is logged and the transaction rolled back). A failure to open
        the connection is not caught here and propagates to the caller.
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        sql = """
        INSERT INTO transcriptions (call_uuid, file_path, transcription, start_time, end_time, status)
        VALUES (%s, %s, %s, %s, %s, %s)
        """
        # The cursor is opened inside the try block so the connection is
        # closed even if creating the cursor fails.
        with conn.cursor() as cursor:
            cursor.execute(sql, (request.call_uuid, request.file_path, transcription_text,
                                 request.start_time, request.end_time, status))
        conn.commit()
        return True
    except Exception as e:
        logging.error(f"Database error: {e}")
        conn.rollback()
        return False
    finally:
        conn.close()


@app.post("/transcribe")
async def transcribe_audio(request: TranscriptionRequest):
    """Transcribe a recording with Whisper and store the result in MySQL.

    Args:
        request: Recording path, call UUID and start/end timestamps.

    Returns:
        A dict with ``call_uuid``, ``transcription`` (the transcript or a
        failure message), ``status`` (always "processed") and ``stored``
        (whether the database insert was committed).

    Raises:
        HTTPException: 404 when ``request.file_path`` does not exist.
    """
    import asyncio

    file_path = request.file_path
    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="File not found")

    # Whisper and pymysql are blocking; run them in the default executor so
    # the event loop can keep serving other requests in the meantime.
    loop = asyncio.get_running_loop()
    # The success/failed status is tracked explicitly instead of being guessed
    # from the transcript text, which may legitimately contain "转写".
    transcription_text, status = await loop.run_in_executor(None, _run_whisper, file_path)
    stored = await loop.run_in_executor(
        None, _store_transcription, request, transcription_text, status
    )

    return {
        "call_uuid": request.call_uuid,
        "transcription": transcription_text,
        "status": "processed",
        "stored": stored,
    }

if __name__ == "__main__":
    import uvicorn

    # Bind address and port can be overridden with environment variables; the
    # defaults keep the previous behaviour (all interfaces, port 8000).
    # NOTE(review): the endpoint is unauthenticated and accepts a file path
    # from the caller. If only a local FreeSWITCH calls it, consider setting
    # TRANSCRIBE_HOST=127.0.0.1.
    uvicorn.run(
        app,
        host=os.environ.get("TRANSCRIBE_HOST", "0.0.0.0"),
        port=int(os.environ.get("TRANSCRIBE_PORT", "8000")),
    )

步骤2：创建Lua脚本（/usr/local/freeswitch/scripts/transcribe.lua）

-- transcribe.lua
-- FreeSWITCH Lua script that asks the transcription service to process the
-- recording of a call.

local http = require "socket.http"
local ltn12 = require "ltn12"

-- URL of the transcription service
local transcription_service_url = "http://localhost:8000/transcribe"

-- Logger with err/info functions. A "log" module is used when one is installed
-- and offers both functions; otherwise fall back to FreeSWITCH's console log,
-- and to print when running outside FreeSWITCH.
local log
do
    local ok, mod = pcall(require, "log")
    if ok and type(mod) == "table" and mod.err and mod.info then
        log = mod
    elseif freeswitch and freeswitch.consoleLog then
        log = {
            err = function(msg) freeswitch.consoleLog("err", msg .. "\n") end,
            info = function(msg) freeswitch.consoleLog("info", msg .. "\n") end,
        }
    else
        log = {
            err = function(msg) print("[ERR] " .. msg) end,
            info = function(msg) print("[INFO] " .. msg) end,
        }
    end
end

-- Encode a value as a JSON string literal. Quotes, backslashes and control
-- characters are escaped so that a path or timestamp cannot break the body.
-- nil becomes the empty string.
local function json_string(value)
    local escaped = tostring(value or ""):gsub('[%c"\\]', function(ch)
        if ch == '"' then return '\\"' end
        if ch == "\\" then return "\\\\" end
        return string.format("\\u%04x", ch:byte())
    end)
    return '"' .. escaped .. '"'
end

--- Send the recording details of one call to the transcription service.
-- Kept as a global function, as in the original script.
-- @param session object with a getVariable(name) method; the variables read
--   are "uuid", "start_stamp" and "record_path"
function transcribe(session)
    -- Missing variables fall back to "" so the log concatenations below
    -- cannot fail on nil.
    local uuid = session:getVariable("uuid") or ""
    local start_time = session:getVariable("start_stamp")
    local end_time = os.date("%Y-%m-%d %H:%M:%S")
    local recording_path = session:getVariable("record_path")

    if not recording_path or recording_path == "" then
        log.err("No recording path found for UUID: " .. uuid)
        return
    end

    -- Build the JSON request body
    local json_body = string.format(
        '{"file_path": %s, "call_uuid": %s, "start_time": %s, "end_time": %s}',
        json_string(recording_path), json_string(uuid),
        json_string(start_time), json_string(end_time)
    )

    local response_body = {}

    -- On a transport failure the first result is nil and `code` holds the
    -- error message, which the error branch below logs.
    local _, code = http.request{
        url = transcription_service_url,
        method = "POST",
        headers = {
            ["Content-Type"] = "application/json",
            ["Content-Length"] = tostring(#json_body)
        },
        source = ltn12.source.string(json_body),
        sink = ltn12.sink.table(response_body)
    }

    if code == 200 then
        log.info("Transcription triggered for UUID: " .. uuid)
    else
        log.err("Failed to trigger transcription for UUID: " .. uuid .. ", code: " .. tostring(code))
    end
end

-- Invoked from FreeSWITCH as: lua transcribe.lua transcribe
-- Returning the function is not enough, because the value returned by the
-- script is not called by anything; so run it here when the script was
-- started with the "transcribe" argument.
-- NOTE(review): mod_lua is documented to expose `argv`, a global `session`
-- when run from the dialplan, and a global `env` event (read with getHeader)
-- when run as a hangup hook. Confirm against the FreeSWITCH version in use.
if argv and argv[1] == "transcribe" then
    if session then
        transcribe(session)
    elseif env then
        -- Adapt the hangup-hook event to the getVariable interface.
        transcribe({
            getVariable = function(_, name) return env:getHeader(name) end
        })
    end
end

return transcribe

步骤3：创建数据库表（MySQL）

-- Create the database on first run; does nothing if it already exists.
CREATE DATABASE IF NOT EXISTS call_records;
USE call_records;

-- One row per transcription request. IF NOT EXISTS makes the script safe to
-- re-run, matching the CREATE DATABASE statement above.
CREATE TABLE IF NOT EXISTS transcriptions (
    id INT AUTO_INCREMENT PRIMARY KEY,
    call_uuid VARCHAR(255) NOT NULL,
    file_path VARCHAR(500) NOT NULL,
    transcription TEXT,
    start_time DATETIME,
    end_time DATETIME,
    -- Defaults to 'pending' when the column is omitted from an INSERT.
    status VARCHAR(50) DEFAULT 'pending',
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_uuid (call_uuid),
    INDEX idx_status (status)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

5. 调用方式（离线批量 + 单条示例，至少一种）

单条示例： 在FreeSWITCH拨号计划中，通话结束后调用Lua脚本。

<!-- Add to conf/dialplan/default.xml, before the extensions that route the call -->
<!-- continue="true" lets the dialplan go on to the following extensions;
     without it this catch-all would be the only extension applied to a call. -->
<extension name="transcribe_after_call" continue="true">
  <condition field="destination_number" expression="^.*$">
    <!-- Dialplan actions run while the call is being routed, not when it
         ends, so register the script as a hangup hook instead of running it
         immediately. -->
    <action application="set" data="api_hangup_hook=lua transcribe.lua transcribe"/>
  </condition>
</extension>

通话结束时，自动执行transcribe函数，发送录音信息到Python服务。

离线批量： 直接调用Python服务API处理历史录音文件。

# POST one recording to the /transcribe endpoint; repeat per file for a batch run.
curl -X POST http://localhost:8000/transcribe \
  -H "Content-Type: application/json" \
  -d '{"file_path": "/path/to/recording.wav", "call_uuid": "test123", "start_time": "2023-10-01 10:00:00", "end_time": "2023-10-01 10:05:00"}'

6. 指标说明

本文任务为语音转写，属序列生成，常用指标为词错误率（WER, Word Error Rate），但工程集成中更关注可用性指标：

转写成功率： 成功转写次数 / 总调用次数，直接反映服务稳定性，目标 >95%。
平均转写延迟： 从调用到完成的时间，影响实时性，客服场景建议 <30秒。
存储成功率： 数据成功存入数据库的比例，确保数据不丢失。

7. 上线后评估（离线监控、线上指标、重训触发条件）

离线监控： 日志分析转写失败原因（如文件缺失、Whisper超时），数据库检查数据完整性。
线上指标： 监控转写服务HTTP响应码（200比例）、平均响应时间、MySQL连接数（示例代码每次请求新建并关闭连接，未使用连接池）。
重训触发条件： 本文直接使用预训练Whisper模型，不涉及重训；若抽检发现转写错误率持续偏高，可先改用更大的模型（如small）。

8. 常见坑与排查

录音文件过大处理超时： Whisper转写大文件可能超时；在Python服务中设置subprocess超时（如代码中timeout=30），并监控文件大小，超过阈值（如100MB）先压缩或分片。
转写服务故障导致数据丢失： Lua脚本HTTP请求失败时，记录错误日志；可添加重试机制或消息队列（如Redis）缓冲请求。
Lua脚本性能瓶颈： 同步HTTP调用阻塞FreeSWITCH；改为异步（如通过event socket发送事件到外部服务），或限制并发调用数。
数据库连接泄露： 确保Python服务中每次数据库操作后关闭连接（使用try-finally）。
路径权限问题： 确保FreeSWITCH用户有权限写入录音目录，运行Python服务的用户有权限读取这些录音文件，且服务使用的数据库账号对transcriptions表有INSERT权限。