F5-TTS-TRT

克隆项目

git clone https://github.com/SWivid/F5-TTS.git

切换工作目录

cd F5-TTS/src/f5_tts/runtime/triton_trtllm

新建脚本:vi build_F5_tts_assets.sh

#!/bin/bash
#
# build_F5_tts_assets.sh — build all runtime assets for F5-TTS on Triton:
#   Stage 0: download the F5-TTS checkpoint from Hugging Face
#   Stage 1: convert the checkpoint and build the TensorRT-LLM engine
#   Stage 2: export the vocos vocoder to ONNX and a TensorRT plan
#   Stage 3: assemble the Triton model repository
#
# Usage: ./build_F5_tts_assets.sh [MODEL_ID]    (default: F5TTS_Base)
# Must be run from F5-TTS/src/f5_tts/runtime/triton_trtllm.

# Fail fast: abort on command errors, unset variables, and pipeline failures.
set -euo pipefail

MODEL_ID=${1:-F5TTS_Base}

echo "Starting F5-TTS asset build for model: $MODEL_ID"
echo "Current directory: $(pwd)"

F5_TTS_HF_DOWNLOAD_PATH="./F5-TTS_HF_MODELS"
F5_TTS_TRT_LLM_CHECKPOINT_PATH="./trtllm_ckpt"
F5_TTS_TRT_LLM_ENGINE_PATH="./f5_trt_llm_engine"
VOCODER_TRT_ENGINE_PATH="./vocos_vocoder.plan"
MODEL_REPO_PATH="./model_repo"

# Use the Hugging Face mirror for downloads from mainland China.
# HF_ENDPOINT is the variable huggingface_hub actually honors; HF_MIRROR is
# kept for tools that read it.
export HF_MIRROR=https://hf-mirror.com
export HF_ENDPOINT=https://hf-mirror.com

echo "Stage 0: Downloading F5 TTS model ($MODEL_ID) from Hugging Face..."
mkdir -p "$F5_TTS_HF_DOWNLOAD_PATH"
# NOTE: --local-dir-use-symlinks is deprecated (and ignored) in newer
# huggingface_hub releases; kept for compatibility with older CLI versions.
huggingface-cli download SWivid/F5-TTS --repo-type model --include "$MODEL_ID/*" --local-dir "$F5_TTS_HF_DOWNLOAD_PATH" --local-dir-use-symlinks False

echo "Stage 1: Converting checkpoint and building TRT-LLM engine..."
python3 ./scripts/convert_checkpoint.py \
  --timm_ckpt "$F5_TTS_HF_DOWNLOAD_PATH/$MODEL_ID/model_1200000.pt" \
  --output_dir "$F5_TTS_TRT_LLM_CHECKPOINT_PATH" --model_name "$MODEL_ID"

# Patch the installed tensorrt_llm package with the F5-TTS model definition
# shipped in ./patch (the stock package does not know this architecture).
TRTLLM_MODELS_PATH=$(python3 -c "import tensorrt_llm, os; print(os.path.join(os.path.dirname(tensorrt_llm.__file__), 'models'))")
echo "Patching TensorRT-LLM models at: $TRTLLM_MODELS_PATH"
cp -r ./patch/* "$TRTLLM_MODELS_PATH/"

trtllm-build --checkpoint_dir "$F5_TTS_TRT_LLM_CHECKPOINT_PATH" \
  --max_batch_size 8 \
  --output_dir "$F5_TTS_TRT_LLM_ENGINE_PATH" --remove_input_padding disable

echo "Stage 2: Exporting vocos vocoder to ONNX and TensorRT engine..."
ONNX_VOCODER_PATH="./vocos_vocoder.onnx"
python3 scripts/export_vocoder_to_onnx.py --vocoder vocos --output-path "$ONNX_VOCODER_PATH"
bash scripts/export_vocos_trt.sh "$ONNX_VOCODER_PATH" "$VOCODER_TRT_ENGINE_PATH"

echo "Stage 3: Building Triton server model repository..."
# Start from a clean copy of the repository template each run.
rm -rf "$MODEL_REPO_PATH"
cp -r ./model_repo_f5_tts "$MODEL_REPO_PATH"

# Fill the config.pbtxt template with the concrete asset paths.
python3 scripts/fill_template.py -i "$MODEL_REPO_PATH/f5_tts/config.pbtxt" \
  vocab:"$F5_TTS_HF_DOWNLOAD_PATH/$MODEL_ID/vocab.txt",model:"$F5_TTS_HF_DOWNLOAD_PATH/$MODEL_ID/model_1200000.pt",trtllm:"$F5_TTS_TRT_LLM_ENGINE_PATH",vocoder:vocos

mkdir -p "$MODEL_REPO_PATH/vocoder/1/"
cp "$VOCODER_TRT_ENGINE_PATH" "$MODEL_REPO_PATH/vocoder/1/vocoder.plan"

echo "Cleaning up intermediate files to reduce image size..."
rm -rf "$F5_TTS_TRT_LLM_CHECKPOINT_PATH"
rm -f "$ONNX_VOCODER_PATH"

echo "F5-TTS asset build completed for model: $MODEL_ID"

新建Dockerfile:vi Dockerfile

# Build an F5-TTS Triton image with all inference assets baked in at build time.
FROM nvcr.io/nvidia/tritonserver:24.12-py3

# Model variant to bake into the image; override with --build-arg MODEL_ID=...
ARG MODEL_ID=F5TTS_Base
ENV MODEL_ID=${MODEL_ID}
ENV PYTHONIOENCODING=utf-8
ENV DEBIAN_FRONTEND=noninteractive
# Hugging Face mirror endpoints for downloads from mainland China.
ENV HF_MIRROR=https://hf-mirror.com
ENV HF_ENDPOINT=https://hf-mirror.com

RUN apt-get update && apt-get install -y --no-install-recommends \
git \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

COPY requirements-pytorch.txt /workspace/requirements-pytorch.txt

# PyPI mirror plus all Python dependencies for the asset build and runtime.
RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
pip install --no-cache-dir -r /workspace/requirements-pytorch.txt && \
pip install --no-cache-dir \
tritonclient[grpc] \
tensorrt-llm==0.16.0 \
torchaudio==2.5.1 \
jieba \
pypinyin \
librosa \
vocos \
huggingface_hub[cli]

# Clone via the ghproxy mirror (needed for GitHub access from mainland China).
RUN git clone https://ghfast.top/https://github.com/SWivid/F5-TTS.git

COPY build_F5_tts_assets.sh /workspace/F5-TTS/src/f5_tts/runtime/triton_trtllm/build_F5_tts_assets.sh
RUN chmod +x /workspace/F5-TTS/src/f5_tts/runtime/triton_trtllm/build_F5_tts_assets.sh

# Run the full asset build (download, engine build, model repo) at image
# build time so the container starts instantly with everything prebuilt.
WORKDIR /workspace/F5-TTS/src/f5_tts/runtime/triton_trtllm/
RUN ./build_F5_tts_assets.sh ${MODEL_ID}

# 8000 = HTTP, 8001 = gRPC, 8002 = metrics.
EXPOSE 8000 8001 8002

CMD ["tritonserver", "--model-repository=/workspace/F5-TTS/src/f5_tts/runtime/triton_trtllm/model_repo", "--strict-model-config=false", "--log-verbose=1"]

修改Compose:vi docker-compose.yml

# docker-compose.yml — run the prebuilt F5-TTS Triton image with GPU access.
# (Indentation restored: the pasted version was flattened to column 0, which
# is not valid YAML.)
services:
  tts:
    image: flymyd114-triton-f5-tts:${MODEL_ID:-F5TTS_Base}-prebuilt
    build:
      context: .
      dockerfile: Dockerfile
      args:
        MODEL_ID: ${MODEL_ID:-F5TTS_Base}
    # Triton needs more shared memory than Docker's 64MB default.
    shm_size: '2gb'
    ports:
      - "8000:8000"   # HTTP
      - "8001:8001"   # gRPC
      - "8002:8002"   # metrics
    environment:
      - PYTHONIOENCODING=utf-8
      - MODEL_ID=${MODEL_ID:-F5TTS_Base}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]

构建镜像

# Build the image, baking the chosen model variant into it.
cd F5-TTS/src/f5_tts/runtime/triton_trtllm/
export MODEL_ID=F5TTS_Base
docker build --build-arg MODEL_ID=${MODEL_ID} -t flymyd114-triton-f5-tts:${MODEL_ID}-prebuilt .

构建后启动镜像

# Start the prebuilt image; MODEL_ID must match the tag built above.
export MODEL_ID=F5TTS_Base
docker compose up