This commit is contained in:
2025-10-24 11:21:01 +04:00
parent dea3b0ec7f
commit 052557537f
19 changed files with 34 additions and 467 deletions

View File

@@ -69,4 +69,4 @@ vllm bench throughput \
VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1
vllm bench throughput --model openai/gpt-oss-20b --dataset-name random --input-len 12000 --output-len 6000 --num-prompts 8 --max-model-len 20000 --gpu-memory-utilization 0.7 --kv-cache-dtype fp8
vllm bench throughput --model openai/gpt-oss-20b --dataset-name random --input-len 12000 --output-len 6000 --num-prompts 8 --max-model-len 20000 --gpu-memory-utilization 0.7 --kv-cache-dtype fp8