...

2025-10-24 11:21:01 +04:00
parent dea3b0ec7f
commit 052557537f
19 changed files with 34 additions and 467 deletions
--- a/examples/readme.md
+++ b/examples/readme.md
@@ -69,4 +69,4 @@ vllm bench throughput \


 VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1 
-vllm bench throughput   --model openai/gpt-oss-20b   --dataset-name random   --input-len 12000   --output-len 6000   --num-prompts 8   --max-model-len 20000   --gpu-memory-utilization 0.7   --kv-cache-dtype fp8
+vllm bench throughput   --model openai/gpt-oss-20b   --dataset-name random   --input-len 12000   --output-len 6000   --num-prompts 8   --max-model-len 20000   --gpu-memory-utilization 0.7    --kv-cache-dtype fp8