...
This commit is contained in:
@@ -69,4 +69,4 @@ vllm bench throughput \
|
||||
|
||||
|
||||
VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1
|
||||
vllm bench throughput --model openai/gpt-oss-20b --dataset-name random --input-len 12000 --output-len 6000 --num-prompts 8 --max-model-len 20000 --gpu-memory-utilization 0.7 --kv-cache-dtype fp8
|
||||
vllm bench throughput --model openai/gpt-oss-20b --dataset-name random --input-len 12000 --output-len 6000 --num-prompts 8 --max-model-len 20000 --gpu-memory-utilization 0.7 --kv-cache-dtype fp8
|
||||
|
||||
Reference in New Issue
Block a user