文本到图像¶
来源 https://github.com/vllm-project/vllm-omni/tree/main/examples/online_serving/text_to_image.
此示例演示了如何使用 vLLM-Omni 部署 Qwen-Image 模型以提供在线图像生成服务。
启动服务器¶
基本启动¶
带参数启动¶
或使用启动脚本
API 调用¶
方法 1: 使用 curl¶
# Basic text-to-image generation
bash run_curl_text_to_image.sh
# Or execute directly
curl -s http://localhost:8091/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"messages": [
{"role": "user", "content": "A beautiful landscape painting"}
],
"extra_body": {
"height": 1024,
"width": 1024,
"num_inference_steps": 50,
"true_cfg_scale": 4.0,
"seed": 42
}
}' | jq -r '.choices[0].message.content[0].image_url.url' | cut -d',' -f2 | base64 -d > output.png
方法 2: 使用 Python 客户端¶
方法 3: 使用 Gradio Demo¶
请求格式¶
简单的文本生成¶
带参数的生成¶
使用 extra_body 传递生成参数
{
"messages": [
{"role": "user", "content": "A beautiful landscape painting"}
],
"extra_body": {
"height": 1024,
"width": 1024,
"num_inference_steps": 50,
"true_cfg_scale": 4.0,
"seed": 42
}
}
多模态输入 (文本 + 结构化内容)¶
{
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": "A beautiful landscape painting"}
]
}
]
}
生成参数 (extra_body)¶
| 参数 | 类型 | 默认值 | 描述 |
|---|---|---|---|
| height | int | None | 图像高度(像素) |
| width | int | None | 图像宽度(像素) |
| size | str | None | 图像尺寸(例如 "1024x1024") |
| num_inference_steps | int | 50 | 去噪步数 |
| true_cfg_scale | float | 4.0 | Qwen-Image CFG 缩放比例 |
| seed | int | None | 随机种子(可复现) |
| negative_prompt | str | None | 负面提示 |
| num_outputs_per_prompt | int | 1 | 要生成的图像数量 |
响应格式¶
{
"id": "chatcmpl-xxx",
"created": 1234567890,
"model": "Qwen/Qwen-Image",
"choices": [{
"index": 0,
"message": {
"role": "assistant",
"content": [{
"type": "image_url",
"image_url": {
"url": "data:image/png;base64,..."
}
}]
},
"finish_reason": "stop"
}],
"usage": {...}
}
提取图像¶
# Extract base64 from response and decode to image
cat response.json | jq -r '.choices[0].message.content[0].image_url.url' | cut -d',' -f2 | base64 -d > output.png
文件描述¶
| 文件 | 描述 |
|---|---|
| run_server.sh | 服务器启动脚本 |
| run_curl_text_to_image.sh | curl 示例 |
| openai_chat_client.py | Python 客户端 |
| gradio_demo.py | Gradio 交互式界面 |
示例材料¶
gradio_demo.py
#!/usr/bin/env python3
"""
Qwen-Image Gradio Demo for online serving.
Usage:
python gradio_demo.py [--server http://localhost:8091] [--port 7860]
"""
import argparse
import base64
from io import BytesIO
import gradio as gr
import requests
from PIL import Image
def generate_image(
    prompt: str,
    height: int,
    width: int,
    steps: int,
    cfg_scale: float,
    seed: int | None,
    negative_prompt: str,
    server_url: str,
) -> Image.Image | None:
    """Request one image from the chat completions endpoint.

    Args:
        prompt: Text sent as the single user message.
        height: Sent as ``extra_body["height"]``.
        width: Sent as ``extra_body["width"]``.
        steps: Sent as ``extra_body["num_inference_steps"]``.
        cfg_scale: Sent as ``extra_body["true_cfg_scale"]``.
        seed: Sent only when it is not None and non-negative.
        negative_prompt: Sent only when non-empty.
        server_url: Base URL; ``/v1/chat/completions`` is appended.

    Returns:
        The decoded PIL image, or None when the response content is not a
        non-empty list whose first item carries a ``data:image`` URL.

    Raises:
        gr.Error: On any exception raised while sending the request or
            decoding the response.
    """
    # Generation parameters travel in extra_body; optional ones are added
    # only when the caller actually supplied them.
    generation_params = {
        "height": height,
        "width": width,
        "num_inference_steps": steps,
        "true_cfg_scale": cfg_scale,
    }
    if seed is not None and seed >= 0:
        generation_params["seed"] = seed
    if negative_prompt:
        generation_params["negative_prompt"] = negative_prompt

    request_body = {
        "messages": [{"role": "user", "content": prompt}],
        "extra_body": generation_params,
    }

    try:
        reply = requests.post(
            f"{server_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=request_body,
            timeout=300,
        )
        reply.raise_for_status()

        parts = reply.json()["choices"][0]["message"]["content"]
        if not isinstance(parts, list) or not parts:
            return None

        data_url = parts[0].get("image_url", {}).get("url", "")
        if not data_url.startswith("data:image"):
            return None

        # A data URL looks like "data:image/png;base64,<payload>".
        _, encoded = data_url.split(",", 1)
        return Image.open(BytesIO(base64.b64decode(encoded)))
    except Exception as e:
        print(f"Error: {e}")
        raise gr.Error(f"Generation failed: {e}")
def create_demo(server_url: str):
    """Build the Gradio Blocks interface for the Qwen-Image demo.

    Args:
        server_url: Base URL of the serving endpoint; passed to
            ``generate_image`` on every button click.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """

    def _on_generate(prompt_text, img_height, img_width, num_steps, cfg, seed_value, negative_text):
        """Click handler: normalize the seed and call ``generate_image``."""
        # The seed field may deliver None when it is left empty; comparing
        # None with 0 raises TypeError, so test for None before the sign.
        # Negative values keep their "-1 for random" meaning.
        if seed_value is None or seed_value < 0:
            seed_value = None
        return generate_image(
            prompt_text,
            img_height,
            img_width,
            num_steps,
            cfg,
            seed_value,
            negative_text,
            server_url,
        )

    with gr.Blocks(title="Qwen-Image Demo") as demo:
        gr.Markdown("# Qwen-Image Online Generation")
        gr.Markdown("Generate images using Qwen-Image model")

        with gr.Row():
            # Left column: prompt and generation controls.
            with gr.Column(scale=1):
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Describe the image you want to generate...",
                    lines=3,
                )
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    placeholder="Describe what you don't want...",
                    lines=2,
                )

                with gr.Row():
                    height = gr.Slider(
                        label="Height",
                        minimum=256,
                        maximum=2048,
                        value=1024,
                        step=64,
                    )
                    width = gr.Slider(
                        label="Width",
                        minimum=256,
                        maximum=2048,
                        value=1024,
                        step=64,
                    )

                with gr.Row():
                    steps = gr.Slider(
                        label="Inference Steps",
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                    )
                    cfg_scale = gr.Slider(
                        label="True CFG Scale",
                        minimum=1.0,
                        maximum=20.0,
                        value=4.0,
                        step=0.5,
                    )

                with gr.Row():
                    seed = gr.Number(
                        label="Random Seed (-1 for random)",
                        value=-1,
                        precision=0,
                    )

                generate_btn = gr.Button("Generate Image", variant="primary")

            # Right column: generated image.
            with gr.Column(scale=1):
                output_image = gr.Image(
                    label="Generated Image",
                    type="pil",
                )

        # Examples; each row follows the order of `inputs` below.
        gr.Examples(
            examples=[
                ["A beautiful landscape painting with misty mountains", "", 1024, 1024, 50, 4.0, 42],
                ["A cute cat sitting on a windowsill with sunlight", "", 1024, 1024, 50, 4.0, 123],
                ["Cyberpunk style futuristic city with neon lights", "blurry, low quality", 1024, 768, 50, 4.0, 456],
                ["Chinese ink painting of bamboo forest with a house", "", 768, 1024, 50, 4.0, 789],
            ],
            inputs=[prompt, negative_prompt, height, width, steps, cfg_scale, seed],
        )

        generate_btn.click(
            fn=_on_generate,
            inputs=[prompt, height, width, steps, cfg_scale, seed, negative_prompt],
            outputs=[output_image],
        )

    return demo
def main():
    """Parse command-line options and launch the Gradio demo.

    Options:
        --server: Base URL of the image generation server.
        --port: Local port the Gradio app listens on.
        --share: Ask Gradio to create a public link.
    """
    parser = argparse.ArgumentParser(description="Qwen-Image Gradio Demo")
    # The default must include a host; a URL such as "https://:8091" has an
    # empty host and cannot be connected to.
    parser.add_argument("--server", default="http://localhost:8091", help="Server URL")
    parser.add_argument("--port", type=int, default=7860, help="Gradio port")
    parser.add_argument("--share", action="store_true", help="Create public link")
    args = parser.parse_args()

    print(f"Connecting to server: {args.server}")
    demo = create_demo(args.server)
    demo.launch(server_port=args.port, share=args.share)


if __name__ == "__main__":
    main()
openai_chat_client.py
#!/usr/bin/env python3
"""
Qwen-Image OpenAI-compatible chat client for image generation.
Usage:
python openai_chat_client.py --prompt "A beautiful landscape" --output output.png
python openai_chat_client.py --prompt "A sunset" --height 1024 --width 1024 --steps 50 --seed 42
"""
import argparse
import base64
from pathlib import Path
import requests
def generate_image(
prompt: str,
server_url: str = "https://:8091",
height: int | None = None,
width: int | None = None,
steps: int | None = None,
true_cfg_scale: float | None = None,
seed: int | None = None,
negative_prompt: str | None = None,
num_outputs_per_prompt: int = 1,
) -> bytes | None:
"""Generate an image using the chat completions API.
Args:
prompt: Text description of the image
server_url: Server URL
height: Image height in pixels
width: Image width in pixels
steps: Number of inference steps
true_cfg_scale: Qwen-Image CFG scale
seed: Random seed
negative_prompt: Negative prompt
num_outputs_per_prompt: Number of images to generate
Returns:
Image bytes or None if failed
"""
messages = [{"role": "user", "content": prompt}]
# Build extra_body with generation parameters
extra_body = {}
if height is not None:
extra_body["height"] = height
if width is not None:
extra_body["width"] = width
if steps is not None:
extra_body["num_inference_steps"] = steps
if true_cfg_scale is not None:
extra_body["true_cfg_scale"] = true_cfg_scale
if seed is not None:
extra_body["seed"] = seed
if negative_prompt:
extra_body["negative_prompt"] = negative_prompt
if num_outputs_per_prompt > 1:
extra_body["num_outputs_per_prompt"] = num_outputs_per_prompt
# Build request payload
payload = {"messages": messages}
if extra_body:
payload["extra_body"] = extra_body
# Send request
try:
response = requests.post(
f"{server_url}/v1/chat/completions",
headers={"Content-Type": "application/json"},
json=payload,
timeout=300,
)
response.raise_for_status()
data = response.json()
# Extract image from response
content = data["choices"][0]["message"]["content"]
if isinstance(content, list) and len(content) > 0:
image_url = content[0].get("image_url", {}).get("url", "")
if image_url.startswith("data:image"):
_, b64_data = image_url.split(",", 1)
return base64.b64decode(b64_data)
print(f"Unexpected response format: {content}")
return None
except Exception as e:
print(f"Error: {e}")
return None
def main():
    """Parse command-line options, generate one image and save it to disk.

    Exits with status 1 when ``generate_image`` returns no image bytes.
    """
    parser = argparse.ArgumentParser(description="Qwen-Image chat client")
    parser.add_argument("--prompt", "-p", default="a cup of coffee on the table", help="Text prompt")
    parser.add_argument("--output", "-o", default="qwen_image_output.png", help="Output file")
    # The default must include a host; a URL such as "https://:8091" has an
    # empty host and cannot be connected to.
    parser.add_argument("--server", "-s", default="http://localhost:8091", help="Server URL")
    parser.add_argument("--height", type=int, default=1024, help="Image height")
    parser.add_argument("--width", type=int, default=1024, help="Image width")
    parser.add_argument("--steps", type=int, default=50, help="Inference steps")
    parser.add_argument("--cfg-scale", type=float, default=4.0, help="True CFG scale")
    parser.add_argument("--seed", type=int, default=42, help="Random seed")
    parser.add_argument("--negative", help="Negative prompt")
    args = parser.parse_args()

    print(f"Generating image for: {args.prompt}")
    image_bytes = generate_image(
        prompt=args.prompt,
        server_url=args.server,
        height=args.height,
        width=args.width,
        steps=args.steps,
        true_cfg_scale=args.cfg_scale,
        seed=args.seed,
        negative_prompt=args.negative,
    )

    if not image_bytes:
        print("Failed to generate image")
        # The bare exit() helper is injected by the site module for
        # interactive use and may be absent (e.g. under python -S);
        # SystemExit always works.
        raise SystemExit(1)

    output_path = Path(args.output)
    output_path.write_bytes(image_bytes)
    print(f"Image saved to: {output_path}")
    print(f"Size: {len(image_bytes) / 1024:.1f} KB")


if __name__ == "__main__":
    main()
run_curl_text_to_image.sh
#!/bin/bash
# Qwen-Image text-to-image curl example
#
# Sends one chat-completions request and decodes the base64 data URL in the
# response into an image file. Configure through environment variables:
#   SERVER  base URL of the server  (default: http://localhost:8091)
#   PROMPT  text prompt             (default: a cup of coffee on the table)
#   OUTPUT  output image path       (default: qwen_image_output.png)
# Requires: curl, jq, cut, base64.

# Make the pipeline below fail if ANY stage fails, not only the last one.
set -o pipefail

SERVER="${SERVER:-http://localhost:8091}"
PROMPT="${PROMPT:-a cup of coffee on the table}"
OUTPUT="${OUTPUT:-qwen_image_output.png}"

echo "Generating image..."
echo "Prompt: $PROMPT"
echo "Output: $OUTPUT"

# Build the request body with jq so that quotes, backslashes and newlines in
# PROMPT are JSON-escaped instead of corrupting the payload.
PAYLOAD="$(jq -n --arg prompt "$PROMPT" '{
  messages: [
    {role: "user", content: $prompt}
  ],
  extra_body: {
    height: 1024,
    width: 1024,
    num_inference_steps: 50,
    true_cfg_scale: 4.0,
    seed: 42,
    num_outputs_per_prompt: 1
  }
}')"

# curl -f turns HTTP errors into a non-zero exit; jq -e does the same when
# the image URL is missing (null). The redirect creates OUTPUT even when the
# pipeline fails, so success also requires a non-empty file ([ -s ]).
if curl -sSf "$SERVER/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "$PAYLOAD" \
  | jq -er '.choices[0].message.content[0].image_url.url' \
  | cut -d',' -f2 \
  | base64 -d > "$OUTPUT" \
  && [ -s "$OUTPUT" ]; then
  echo "Image saved to: $OUTPUT"
  echo "Size: $(du -h "$OUTPUT" | cut -f1)"
else
  echo "Failed to generate image"
  exit 1
fi