Qwen Image Edit Plus

# Livebook setup - copy this entire cell to run
Mix.install([
  {:pythonx, "~> 0.4.7"},
  {:jason, "~> 1.4.4"},
  {:req, "~> 0.5.0"},
  {:opentelemetry_api, "~> 1.3"},
  {:opentelemetry, "~> 1.3"},
  {:opentelemetry_exporter, "~> 1.0"}
])

# Disable OpenTelemetry exporters so traces and metrics stay quiet in the notebook
Application.put_env(:opentelemetry, :span_processor, :batch)
Application.put_env(:opentelemetry, :traces_exporter, :none)
Application.put_env(:opentelemetry, :metrics_exporter, :none)
Application.put_env(:opentelemetry, :logs_exporter, :none)

Logger.configure(level: :info)
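
The exporters above are set to `:none`, which silences telemetry entirely. If you would rather see spans in the console, the Erlang SDK ships a stdout exporter; a minimal sketch (this needs to take effect before the SDK starts tracing):

# A sketch: print spans to stdout instead of discarding them
Application.put_env(:opentelemetry, :traces_exporter, {:otel_exporter_stdout, []})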

Python Environment Setup

# Initialize Python environment with Qwen Image Edit Plus dependencies
Pythonx.uv_init("""
[project]
name = "qwen-image-edit-plus"
version = "0.0.0"
requires-python = "==3.10.*"
dependencies = [
  "diffusers @ git+https://github.com/huggingface/diffusers",
  "transformers",
  "accelerate",
  "pillow",
  "torch",
  "torchvision",
  "numpy",
  "huggingface-hub",
  "gitpython",
  "bitsandbytes",
]

[tool.uv.sources]
torch = { index = "pytorch-cu118" }
torchvision = { index = "pytorch-cu118" }

[[tool.uv.index]]
name = "pytorch-cu118"
url = "https://download.pytorch.org/whl/cu118"
explicit = true
""")

IO.puts("✓ Python environment initialized with Qwen Image Edit Plus dependencies")

Configuration

# Configure the image editing parameters
config = %{
  images: ["path/to/image1.jpg", "path/to/image2.jpg"],  # 1-3 input images
  prompt: "Make the background blue and add some clouds",
  go_fast: true,
  use_4bit: false,
  aspect_ratio: "match_input_image",  # parsed below but not currently passed to the pipeline
  output_format: "webp",
  output_quality: 95
}

IO.puts("Configuration:")
IO.inspect(config, pretty: true)
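
Since model loading is slow, it can help to verify the configured paths before committing to a run; a small sketch using only the standard library:

# A sketch: fail fast on missing input images before the model is loaded
missing = Enum.reject(config.images, &File.exists?/1)

if missing != [] do
  IO.puts("⚠ Missing input images: #{inspect(missing)}")
end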

Model Download (Optional)

# Pre-download the Qwen Image Edit Plus weights (optional).
# If you skip this cell, the weights are downloaded automatically on first inference.

# Uncomment to pre-download (assumes a HuggingFaceDownloader helper defined elsewhere;
# it is not part of this notebook):
# repo_id = "Qwen/Qwen-Image-Edit-2509"
# IO.puts("Downloading Qwen Image Edit Plus models...")
# HuggingFaceDownloader.download_repo(repo_id, "pretrained_weights/Qwen-Image-Edit-2509", "Qwen-Image-Edit-2509", true)
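
If no such helper is available, huggingface_hub (already in the uv dependency list) can pre-fetch the weights directly; a minimal sketch, with local_dir as an arbitrary choice:

# A sketch: pre-download via huggingface_hub's snapshot_download
Pythonx.eval("""
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="Qwen/Qwen-Image-Edit-2509",
    local_dir="pretrained_weights/Qwen-Image-Edit-2509",
)
print("[OK] Weights downloaded")
""", %{})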

Run the Image Edit

# Save config to JSON for Python
config_json = Jason.encode!(config)
config_file = "/tmp/qwen_edit_config_#{System.system_time(:millisecond)}.json"
File.write!(config_file, config_json)

# Run image editing
try do
  # Capture the returned Python globals so the loaded `pipe` persists into
  # the second Pythonx.eval below (each eval otherwise starts from the map it is given)
  {_, py_globals} = Pythonx.eval(~S"""
import json
import os
import sys
import logging
from pathlib import Path
from PIL import Image
import torch
from diffusers import QwenImageEditPlusPipeline

logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
logging.getLogger("transformers").setLevel(logging.ERROR)
logging.getLogger("diffusers").setLevel(logging.ERROR)
os.environ["TRANSFORMERS_VERBOSITY"] = "error"
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"

from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

_original_tqdm_init = tqdm.__init__
def _silent_tqdm_init(self, *args, **kwargs):
    kwargs['disable'] = True
    return _original_tqdm_init(self, *args, **kwargs)
tqdm.__init__ = _silent_tqdm_init

cpu_count = os.cpu_count()
half_cpu_count = max(1, cpu_count // 2)  # avoid 0 threads on single-core hosts
os.environ["MKL_NUM_THREADS"] = str(half_cpu_count)
os.environ["OMP_NUM_THREADS"] = str(half_cpu_count)
torch.set_num_threads(half_cpu_count)

# Load configuration
""" <> """
config_file_path = r"#{String.replace(config_file, "\\", "\\\\")}"
with open(config_file_path, 'r', encoding='utf-8') as f:
    config = json.load(f)

images = config.get('images', [])
prompt = config.get('prompt')
go_fast = config.get('go_fast', True)
use_4bit = config.get('use_4bit', False)
aspect_ratio = config.get('aspect_ratio', 'match_input_image')
output_format = config.get('output_format', 'webp')
output_quality = config.get('output_quality', 95)

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16 if device == "cuda" else torch.float32

# Performance optimizations
if device == "cuda":
    torch.set_float32_matmul_precision("high")
    torch._inductor.config.conv_1x1_as_mm = True
    torch._inductor.config.coordinate_descent_tuning = True
    torch._inductor.config.epilogue_fusion = False
    torch._inductor.config.coordinate_descent_check_all_directions = True

# Choose model
if use_4bit and device == "cuda":
    MODEL_ID = "ovedrive/Qwen-Image-Edit-2509-4bit"
    print("[INFO] Using 4-bit quantized model")
    dtype = None
else:
    MODEL_ID = "Qwen/Qwen-Image-Edit-2509"
    dtype = torch.bfloat16 if device == "cuda" else torch.float32

print(f"[INFO] Device: {device}")
print(f"[INFO] Loading model: {MODEL_ID}")

# Load pipeline
load_kwargs = {}
if dtype is not None:
    load_kwargs["torch_dtype"] = dtype

pipe = QwenImageEditPlusPipeline.from_pretrained(MODEL_ID, **load_kwargs)
pipe = pipe.to(device)

# Performance optimizations
if device == "cuda":
    try:
        if hasattr(pipe, 'transformer'):
            pipe.transformer.to(memory_format=torch.channels_last)
        if hasattr(pipe, 'vae') and hasattr(pipe.vae, 'decode'):
            pipe.vae.to(memory_format=torch.channels_last)
        print("[OK] Memory format optimized")
    except Exception as e:
        print(f"[INFO] Memory format optimization: {e}")

    try:
        import triton
        if hasattr(pipe, 'transformer'):
            pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False)
        if hasattr(pipe, 'vae') and hasattr(pipe.vae, 'decode'):
            pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=False)
        print("[OK] torch.compile enabled")
    except ImportError:
        print("[INFO] Triton not installed - skipping torch.compile")
    except Exception as e:
        if "Triton" in str(e) or "triton" in str(e).lower():
            print("[INFO] Triton not available - skipping torch.compile")
        else:
            print(f"[INFO] torch.compile not available: {e}")

print(f"[OK] Pipeline loaded on {device}")
""", %{})

  # Validate the input images up front (the editing step reloads them in Python)
  input_images = Enum.map(config.images, fn img_path ->
    if File.exists?(img_path) do
      Pythonx.eval("""
from PIL import Image
img = Image.open(r"#{String.replace(img_path, "\\", "\\\\")}").convert("RGB")
print(f"[OK] Loaded image: #{Path.basename(img_path)} ({img.size[0]}x{img.size[1]})")
""", %{})
      img_path
    else
      IO.puts("❌ Image not found: #{img_path}")
      nil
    end
  end) |> Enum.reject(&is_nil/1)

  if Enum.empty?(input_images) do
    raise "No valid input images found"
  end

  # Edit with the pipeline loaded above; py_globals carries `pipe`,
  # `device`, `torch`, and `Image` over from the first eval
  Pythonx.eval(~S"""
import json
import time
from pathlib import Path
""" <> """
config_file_path = r"#{String.replace(config_file, "\\", "\\\\")}"
with open(config_file_path, 'r', encoding='utf-8') as f:
    config = json.load(f)

images = #{inspect(input_images)}
prompt = "#{String.replace(config.prompt, "\"", "\\\"")}"
go_fast = config.get('go_fast', True)
aspect_ratio = config.get('aspect_ratio', 'match_input_image')
output_format = config.get('output_format', 'webp')
output_quality = config.get('output_quality', 95)

# Load input images
input_images = []
for img_path in images:
    img_path_resolved = Path(img_path).resolve()
    if not img_path_resolved.exists():
        raise FileNotFoundError(f"Image file not found: {img_path_resolved}")
    img = Image.open(str(img_path_resolved)).convert("RGB")
    input_images.append(img)

output_dir = Path("output")
output_dir.mkdir(exist_ok=True)

# Prepare generator
generator = torch.Generator(device=device)
generator.manual_seed(0)

# Map go_fast to num_inference_steps
num_inference_steps = 20 if go_fast else 40

print(f"[INFO] Starting edit: {prompt[:50]}...")
print(f"[INFO] Parameters: {len(input_images)} image(s), {num_inference_steps} steps")

# Edit image
with torch.inference_mode():
    output = pipe(
        image=input_images,
        prompt=prompt,
        generator=generator,
        true_cfg_scale=4.0,
        negative_prompt=" ",
        num_inference_steps=num_inference_steps,
        guidance_scale=1.0,
        num_images_per_prompt=1,
    )

print("[INFO] Editing complete, processing image...")

image = output.images[0]

tag = time.strftime("%Y%m%d_%H_%M_%S")
export_dir = output_dir / tag
export_dir.mkdir(exist_ok=True)

output_filename = f"qwen_edit_{tag}.{output_format}"
output_path = export_dir / output_filename

if output_format.lower() == "webp":
    image.save(str(output_path), "WEBP", quality=output_quality)
elif output_format.lower() in ["jpg", "jpeg"]:
    if image.mode == "RGBA":
        background = Image.new("RGB", image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[3] if image.mode == "RGBA" else None)
        image = background
    image.save(str(output_path), "JPEG", quality=output_quality)
else:
    image.save(str(output_path), "PNG")

print(f"[OK] Saved image to {output_path}")
print(f"OUTPUT_PATH:{output_path}")
""", %{})

  # Find the edited image
  output_dir = Path.expand("output")
  if File.exists?(output_dir) do
    dirs = output_dir
             |> File.ls!()
             |> Enum.filter(&amp;File.dir?(Path.join(output_dir, &amp;1)))
             |> Enum.sort(:desc)

    if dirs != [] do
      latest_dir = List.first(dirs)
      pattern = "qwen_edit_*.#{config.output_format}"
      files = Path.join([output_dir, latest_dir]) |> Path.join(pattern) |> Path.wildcard()
      if files != [] do
        output_path = List.first(files)
        IO.puts("✓ Image edited successfully!")
        IO.puts("Output: #{output_path}")
      else
        IO.puts("❌ No output file found")
      end
    else
      IO.puts("❌ No output directory found")
    end
  else
    IO.puts("❌ Output directory does not exist")
  end

rescue
  e ->
    IO.puts("❌ Error during image editing: #{inspect(e)}")
after
  # Cleanup
  File.rm(config_file)
end

Usage Instructions

  1. Setup: Run the first cell to install dependencies
  2. Configure: Update the config map with your image paths and prompt (a minimal example follows this list)
  3. Run: Execute the editing cell to modify images
  4. Results: Check the output directory for edited images
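
As a concrete starting point, a minimal single-image configuration could look like this (the image path is a placeholder to replace with a real file):

# Hypothetical minimal config: one input image, defaults elsewhere
config = %{
  images: ["photos/portrait.jpg"],  # placeholder path
  prompt: "Replace the background with a sunset over the ocean",
  go_fast: true,
  use_4bit: false,
  aspect_ratio: "match_input_image",
  output_format: "webp",
  output_quality: 95
}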

Notes

  • Requires CUDA-compatible GPU for best performance
  • Model will be downloaded automatically on first run (~20GB)
  • Supports 1-3 input images for advanced editing
  • Use 4-bit quantization to reduce VRAM requirements (see the snippet below)
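
To try the quantized path, flipping the flag on the existing config map is enough; a sketch:

# A sketch: switch to the 4-bit checkpoint (ovedrive/Qwen-Image-Edit-2509-4bit)
config = %{config | use_4bit: true}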