import os
from gtts import gTTS
from moviepy.editor import (
    AudioFileClip, ImageClip, CompositeVideoClip, ColorClip,
    concatenate_videoclips, vfx
)
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import contextlib

# ===== Settings =====
INPUT_DIR = "input"    # narration.txt and the optional slide images are read from here
OUTPUT_DIR = "output"  # the rendered video is written here
TEMP_DIR = "temp"      # per-section TTS audio files are written here
SIZE = (1280, 720)     # default frame size as (width, height)
FPS = 24               # frame rate passed to the video writer

# Both writable directories must exist before anything is saved into them.
for _writable_dir in (OUTPUT_DIR, TEMP_DIR):
    os.makedirs(_writable_dir, exist_ok=True)

# ===== Title image generation (used when a section has no slide) =====
def make_title_clip(title: str, duration: float, size=SIZE):
    """Render ``title`` as centred white text on a black frame.

    The text is wrapped character by character so that no line is wider than
    90% of the frame width, and the resulting block of lines is centred on
    the canvas.

    Args:
        title: Text to draw.
        duration: Length of the returned clip, passed to ``set_duration``.
        size: ``(width, height)`` of the frame.

    Returns:
        An ``ImageClip`` built from the rendered frame.
    """
    w, h = size
    img = Image.new("RGB", size, (0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Candidate fonts, tried in order; the first one that loads is used.
    font_candidates = [
        "Yu Gothic UI.ttf", "YuGothR.ttc", "Meiryo.ttc",
        "/System/Library/Fonts/ヒラギノ角ゴシック W6.ttc",
    ]
    font = None
    for path in font_candidates:
        try:
            font = ImageFont.truetype(path, size=64)
        except OSError:
            # This font cannot be opened on this machine; try the next one.
            # Only OSError is caught so Ctrl-C and real bugs still surface.
            continue
        break
    if font is None:
        font = ImageFont.load_default()

    # Greedy per-character wrap: keep extending the current line until the
    # next character would push it past the width limit.
    max_w = int(w * 0.9)
    lines, line = [], ""
    for ch in title:
        candidate_w = draw.textbbox((0, 0), line + ch, font=font)[2]
        if candidate_w <= max_w or not line:
            # Either it fits, or `ch` alone is already too wide; starting a
            # new line would only emit an empty one, so keep it here.
            line += ch
        else:
            lines.append(line)
            line = ch
    if line:
        lines.append(line)

    # Measure every line once and reuse the boxes for both the total height
    # and the per-line placement.
    line_gap = 10
    boxes = [draw.textbbox((0, 0), text, font=font) for text in lines]
    total_h = sum(box[3] for box in boxes) + line_gap * (len(lines) - 1)

    y = (h - total_h) // 2
    for text, (_, _, tw, th) in zip(lines, boxes):
        x = (w - tw) // 2  # centre this line horizontally
        draw.text((x, y), text, fill=(255, 255, 255), font=font)
        y += th + line_gap

    return ImageClip(np.array(img)).set_duration(duration)

def find_slide_image(index: int):
    """Return the path of the slide image for section ``index``, or ``None``.

    Looks in ``INPUT_DIR`` for ``slide_<index>`` with a ``.png``, ``.jpg``,
    ``.jpeg`` or ``.webp`` extension, in that order, and returns the first
    path that exists.
    """
    candidates = (
        os.path.join(INPUT_DIR, f"slide_{index}{suffix}")
        for suffix in (".png", ".jpg", ".jpeg", ".webp")
    )
    # The generator is lazy, so paths are built and probed one at a time.
    return next(filter(os.path.exists, candidates), None)

# ===== Load the narration script =====
with open(os.path.join(INPUT_DIR, "narration.txt"), "r", encoding="utf-8") as f:
    raw = f.read()

# Split into sections on "#". NOTE: every "#" acts as a separator, including
# any that appear inside a section's body text.
sections = [s.strip() for s in raw.split("#") if s.strip()]

# Speed multiplier handed to vfx.speedx for the finished video.
SPEED_FACTOR = 1.5

out_path = os.path.join(OUTPUT_DIR, "final_video.mp4")

video_clips = []   # (video clip, its audio clip) for every rendered section
opened_clips = []  # every clip opened so far, so cleanup can reach all of them
final = None

try:
    for i, section in enumerate(sections):
        # Sections are non-empty after the filter above, so there is always a
        # first line: it is the title, everything after it is the narration.
        title_line, *body_lines = section.splitlines()
        title = title_line.strip()
        body = "\n".join(body_lines).strip()
        if not body:
            # Nothing to narrate for this section; skip it.
            continue

        # Generate the narration audio.
        voice_path = os.path.join(TEMP_DIR, f"voice_{i}.mp3")
        tts = gTTS(body, lang="ja")
        tts.save(voice_path)

        audio_clip = AudioFileClip(voice_path)
        # Register immediately so it is closed even if a later step fails.
        opened_clips.append(audio_clip)
        duration = audio_clip.duration

        # Picture: use the slide image if one exists, else a title card.
        image_path = find_slide_image(i)
        if image_path:
            img_clip = ImageClip(image_path).set_duration(duration)
        else:
            bg = ColorClip(size=SIZE, color=(0, 0, 0), duration=duration)
            title_clip = make_title_clip(title, duration)
            img_clip = CompositeVideoClip([bg, title_clip.set_position("center")], size=SIZE).set_duration(duration)

        clip = img_clip.set_audio(audio_clip)
        opened_clips.append(clip)
        video_clips.append((clip, audio_clip))

    # ===== Output =====
    if not video_clips:
        raise RuntimeError("動画にできるクリップがありません。")

    final = concatenate_videoclips([vc for vc, _ in video_clips], method="compose")
    final = final.fx(vfx.speedx, SPEED_FACTOR)
    final.write_videofile(out_path, fps=FPS)
finally:
    # Cleanup: runs on success and on failure. Each close is suppressed on
    # its own so one failing close cannot prevent the others from running.
    if final is not None:
        with contextlib.suppress(Exception):
            final.close()
    for resource in reversed(opened_clips):
        with contextlib.suppress(Exception):
            resource.close()

print(f"[done] {out_path}")
