From 11b0910b38faba086c50d95cdbead0d472394e6a Mon Sep 17 00:00:00 2001
From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com>
Date: Thu, 10 Jul 2025 19:46:12 +0100
Subject: [PATCH] wip: AI assistant in desktop editor

---
 apps/desktop/src-tauri/src/ai_vision.rs       |  89 ++
 apps/desktop/src-tauri/src/lib.rs             |   5 +-
 .../desktop/src/routes/editor/AIAssistant.tsx | 947 ++++++++++++++++++
 apps/desktop/src/routes/editor/Header.tsx     |   2 +
 apps/desktop/src/utils/tauri.ts               |  13 +
 apps/web/app/api/desktop/[...route]/root.ts   |  20 +-
 apps/web/app/api/desktop/ai-editor/route.ts   | 322 ++++++
 packages/ui-solid/src/auto-imports.d.ts       |   5 +
 8 files changed, 1393 insertions(+), 10 deletions(-)
 create mode 100644 apps/desktop/src-tauri/src/ai_vision.rs
 create mode 100644 apps/desktop/src/routes/editor/AIAssistant.tsx
 create mode 100644 apps/web/app/api/desktop/ai-editor/route.ts

diff --git a/apps/desktop/src-tauri/src/ai_vision.rs b/apps/desktop/src-tauri/src/ai_vision.rs
new file mode 100644
index 000000000..bafc639b5
--- /dev/null
+++ b/apps/desktop/src-tauri/src/ai_vision.rs
@@ -0,0 +1,89 @@
+use serde::{Deserialize, Serialize};
+use specta::Type;
+use std::path::Path;
+use tauri::AppHandle;
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct FrameAnalysis {
+    pub timestamp: f64,
+    pub objects: Vec<DetectedObject>,
+    pub scene_description: String,
+    pub dominant_colors: Vec<String>,
+    pub motion_intensity: f64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct DetectedObject {
+    pub label: String,
+    pub confidence: f64,
+    pub bounding_box: BoundingBox,
+    pub attributes: Vec<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct BoundingBox {
+    pub x: f64,
+    pub y: f64,
+    pub width: f64,
+    pub height: f64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct VideoContentAnalysis {
+    pub frames: Vec<FrameAnalysis>,
+    pub object_timelines: Vec<ObjectTimeline>,
+    pub scene_segments: Vec<SceneSegment>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct ObjectTimeline {
+    pub label: String,
+    pub appearances: Vec<TimeRange>,
+    pub attributes: Vec<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct TimeRange {
+    pub start: f64,
+    pub end: f64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct SceneSegment {
+    pub start: f64,
+    pub end: f64,
+    pub description: String,
+    pub tags: Vec<String>,
+}
+
+#[tauri::command]
+#[specta::specta]
+pub async fn analyze_video_content(
+    app: AppHandle,
+    video_path: String,
+    frame_interval: f64,
+) -> Result<VideoContentAnalysis, String> {
+    // This is a placeholder - in production you would:
+    // 1. Extract frames at intervals using ffmpeg
+    // 2. Send frames to vision API (OpenAI, local model, etc)
+    // 3. Aggregate results into a timeline
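+    //
+    // A minimal sketch of step 1, assuming ffmpeg is available on PATH and
+    // that frames are dumped to the system temp directory (illustrative
+    // only, not wired up yet; the vision API call would follow it):
+    //
+    //   let out_pattern = std::env::temp_dir().join("cap_frame_%05d.png");
+    //   let status = std::process::Command::new("ffmpeg")
+    //       .arg("-i").arg(&video_path)
+    //       .arg("-vf").arg(format!("fps=1/{frame_interval}"))
+    //       .arg(&out_pattern)
+    //       .status()
+    //       .map_err(|e| e.to_string())?;
+    //   if !status.success() {
+    //       return Err("ffmpeg frame extraction failed".to_string());
+    //   }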
+
+    // For now, return mock data to demonstrate the structure
+    Ok(VideoContentAnalysis {
+        frames: vec![],
+        object_timelines: vec![],
+        scene_segments: vec![],
+    })
+}
+
+#[tauri::command]
+#[specta::specta]
+pub async fn analyze_frame_batch(
+    app: AppHandle,
+    video_path: String,
+    timestamps: Vec<f64>,
+) -> Result<Vec<FrameAnalysis>, String> {
+    // Extract specific frames and analyze them
+    // This allows on-demand analysis of specific moments
+    Ok(vec![])
+}
diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs
index 87fa91ed8..93e7dd021 100644
--- a/apps/desktop/src-tauri/src/lib.rs
+++ b/apps/desktop/src-tauri/src/lib.rs
@@ -1,3 +1,4 @@
+mod ai_vision;
 mod audio;
 mod audio_meter;
 mod auth;
@@ -1808,7 +1809,9 @@ pub async fn run(recording_logging_handle: LoggingHandle) {
             captions::download_whisper_model,
             captions::check_model_exists,
             captions::delete_whisper_model,
-            captions::export_captions_srt
+            captions::export_captions_srt,
+            ai_vision::analyze_video_content,
+            ai_vision::analyze_frame_batch
         ])
         .events(tauri_specta::collect_events![
             RecordingOptionsChanged,
diff --git a/apps/desktop/src/routes/editor/AIAssistant.tsx b/apps/desktop/src/routes/editor/AIAssistant.tsx
new file mode 100644
index 000000000..ab59c29ae
--- /dev/null
+++ b/apps/desktop/src/routes/editor/AIAssistant.tsx
@@ -0,0 +1,947 @@
+import {
+  createSignal,
+  createMemo,
+  For,
+  Show,
+  onMount,
+  createEffect,
+} from "solid-js";
+import { createStore, produce } from "solid-js/store";
+import { Button } from "@cap/ui-solid";
+import { cx } from "cva";
+import toast from "solid-toast";
+import { fetch } from "@tauri-apps/plugin-http";
+import {
+  commands,
+  type ProjectConfiguration,
+  type AspectRatio,
+} from "~/utils/tauri";
+import { clientEnv } from "~/utils/env";
+import { maybeProtectedHeaders } from "~/utils/web-api";
+import { useEditorContext } from "./context";
+import { convertFileSrc } from "@tauri-apps/api/core";
+import { resolveResource } from "@tauri-apps/api/path";
+
+// Available background presets
+const AVAILABLE_WALLPAPERS = [
+  // macOS wallpapers
+  "macOS/sequoia-dark",
+  "macOS/sequoia-light",
+  "macOS/sonoma-clouds",
+  "macOS/sonoma-dark",
+  "macOS/sonoma-evening",
+  "macOS/sonoma-fromabove",
+  "macOS/sonoma-horizon",
+  "macOS/sonoma-light",
+  "macOS/sonoma-river",
+  "macOS/ventura-dark",
+  "macOS/ventura-semi-dark",
+  "macOS/ventura",
+  // Blue wallpapers
+  "blue/1",
+  "blue/2",
+  "blue/3",
+  "blue/4",
+  "blue/5",
+  "blue/6",
+  // Purple wallpapers
+  "purple/1",
+  "purple/2",
+  "purple/3",
+  "purple/4",
+  "purple/5",
+  "purple/6",
+  // Dark wallpapers
+  "dark/1",
+  "dark/2",
+  "dark/3",
+  "dark/4",
+  "dark/5",
+  "dark/6",
+  // Orange wallpapers
+  "orange/1",
+  "orange/2",
+  "orange/3",
+  "orange/4",
+  "orange/5",
+  "orange/6",
+  "orange/7",
+  "orange/8",
+  "orange/9",
+];
+
+const AVAILABLE_COLORS = [
+  "#FF0000",
+  "#FF4500",
+  "#FF8C00",
+  "#FFD700",
+  "#FFFF00",
+  "#ADFF2F",
+  "#32CD32",
+  "#008000",
+  "#00CED1",
+  "#4785FF",
+  "#0000FF",
+  "#4B0082",
+  "#800080",
+  "#A9A9A9",
+  "#FFFFFF",
+  "#000000",
+];
+
+const AVAILABLE_GRADIENTS = [
+  { from: [15, 52, 67], to: [52, 232, 158] },
+  { from: [34, 193, 195], to: [253, 187, 45] },
+  { from: [29, 253, 251], to: [195, 29, 253] },
+  { from: [69, 104, 220], to: [176, 106, 179] },
+  { from: [106, 130, 251], to: [252, 92, 125] },
+  { from: [131, 58, 180], to: [253, 29, 29] },
+  { from: [249, 212, 35], to: [255, 78, 80] },
+  { from: [255, 94, 0], to: [255, 42, 104] },
+  { from: [255, 0, 150], to: [0, 204, 255] },
+  { from: [0, 242, 96], to: [5, 117, 230] },
+  { from: [238, 205, 163], to: [239, 98, 159] },
+  { from: [44, 62, 80], to: [52, 152, 219] },
+  { from: [168, 239, 255], to: [238, 205, 163] },
+  { from: [74, 0, 224], to: [143, 0, 255] },
+  { from: [252, 74, 26], to: [247, 183, 51] },
+  { from: [0, 255, 255], to: [255, 20, 147] },
+  { from: [255, 127, 0], to: [255, 255, 0] },
+  { from: [255, 0, 255], to: [0, 255, 0] },
+];
+
+// Map of aspect ratio strings to their dimensions
+const ASPECT_RATIO_MAP = {
+  wide: { width: 16, height: 9 },
+  vertical: { width: 9, height: 16 },
+  square: { width: 1, height: 1 },
+  classic: { width: 4, height: 3 },
+  tall: { width: 3, height: 4 },
+};
+
+// Function to convert an aspect ratio object to a predefined string
+function normalizeAspectRatio(aspectRatio: any): AspectRatio | null {
+  if (!aspectRatio) return null;
+
+  // If it's already a valid string, return it
+  if (typeof aspectRatio === "string" && aspectRatio in ASPECT_RATIO_MAP) {
+    return aspectRatio as AspectRatio;
+  }
+
+  // If it's an object with width and height, try to match it to a predefined ratio
+  if (
+    aspectRatio &&
+    typeof aspectRatio === "object" &&
+    "width" in aspectRatio &&
+    "height" in aspectRatio
+  ) {
+    const ratio = aspectRatio.width / aspectRatio.height;
+
+    // Check each predefined aspect ratio
+    for (const [key, dimensions] of Object.entries(ASPECT_RATIO_MAP)) {
+      const predefinedRatio = dimensions.width / dimensions.height;
+      // Allow small tolerance for floating point comparison
+      if (Math.abs(ratio - predefinedRatio) < 0.01) {
+        return key as AspectRatio;
+      }
+    }
+  }
+
+  // If no match found, return null (auto)
+  return null;
+}
+
+// Temporary type definitions until TypeScript bindings are regenerated
+interface VideoContentAnalysis {
+  frames: FrameAnalysis[];
+  objectTimelines: ObjectTimeline[];
+  sceneSegments: SceneSegment[];
+}
+
+interface FrameAnalysis {
+  timestamp: number;
+  objects: DetectedObject[];
+  sceneDescription: string;
+  dominantColors: string[];
+  motionIntensity: number;
+}
+
+interface DetectedObject {
+  label: string;
+  confidence: number;
+  boundingBox: {
+    x: number;
+    y: number;
+    width: number;
+    height: number;
+  };
+  attributes: string[];
+}
+
+interface ObjectTimeline {
+  label: string;
+  appearances: TimeRange[];
+  attributes: string[];
+}
+
+interface TimeRange {
+  start: number;
+  end: number;
+}
+
+interface SceneSegment {
+  start: number;
+  end: number;
+  description: string;
+  tags: string[];
+}
+
+interface Message {
+  id: string;
+  role: "user" | "assistant";
+  content: string;
+  timestamp: Date;
+  status?: "pending" | "success" | "error";
+  appliedConfig?: ProjectConfiguration;
+}
+
+interface AIAssistantState {
+  isOpen: boolean;
+  isExpanded: boolean;
+  messages: Message[];
+  input: string;
+  isLoading: boolean;
+  configHistory: ProjectConfiguration[];
+  currentHistoryIndex: number;
+  videoContent: VideoContentAnalysis | null;
+  isAnalyzingVideo: boolean;
+}
+
+// Helper function for safe cloning
+function safeClone<T>(obj: T): T {
+  try {
+    return JSON.parse(JSON.stringify(obj));
+  } catch (error) {
+    console.error("Failed to clone object:", error);
+    return obj;
+  }
+}
+
+export function AIAssistant() {
+  const { project, setProject, editorInstance, meta } = useEditorContext();
+
+  const [state, setState] = createStore<AIAssistantState>({
+    isOpen: false,
+    isExpanded: false,
+    messages: [],
+    input: "",
+    isLoading: false,
+    configHistory: [],
+    currentHistoryIndex: -1,
+    videoContent: null,
+    isAnalyzingVideo: false,
+  });
+
+  let inputRef: HTMLTextAreaElement | undefined;
+  let messagesEndRef: HTMLDivElement | undefined;
+
+  // Initialize with the current config
+  onMount(() => {
+    setState("configHistory", [safeClone(project)]);
+    setState("currentHistoryIndex", 0);
+  });
+
+  // Auto-scroll to bottom when new messages are added
+  createEffect(() => {
+    if (state.messages.length && messagesEndRef) {
+      messagesEndRef.scrollIntoView({ behavior: "smooth" });
+    }
+  });
+
+  // Auto-focus input when overlay opens
+  createEffect(() => {
+    if (state.isOpen && inputRef) {
+      // Small delay to ensure the overlay is rendered
+      setTimeout(() => inputRef?.focus(), 150);
+    }
+  });
+
+  const canUndo = createMemo(() => state.currentHistoryIndex > 0);
+  const canRedo = createMemo(
+    () => state.currentHistoryIndex < state.configHistory.length - 1
+  );
+
+  const lastMessage = createMemo(() => {
+    const messages = state.messages;
+    if (messages.length === 0) return null;
+    // Find the last assistant message
+    for (let i = messages.length - 1; i >= 0; i--) {
+      if (messages[i].role === "assistant") {
+        return messages[i];
+      }
+    }
+    return null;
+  });
+
+  const addMessage = (
+    role: Message["role"],
+    content: string,
+    appliedConfig?: ProjectConfiguration
+  ) => {
+    const message: Message = {
+      id: crypto.randomUUID(),
+      role,
+      content,
+      timestamp: new Date(),
+      status: role === "user" ? "success" : "pending",
+      appliedConfig,
+    };
+    setState("messages", [...state.messages, message]);
+    return message.id;
+  };
+
+  const updateMessageStatus = (id: string, status: Message["status"]) => {
+    setState("messages", (messages) =>
+      messages.map((msg) => (msg.id === id ? { ...msg, status } : msg))
+    );
+  };
+
+  const resolveWallpaperPath = async (wallpaperId: string) => {
+    try {
+      const resolvedPath = await resolveResource(
+        `assets/backgrounds/${wallpaperId}.jpg`
+      );
+      return resolvedPath;
+    } catch (error) {
+      console.error("Failed to resolve wallpaper path:", error);
+      return wallpaperId; // Fallback to ID if resolution fails
+    }
+  };
+
+  const applyConfiguration = async (newConfig: ProjectConfiguration) => {
+    try {
+      // Normalize the aspect ratio before applying
+      const normalizedConfig = {
+        ...newConfig,
+        aspectRatio: normalizeAspectRatio(newConfig.aspectRatio),
+      };
+
+      // Resolve wallpaper paths if they are IDs
+      let resolvedConfig = normalizedConfig;
+      if (
+        normalizedConfig.background.source.type === "wallpaper" &&
+        normalizedConfig.background.source.path &&
+        AVAILABLE_WALLPAPERS.includes(normalizedConfig.background.source.path)
+      ) {
+        const resolvedPath = await resolveWallpaperPath(
+          normalizedConfig.background.source.path
+        );
+        resolvedConfig = {
+          ...normalizedConfig,
+          background: {
+            ...normalizedConfig.background,
+            source: {
+              ...normalizedConfig.background.source,
+              path: resolvedPath,
+            },
+          },
+        };
+      }
+
+      // Apply the resolved configuration
+      setProject(resolvedConfig);
+
+      // Save to backend
+      await commands.setProjectConfig(resolvedConfig);
+
+      // Add to history
+      setState(
+        "configHistory",
+        produce((history) => {
+          // Remove any history after current index (for redo functionality)
+          history.splice(state.currentHistoryIndex + 1);
+          // Add new config
+          history.push(safeClone(resolvedConfig));
+        })
+      );
+      setState("currentHistoryIndex", state.configHistory.length);
+
+      return true;
+    } catch (error) {
+      console.error("Failed to apply configuration:", error);
+      toast.error("Failed to apply changes");
+      return false;
+    }
+  };
+
+  const undo = async () => {
+    if (!canUndo()) return;
+
+    const newIndex = state.currentHistoryIndex - 1;
+    const config = state.configHistory[newIndex];
+
+    setProject(config);
+    await commands.setProjectConfig(config);
+    setState("currentHistoryIndex", newIndex);
+
+    toast("Reverted to previous configuration");
+  };
+
+  const redo = async () => {
+    if (!canRedo()) return;
+
+    const newIndex = state.currentHistoryIndex + 1;
+    const config = state.configHistory[newIndex];
+
+    setProject(config);
+    await commands.setProjectConfig(config);
+    setState("currentHistoryIndex", newIndex);
+
+    toast("Applied next configuration");
+  };
+
+  const analyzeVideo = async () => {
+    setState("isAnalyzingVideo", true);
+
+    try {
+      // TODO: Uncomment when TypeScript bindings are regenerated
+      // const content = await commands.analyzeVideoContent(
+      //   editorInstance.path,
+      //   2.0
+      // );
+
+      // For now, use mock data to demonstrate the UI
+      const content: VideoContentAnalysis = {
+        frames: [],
+        objectTimelines: [],
+        sceneSegments: [],
+      };
+
+      setState("videoContent", content);
+      toast.success("Video analysis complete!");
+    } catch (error) {
+      console.error("Failed to analyze video:", error);
+      toast.error("Failed to analyze video content");
+    } finally {
+      setState("isAnalyzingVideo", false);
+    }
+  };
+
+  const handleSubmit = async (e?: Event) => {
+    e?.preventDefault();
+
+    const input = state.input.trim();
+    if (!input || state.isLoading) return;
+
+    // Add user message
+    addMessage("user", input);
+    setState("input", "");
+    setState("isLoading", true);
+
+    // Create assistant message
+    const assistantMessageId = addMessage("assistant", "Thinking...");
+
+    try {
+      // Get auth headers if available
+      const authHeaders = await maybeProtectedHeaders();
+
+      // Build headers object
+      const headers: Record<string, string> = {
+        "Content-Type": "application/json",
+      };
+
+      if (authHeaders.authorization) {
+        headers.authorization = authHeaders.authorization;
+      }
+
+      // Call the API
+      const response = await fetch(
+        `${clientEnv.VITE_SERVER_URL}/api/desktop/ai-editor`,
+        {
+          method: "POST",
+          headers,
+          body: JSON.stringify({
+            prompt: input,
+            currentConfig: project,
+            editorContext: {
+              hasCamera: editorInstance.recordings.segments.some(
+                (s) => s.camera !== null
+              ),
+              hasAudio: editorInstance.recordings.segments.some(
+                (s) => s.mic !== null || s.system_audio !== null
+              ),
+              hasCursor:
+                meta().type === "multiple" &&
+                !!(meta() as any).segments[0].cursor,
+              duration: editorInstance.recordingDuration,
+            },
+            videoContent: state.videoContent,
+            conversationHistory: state.messages.map((msg) => ({
+              role: msg.role,
+              content: msg.content,
+            })),
+            availableBackgrounds: {
+              wallpapers: AVAILABLE_WALLPAPERS,
+              colors: AVAILABLE_COLORS,
+              gradients: AVAILABLE_GRADIENTS,
+            },
+          }),
+        }
+      );
+
+      if (!response.ok) {
+        throw new Error(`API error: ${response.status}`);
+      }
+
+      const data = await response.json();
+
+      if (data.error) {
+        throw new Error(data.error);
+      }
+
+      // Update assistant message with the response
+      setState("messages", (messages) =>
+        messages.map((msg) =>
+          msg.id === assistantMessageId
+            ? {
+                ...msg,
+                content: data.explanation || "Changes applied successfully!",
+                status: "success" as const,
+              }
+            : msg
+        )
+      );
+
+      // Apply the new configuration
+      if (data.newConfig) {
+        const applied = await applyConfiguration(data.newConfig);
+        if (applied) {
+          // Update the message to include the applied config
+          setState("messages", (messages) =>
+            messages.map((msg) =>
+              msg.id === assistantMessageId
+                ? { ...msg, appliedConfig: data.newConfig }
+                : msg
+            )
+          );
+        }
+      }
+    } catch (error) {
+      console.error("AI Assistant error:", error);
+      updateMessageStatus(assistantMessageId, "error");
+      setState("messages", (messages) =>
+        messages.map((msg) =>
+          msg.id === assistantMessageId
+            ? {
+                ...msg,
+                content: "Sorry, I encountered an error. Please try again.",
+                status: "error" as const,
+              }
+            : msg
+        )
+      );
+      toast.error("Failed to process your request");
+    } finally {
+      setState("isLoading", false);
+      inputRef?.focus();
+    }
+  };
+
+  return (
+    <>
setState("isOpen", false)} + /> + + {/* Timeline Darkening Backdrop */} +
+ + {/* AI Assistant Panel */} +
+
+ {/* Header */} +
+
+
+
+
+
+

Cap AI

+
+
+ + + + + + +
+
+ + {/* Content */} +
+ +
+ +
+ + Applying changes... +
+
+ +

+ {lastMessage()!.content} +

+
+
+
+ } + > + {/* Expanded view - show all messages */} +
+ 0} + fallback={ +
+

+ How can I help you edit your video? +

+ +
+

+ For content-aware editing, analyze your video + first: +

+ +
+
+
    +
  • • "Make the background blue with a gradient"
  • +
  • • "Move the camera to the top right"
  • +
  • • "Add padding and rounded corners"
  • +
  • • "Enable captions with larger font"
  • +
+
+ } + > + + {(message) => ( +
+
+ +
+ + Applying changes... +
+
+ +

+ {message.content} +

+
+ +

+ ✓ Changes applied +

+
+
+
+ )} +
+
+ +
+
+
+ + {/* Input */} +
+
+