CapSoftware
diff --git a/‎apps/desktop/src-tauri/src/ai_vision.rs
Lines changed: 89 additions & 0 deletions b/‎apps/desktop/src-tauri/src/ai_vision.rs
Lines changed: 89 additions & 0 deletions
diff --git a/‎apps/desktop/src-tauri/src/lib.rs
Lines changed: 4 additions & 1 deletion b/‎apps/desktop/src-tauri/src/lib.rs
Lines changed: 4 additions & 1 deletion
@@ -0,0 +1,89 @@
+use serde::{Deserialize, Serialize};
+use specta::Type;
+use std::path::Path;
+use tauri::AppHandle;
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct FrameAnalysis {
+    pub timestamp: f64,
+    pub objects: Vec<DetectedObject>,
+    pub scene_description: String,
+    pub dominant_colors: Vec<String>,
+    pub motion_intensity: f64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct DetectedObject {
+    pub label: String,
+    pub confidence: f64,
+    pub bounding_box: BoundingBox,
+    pub attributes: Vec<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct BoundingBox {
+    pub x: f64,
+    pub y: f64,
+    pub width: f64,
+    pub height: f64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct VideoContentAnalysis {
+    pub frames: Vec<FrameAnalysis>,
+    pub object_timelines: Vec<ObjectTimeline>,
+    pub scene_segments: Vec<SceneSegment>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct ObjectTimeline {
+    pub label: String,
+    pub appearances: Vec<TimeRange>,
+    pub attributes: Vec<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct TimeRange {
+    pub start: f64,
+    pub end: f64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Type)]
+pub struct SceneSegment {
+    pub start: f64,
+    pub end: f64,
+    pub description: String,
+    pub tags: Vec<String>,
+}
+
+#[tauri::command]
+#[specta::specta]
+pub async fn analyze_video_content(
+    app: AppHandle,
+    video_path: String,
+    frame_interval: f64,
+) -> Result<VideoContentAnalysis, String> {
+    // This is a placeholder - in production you would:
+    // 1. Extract frames at intervals using ffmpeg
+    // 2. Send frames to vision API (OpenAI, local model, etc)
+    // 3. Aggregate results into timeline
+
+    // For now, return mock data to demonstrate the structure
+    Ok(VideoContentAnalysis {
+        frames: vec![],
+        object_timelines: vec![],
+        scene_segments: vec![],
+    })
+}
+
+#[tauri::command]
+#[specta::specta]
+pub async fn analyze_frame_batch(
+    app: AppHandle,
+    video_path: String,
+    timestamps: Vec<f64>,
+) -> Result<Vec<FrameAnalysis>, String> {
+    // Extract specific frames and analyze them
+    // This allows on-demand analysis of specific moments
+    Ok(vec![])
+}
@@ -1,3 +1,4 @@
+mod ai_vision;
 mod audio;
 mod audio_meter;
 mod auth;
@@ -1808,7 +1809,9 @@ pub async fn run(recording_logging_handle: LoggingHandle) {
             captions::download_whisper_model,
             captions::check_model_exists,
             captions::delete_whisper_model,
-            captions::export_captions_srt
+            captions::export_captions_srt,
+            ai_vision::analyze_video_content,
+            ai_vision::analyze_frame_batch
         ])
         .events(tauri_specta::collect_events![
             RecordingOptionsChanged,