From d9b17431a799a0354103ef390f6db15f34fb92be Mon Sep 17 00:00:00 2001 From: blackhao <13851610112@163.com> Date: Wed, 10 Dec 2025 19:30:26 -0600 Subject: init file sys --- backend/app/main.py | 553 +++++++++++++++- backend/app/schemas.py | 4 + backend/app/services/llm.py | 159 +++-- .../files/1d344051-3b8e-41bc-8c57-15710383bd42 | Bin 0 -> 249853 bytes .../files/23f72737-1d5f-4cab-b68e-8c9010a8fdc4 | Bin 0 -> 263914 bytes .../files/2d43e6a5-2823-4564-8beb-fef2721056f3 | Bin 0 -> 4396443 bytes .../files/58252c05-718e-4eea-a23d-56a64d03b4fc | Bin 263914 -> 0 bytes .../files/609ece1b-0894-4904-b363-d6e1c7a90be8 | Bin 0 -> 30337 bytes .../files/dcf69ca5-2efa-4b8a-acec-3864a803f073 | Bin 0 -> 275191 bytes backend/data/test/files/index.json | 70 +- backend/data/test/files/vector_store.json | 1 + backend/data/test/projects/untitled (2).json | 730 +++++++++++++++++++++ frontend/src/App.tsx | 4 +- frontend/src/components/LeftSidebar.tsx | 58 +- frontend/src/components/Sidebar.tsx | 689 +++++++++++++++++-- frontend/src/store/flowStore.ts | 109 ++- 16 files changed, 2263 insertions(+), 114 deletions(-) create mode 100644 backend/data/test/files/1d344051-3b8e-41bc-8c57-15710383bd42 create mode 100644 backend/data/test/files/23f72737-1d5f-4cab-b68e-8c9010a8fdc4 create mode 100644 backend/data/test/files/2d43e6a5-2823-4564-8beb-fef2721056f3 delete mode 100644 backend/data/test/files/58252c05-718e-4eea-a23d-56a64d03b4fc create mode 100644 backend/data/test/files/609ece1b-0894-4904-b363-d6e1c7a90be8 create mode 100644 backend/data/test/files/dcf69ca5-2efa-4b8a-acec-3864a803f073 create mode 100644 backend/data/test/files/vector_store.json create mode 100644 backend/data/test/projects/untitled (2).json diff --git a/backend/app/main.py b/backend/app/main.py index 886bd9e..be30333 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,16 +1,20 @@ +import asyncio +import tempfile +import time from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, FileResponse -from fastapi import UploadFile, File +from fastapi import UploadFile, File, Form from pydantic import BaseModel from app.schemas import NodeRunRequest, NodeRunResponse, MergeStrategy, Role, Message, Context, LLMConfig, ModelProvider, ReasoningEffort -from app.services.llm import llm_streamer, generate_title +from app.services.llm import llm_streamer, generate_title, get_openai_client from dotenv import load_dotenv import os import json import shutil from typing import List, Literal, Optional from uuid import uuid4 +from google import genai load_dotenv() @@ -28,6 +32,9 @@ app.add_middleware( DATA_ROOT = os.path.abspath(os.getenv("DATA_ROOT", os.path.join(os.getcwd(), "data"))) DEFAULT_USER = "test" ARCHIVE_FILENAME = "archived_nodes.json" +VALID_FILE_PROVIDERS = {"local", "openai", "google"} +OPENAI_MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB limit per OpenAI docs +OPENAI_DEFAULT_FILE_PURPOSE = os.getenv("OPENAI_FILE_PURPOSE", "user_data") def ensure_user_root(user: str) -> str: """ @@ -152,6 +159,10 @@ class FileMeta(BaseModel): created_at: float provider: Optional[str] = None provider_file_id: Optional[str] = None + openai_file_id: Optional[str] = None + openai_vector_store_id: Optional[str] = None + # Scopes for filtering: "project_path/node_id" composite keys + scopes: List[str] = [] class FolderRequest(BaseModel): user: str = DEFAULT_USER @@ -225,8 +236,44 @@ async def run_node_stream(request: NodeRunRequest): execution_context = 
Context(messages=final_messages) + tools: List[dict] = [] + attachments: List[dict] = [] + + if request.config.provider == ModelProvider.OPENAI: + vs_ids, debug_refs, filters = await prepare_openai_vector_search( + user=DEFAULT_USER, + attached_ids=request.attached_file_ids, + scopes=request.scopes, + llm_config=request.config, + ) + # Always enable file_search if vector store exists (even without explicit attachments) + # This allows nodes to access files attached in previous nodes of the trace + if not vs_ids: + # Try to get user's vector store anyway + try: + client = get_openai_client(request.config.api_key) + vs_id = await ensure_user_vector_store(DEFAULT_USER, client) + if vs_id: + vs_ids = [vs_id] + except Exception as e: + print(f"[warn] Could not get vector store: {e}") + + if vs_ids: + tool_def = {"type": "file_search", "vector_store_ids": vs_ids} + if filters: + tool_def["filters"] = filters + tools.append(tool_def) + print(f"[openai file_search] vs_ids={vs_ids} refs={debug_refs} filters={filters}") + elif request.config.provider == ModelProvider.GOOGLE: + attachments = await prepare_attachments( + user=DEFAULT_USER, + target_provider=request.config.provider, + attached_ids=request.attached_file_ids, + llm_config=request.config, + ) + return StreamingResponse( - llm_streamer(execution_context, request.user_prompt, request.config), + llm_streamer(execution_context, request.user_prompt, request.config, attachments, tools), media_type="text/event-stream" ) @@ -396,6 +443,107 @@ def archived_path(user: str) -> str: def files_index_path(user: str) -> str: return os.path.join(files_root(user), "index.json") +def user_vector_store_path(user: str) -> str: + return os.path.join(files_root(user), "vector_store.json") + +async def ensure_user_vector_store(user: str, client=None) -> str: + """ + Ensure there is a vector store for the user (OpenAI). + Persist the id under data//files/vector_store.json. + """ + path = user_vector_store_path(user) + if client is None: + client = get_openai_client() + + # Try existing cached ID + if os.path.exists(path): + try: + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + vs_id_cached = data.get("id") + if vs_id_cached: + try: + await client.vector_stores.retrieve(vector_store_id=vs_id_cached) + return vs_id_cached + except Exception: + # Possibly deleted; recreate below + pass + except Exception: + pass + + # create new + vs = await client.vector_stores.create(name=f"{user}-vs") + vs_id = getattr(vs, "id", None) + if not vs_id: + raise HTTPException(status_code=500, detail="Failed to create vector store") + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump({"id": vs_id}, f) + return vs_id + +async def ensure_openai_file_and_index(user: str, meta: FileMeta, path: str, llm_config: Optional[LLMConfig] = None) -> tuple[str, str]: + """ + Ensure the file is uploaded to OpenAI Files and added to the user's vector store. + Returns (openai_file_id, vector_store_id). 
+ """ + client = get_openai_client(llm_config.api_key if llm_config else None) + vs_id = await ensure_user_vector_store(user, client) + + file_id = meta.openai_file_id or (meta.provider_file_id if meta.provider == "openai" else None) + if not file_id: + with open(path, "rb") as f: + content = f.read() + resp = await client.files.create( + file=(meta.name or "upload.bin", content), + purpose="assistants", + ) + file_id = getattr(resp, "id", None) + if not file_id: + raise HTTPException(status_code=500, detail="OpenAI file upload returned no file_id") + + await add_file_to_vector_store(vs_id, file_id, client=client) + return file_id, vs_id + +async def remove_file_from_vector_store(vs_id: str, file_id: str, client=None): + if not vs_id or not file_id: + return + if client is None: + client = get_openai_client() + try: + await client.vector_stores.files.delete(vector_store_id=vs_id, file_id=file_id) + except Exception as e: + print(f"[warn] remove_file_from_vector_store failed: {e}") + +async def add_file_to_vector_store(vs_id: str, file_id: str, client=None): + """ + Add a file to vector store with file_id as attribute for filtering. + We use file_id as the attribute so we can filter by specific files at query time. + """ + if client is None: + client = get_openai_client() + + # Use file_id as attribute for filtering + create_params = { + "vector_store_id": vs_id, + "file_id": file_id, + "attributes": {"file_id": file_id} # Enable filtering by file_id + } + + await client.vector_stores.files.create(**create_params) + # Poll until completed (limit capped at 100 per API spec) + for _ in range(20): + listing = await client.vector_stores.files.list(vector_store_id=vs_id, limit=100) + found = None + for item in getattr(listing, "data", []): + if getattr(item, "id", None) == file_id or getattr(item, "file_id", None) == file_id: + found = item + break + status = getattr(found, "status", None) if found else None + if status == "completed": + return + await asyncio.sleep(0.5) + # If not confirmed, still continue + return def load_files_index(user: str) -> List[FileMeta]: path = files_index_path(user) @@ -412,6 +560,226 @@ def save_files_index(user: str, items: List[FileMeta]): with open(path, "w", encoding="utf-8") as f: json.dump([item.model_dump() for item in items], f, ensure_ascii=False, indent=2) + +async def prepare_attachments( + user: str, + target_provider: str, + attached_ids: List[str], + llm_config: LLMConfig, +) -> list[dict]: + """ + For each attached file ID: + - If already uploaded to the target provider, reuse provider_file_id/uri. + - Otherwise, upload with the original filename (required by OpenAI). + Returns a list of dicts describing attachment references for the provider. 
+ """ + if not attached_ids: + return [] + + items = load_files_index(user) + items_map = {item.id: item for item in items} + attachments: list[dict] = [] + + if isinstance(target_provider, ModelProvider): + provider_norm = target_provider.value.lower() + else: + provider_norm = str(target_provider).lower() + + for fid in attached_ids: + meta = items_map.get(fid) + if not meta: + print(f"[warn] Attached file id not found, skipping: {fid}") + continue + + path = os.path.join(files_root(user), fid) + if not os.path.exists(path): + raise HTTPException(status_code=404, detail=f"Attached file missing on disk: {meta.name}") + + if provider_norm == ModelProvider.OPENAI or provider_norm == "openai": + # Reuse provider file id if available + if meta.provider == "openai" and meta.provider_file_id: + attachments.append({ + "provider": "openai", + "file_id": meta.provider_file_id, + "name": meta.name, + "mime": meta.mime, + }) + continue + + # Upload to OpenAI with original filename + with open(path, "rb") as f: + content = f.read() + size = len(content) + if size > OPENAI_MAX_FILE_SIZE: + raise HTTPException(status_code=400, detail=f"File {meta.name} exceeds OpenAI 50MB limit") + + try: + client = get_openai_client(llm_config.api_key) + resp = await client.files.create( + file=(meta.name or "upload.bin", content), + purpose=OPENAI_DEFAULT_FILE_PURPOSE, + ) + openai_file_id = getattr(resp, "id", None) + if not openai_file_id: + raise HTTPException(status_code=500, detail="OpenAI file upload returned no file_id") + attachments.append({ + "provider": "openai", + "file_id": openai_file_id, + "name": meta.name, + "mime": meta.mime, + }) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"OpenAI upload failed: {str(e)}") + + elif provider_norm == ModelProvider.GOOGLE or provider_norm == "google": + # Reuse uri/name if available and looks like a URI + if meta.provider == "google" and meta.provider_file_id and "://" in meta.provider_file_id: + attachments.append({ + "provider": "google", + "uri": meta.provider_file_id, + "name": meta.name, + "mime": meta.mime, + }) + continue + + key = llm_config.api_key or os.getenv("GOOGLE_API_KEY") + if not key: + raise HTTPException(status_code=500, detail="Google API Key not found") + client = genai.Client(api_key=key) + + tmp_path = None + try: + with open(path, "rb") as f: + content = f.read() + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp.write(content) + tmp_path = tmp.name + + google_resp = await asyncio.to_thread( + client.files.upload, + file=tmp_path, + config={"mimeType": meta.mime or "application/octet-stream"}, + ) + google_name = getattr(google_resp, "name", None) + google_uri = getattr(google_resp, "uri", None) + + # Poll for ACTIVE and uri if missing + if google_name: + for _ in range(10): + try: + info = await asyncio.to_thread(client.files.get, name=google_name) + state = getattr(info, "state", None) + google_uri = getattr(info, "uri", google_uri) + if str(state).upper().endswith("ACTIVE") or state == "ACTIVE": + break + await asyncio.sleep(1) + except Exception: + await asyncio.sleep(1) + print(f"[google upload] name={google_name} uri={google_uri}") + + uri = google_uri or google_name + if not uri: + raise HTTPException(status_code=500, detail="Google upload returned no uri/name") + attachments.append({ + "provider": "google", + "uri": uri, + "name": meta.name, + "mime": meta.mime, + }) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, 
detail=f"Google upload failed: {str(e)}") + finally: + if tmp_path and os.path.exists(tmp_path): + os.remove(tmp_path) + + else: + raise HTTPException(status_code=400, detail=f"Unsupported provider for attachments: {target_provider}") + + # Debug log + print(f"[attachments] provider={provider_norm} count={len(attachments)} detail={[{'name': a.get('name'), 'id': a.get('file_id', a.get('uri'))} for a in attachments]}") + return attachments + + +async def prepare_openai_vector_search( + user: str, + attached_ids: List[str], + scopes: List[str], + llm_config: LLMConfig, +) -> tuple[List[str], List[dict], Optional[dict]]: + """ + Ensure all attached files are uploaded to OpenAI Files (purpose=assistants) and added to the user's vector store. + Returns (vector_store_ids, openai_file_refs_for_debug, filters). + + Filtering logic: + - If scopes provided: find files whose scopes intersect with requested scopes + - If only attached_ids: use those specific files + - Filters are constructed using file_id attribute in vector store + """ + items = load_files_index(user) + items_map = {item.id: item for item in items} + + # Determine which files to include based on scopes or attached_ids + relevant_files: List[FileMeta] = [] + + if scopes: + # Find files whose scopes intersect with requested scopes + for item in items: + if item.scopes and any(s in scopes for s in item.scopes): + relevant_files.append(item) + print(f"[file_search] scopes={scopes} matched_files={[f.name for f in relevant_files]}") + elif attached_ids: + # Fallback: use explicitly attached files + for fid in attached_ids: + meta = items_map.get(fid) + if meta: + relevant_files.append(meta) + + if not relevant_files: + return [], [], None + + changed = False + vs_ids: List[str] = [] + debug_refs: List[dict] = [] + file_ids_for_filter: List[str] = [] + + for meta in relevant_files: + path = os.path.join(files_root(user), meta.id) + if not os.path.exists(path): + print(f"[warn] Attached file missing on disk, skipping: {meta.id}") + continue + # Enforce 50MB OpenAI limit + file_size = os.path.getsize(path) + if file_size > OPENAI_MAX_FILE_SIZE: + print(f"[warn] File {meta.name} exceeds OpenAI 50MB limit, skipping") + continue + + openai_file_id, vs_id = await ensure_openai_file_and_index(user, meta, path, llm_config) + if meta.openai_file_id != openai_file_id or meta.openai_vector_store_id != vs_id: + meta.openai_file_id = openai_file_id + meta.openai_vector_store_id = vs_id + changed = True + vs_ids.append(vs_id) + debug_refs.append({"name": meta.name, "file_id": openai_file_id, "vs_id": vs_id}) + if openai_file_id: + file_ids_for_filter.append(openai_file_id) + + if changed: + save_files_index(user, list(items_map.values())) + + # deduplicate + vs_ids_unique = list({vid for vid in vs_ids if vid}) + + # Build filters to only search relevant files + filters = None + if file_ids_for_filter: + filters = {"type": "in", "key": "file_id", "value": file_ids_for_filter} + + return vs_ids_unique, debug_refs, filters + # ------------------------------------------------- @app.get("/api/projects/archived") @@ -449,27 +817,124 @@ def list_files(user: str = DEFAULT_USER): @app.post("/api/files/upload") -async def upload_file(user: str = DEFAULT_USER, file: UploadFile = File(...)): +async def upload_file( + user: str = DEFAULT_USER, + file: UploadFile = File(...), + provider: str = Form("local"), + purpose: Optional[str] = Form(None), +): migrate_legacy_layout(user) items = load_files_index(user) file_id = str(uuid4()) dest_root = files_root(user) dest_path 
= os.path.join(dest_root, file_id) + file_name = file.filename or "upload.bin" + provider_normalized = (provider or "local").lower() + if provider_normalized not in VALID_FILE_PROVIDERS: + raise HTTPException(status_code=400, detail="Unsupported provider") + try: content = await file.read() - size = len(content) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + size = len(content) + if provider_normalized == "openai" and size > OPENAI_MAX_FILE_SIZE: + raise HTTPException(status_code=400, detail="OpenAI provider limit: max 50MB per file") + + provider_file_id: Optional[str] = None + provider_created_at: Optional[float] = None + + if provider_normalized == "openai": + try: + client = get_openai_client() + upload_purpose = purpose or OPENAI_DEFAULT_FILE_PURPOSE + resp = await client.files.create( + file=(file_name, content), + purpose=upload_purpose, + ) + provider_file_id = getattr(resp, "id", None) + provider_created_at = getattr(resp, "created_at", None) + except Exception as e: + raise HTTPException(status_code=500, detail=f"OpenAI upload failed: {str(e)}") + elif provider_normalized == "google": + try: + key = os.getenv("GOOGLE_API_KEY") + if not key: + raise HTTPException(status_code=500, detail="Google API Key not found") + client = genai.Client(api_key=key) + # The Google GenAI SDK upload is synchronous; run in thread to avoid blocking the event loop. + tmp_path = None + try: + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp.write(content) + tmp_path = tmp.name + google_resp = await asyncio.to_thread( + client.files.upload, + file=tmp_path, + config={"mimeType": file.content_type or "application/octet-stream"}, + ) + google_name = getattr(google_resp, "name", None) + google_uri = getattr(google_resp, "uri", None) + + # Poll for ACTIVE and uri if missing + if google_name: + for _ in range(10): + try: + info = await asyncio.to_thread(client.files.get, name=google_name) + state = getattr(info, "state", None) + google_uri = getattr(info, "uri", google_uri) + if str(state).upper().endswith("ACTIVE") or state == "ACTIVE": + break + await asyncio.sleep(1) + except Exception: + await asyncio.sleep(1) + + provider_file_id = google_uri or google_name + finally: + if tmp_path and os.path.exists(tmp_path): + os.remove(tmp_path) + + provider_created_at = time.time() + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Google upload failed: {str(e)}") + + try: + os.makedirs(dest_root, exist_ok=True) with open(dest_path, "wb") as f: f.write(content) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + created_at = provider_created_at or os.path.getmtime(dest_path) + meta = FileMeta( id=file_id, - name=file.filename, + name=file_name, size=size, mime=file.content_type or "application/octet-stream", - created_at=os.path.getmtime(dest_path), + created_at=created_at, + provider=provider_normalized if provider_normalized != "local" else None, + provider_file_id=provider_file_id, + openai_file_id=None, + openai_vector_store_id=None, ) + + # Always try to index into OpenAI vector store (if <=50MB) + if size <= OPENAI_MAX_FILE_SIZE: + try: + openai_file_id, vs_id = await ensure_openai_file_and_index(user, meta, dest_path, None) + meta.openai_file_id = openai_file_id + meta.openai_vector_store_id = vs_id + if provider_normalized == "openai" and not meta.provider_file_id: + meta.provider_file_id = openai_file_id + except Exception as e: + print(f"[warn] OpenAI indexing failed for 
{file_name}: {e}") + else: + print(f"[warn] Skipping OpenAI indexing for {file_name}: exceeds 50MB") + items.append(meta) save_files_index(user, items) return {"file": meta} @@ -489,15 +954,87 @@ def download_file(user: str = DEFAULT_USER, file_id: str = ""): @app.post("/api/files/delete") -def delete_file(user: str = DEFAULT_USER, file_id: str = ""): +async def delete_file(user: str = DEFAULT_USER, file_id: str = ""): migrate_legacy_layout(user) items = load_files_index(user) meta = next((i for i in items if i.id == file_id), None) if not meta: raise HTTPException(status_code=404, detail="file not found") + + # Remove from vector store and OpenAI Files if present + if meta.openai_vector_store_id and meta.openai_file_id: + await remove_file_from_vector_store(meta.openai_vector_store_id, meta.openai_file_id) + if meta.provider == "openai" and meta.provider_file_id: + try: + client = get_openai_client() + await client.files.delete(meta.provider_file_id) + except Exception as e: + raise HTTPException(status_code=500, detail=f"OpenAI delete failed: {str(e)}") + if meta.provider == "google" and meta.provider_file_id: + try: + key = os.getenv("GOOGLE_API_KEY") + if not key: + raise HTTPException(status_code=500, detail="Google API Key not found") + client = genai.Client(api_key=key) + await asyncio.to_thread(client.files.delete, meta.provider_file_id) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Google delete failed: {str(e)}") + path = os.path.join(files_root(user), file_id) if os.path.exists(path): os.remove(path) items = [i for i in items if i.id != file_id] save_files_index(user, items) return {"ok": True} + + +class AddScopeRequest(BaseModel): + user: str = DEFAULT_USER + file_id: str + scope: str # "project_path/node_id" composite key + + +@app.post("/api/files/add_scope") +def add_file_scope(request: AddScopeRequest): + """ + Add a scope to a file's scopes list. + Called when user attaches a file to a node. + """ + migrate_legacy_layout(request.user) + items = load_files_index(request.user) + meta = next((i for i in items if i.id == request.file_id), None) + if not meta: + raise HTTPException(status_code=404, detail="file not found") + + if request.scope not in meta.scopes: + meta.scopes.append(request.scope) + save_files_index(request.user, items) + + return {"file": meta.model_dump()} + + +class RemoveScopeRequest(BaseModel): + user: str = DEFAULT_USER + file_id: str + scope: str + + +@app.post("/api/files/remove_scope") +def remove_file_scope(request: RemoveScopeRequest): + """ + Remove a scope from a file's scopes list. + Called when user detaches a file from a node. + """ + migrate_legacy_layout(request.user) + items = load_files_index(request.user) + meta = next((i for i in items if i.id == request.file_id), None) + if not meta: + raise HTTPException(status_code=404, detail="file not found") + + if request.scope in meta.scopes: + meta.scopes.remove(request.scope) + save_files_index(request.user, items) + + return {"file": meta.model_dump()} diff --git a/backend/app/schemas.py b/backend/app/schemas.py index bd8ebe7..54c0560 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -49,6 +49,10 @@ class NodeRunRequest(BaseModel): user_prompt: str config: LLMConfig merge_strategy: MergeStrategy = MergeStrategy.SMART + attached_file_ids: List[str] = Field(default_factory=list) + # Scopes for file_search filtering: ["project_path/node_id", ...] 
+ # Contains all project/node combinations in the current trace + scopes: List[str] = Field(default_factory=list) class NodeRunResponse(BaseModel): node_id: str diff --git a/backend/app/services/llm.py b/backend/app/services/llm.py index b372f9e..96b0514 100644 --- a/backend/app/services/llm.py +++ b/backend/app/services/llm.py @@ -1,5 +1,5 @@ import os -from typing import AsyncGenerator +from typing import AsyncGenerator, List, Dict, Any, Optional import openai import google.generativeai as genai from app.schemas import LLMConfig, Message, Role, Context @@ -23,8 +23,15 @@ def configure_google(api_key: str = None): raise ValueError("Google API Key not found") genai.configure(api_key=key) -async def stream_openai(messages: list[Message], config: LLMConfig) -> AsyncGenerator[str, None]: +async def stream_openai( + messages: list[Message], + config: LLMConfig, + attachments: Optional[List[Dict[str, Any]]] = None, + tools: Optional[List[Dict[str, Any]]] = None, +) -> AsyncGenerator[str, None]: client = get_openai_client(config.api_key) + attachments = attachments or [] + tools = tools or [] # Convert internal Message schema to OpenAI format openai_messages = [] @@ -38,17 +45,23 @@ async def stream_openai(messages: list[Message], config: LLMConfig) -> AsyncGene responses_only_models = ['gpt-5-pro'] # Models that CAN use Responses API (and thus support web_search tool) + model_lower = config.model_name.lower() responses_capable_models = [ - 'gpt-5', 'gpt-5-chat-latest', 'gpt-5-mini', 'gpt-5-nano', - 'gpt-5-pro', 'gpt-5.1', 'gpt-5.1-chat-latest', 'o3' + 'gpt-5', 'gpt-5-chat-latest', 'gpt-5-mini', 'gpt-5-nano', + 'gpt-5-pro', 'gpt-5.1', 'gpt-5.1-chat-latest', 'o3', + 'o1', 'o1-preview', 'o1-mini', + 'gpt-4o', 'gpt-4o-mini', 'gpt-4o-realtime', 'gpt-4o-mini-tts' ] # Use Responses API if: # 1. Model ONLY supports Responses API, OR # 2. User wants web search AND model is capable of Responses API + # 3. 
Attachments are present (Responses supports input_file) use_responses_api = ( config.model_name in responses_only_models or - (config.enable_google_search and config.model_name in responses_capable_models) + (config.enable_google_search and (config.model_name in responses_capable_models or model_lower.startswith("gpt-4o"))) or + (attachments and (config.model_name in responses_capable_models or model_lower.startswith("gpt-4o"))) or + (tools) ) if use_responses_api: @@ -56,25 +69,50 @@ async def stream_openai(messages: list[Message], config: LLMConfig) -> AsyncGene # yield f"[Debug: Config Search={config.enable_google_search}, Model={config.model_name}]\n" # Use new client.responses.create API with Polling Strategy - # Convert messages to Responses API format (same as Chat Completions) - # Responses API accepts input as array of message objects - - # Filter out system messages (use instructions instead) and format for Responses API + # Build Responses API input input_messages = [] for msg in openai_messages: - if msg['role'] != 'system': # System prompt goes to instructions - input_messages.append({ - "role": msg['role'], - "content": msg['content'] + if msg['role'] == 'system': + continue # goes to instructions + # User messages use input_text, assistant messages use output_text + content_type = "input_text" if msg['role'] == 'user' else "output_text" + input_messages.append({ + "role": msg['role'], + "content": [ + { + "type": content_type, + "text": msg['content'] + } + ] + }) + + # Append attachments as separate user message (files only) + file_parts = [] + for att in attachments: + if att.get("provider") == "openai" and att.get("file_id"): + file_parts.append({ + "type": "input_file", + "file_id": att["file_id"] }) + if file_parts: + input_messages.append({ + "role": "user", + "content": file_parts + }) resp_params = { "model": config.model_name, "input": input_messages, # Full conversation history - "stream": False, # Disable stream to get immediate ID - "background": True, # Enable background mode for async execution - "store": True + "stream": False, # Get full output in one call + "background": False, + "store": True, + "tool_choice": "auto", } + if tools: + resp_params["tools"] = tools + resp_params["tool_choice"] = "auto" + # Optional: include results for debugging / citations + resp_params["include"] = ["file_search_call.results"] # Add reasoning effort (not supported by chat-latest models) models_without_effort = ['gpt-5-chat-latest', 'gpt-5.1-chat-latest'] @@ -82,28 +120,40 @@ async def stream_openai(messages: list[Message], config: LLMConfig) -> AsyncGene resp_params["reasoning"] = {"effort": config.reasoning_effort.value} # Enable Web Search if requested (Reusing enable_google_search flag as generic web_search flag) + # IMPORTANT: Append to existing tools instead of overwriting if config.enable_google_search: - resp_params["tools"] = [{"type": "web_search"}] + if resp_params.get("tools"): + resp_params["tools"].append({"type": "web_search"}) + else: + resp_params["tools"] = [{"type": "web_search"}] resp_params["tool_choice"] = "auto" - # Debugging tool injection - # yield "[Debug: Web Search Tool Injected]" # Uncomment to debug if config.system_prompt: resp_params["instructions"] = config.system_prompt + + # Debug: print final tools being sent + print(f"[responses debug] final tools: {resp_params.get('tools')}") - # 1. Create Response (Async/Background) - # This returns a Response object immediately with status 'queued' or 'in_progress' + # 1. 
Create Response (non-background) initial_resp = await client.responses.create(**resp_params) response_id = initial_resp.id - + # 2. Poll for Completion import asyncio - # Poll for up to 10 minutes - for _ in range(300): + for _ in range(300): final_resp = await client.responses.retrieve(response_id) - + if final_resp.status == 'completed': - # Parse final response object + # Debug: log outputs and tool calls + try: + outs = getattr(final_resp, "output", []) + print(f"[responses debug] output items: {[getattr(o, 'type', None) for o in outs]}") + for o in outs: + if getattr(o, "type", None) == "file_search_call": + print(f"[responses debug] file_search_call: {o}") + except Exception as e: + print(f"[responses debug] failed to inspect output: {e}") + found_content = False if hasattr(final_resp, 'output'): for out in final_resp.output: @@ -128,13 +178,16 @@ async def stream_openai(messages: list[Message], config: LLMConfig) -> AsyncGene yield f"\n[Error: Response generation {final_resp.status}: {error_msg}]" return - # Still in_progress await asyncio.sleep(2) yield "\n[Error: Polling timed out]" return - # Standard Chat Completions API + # Standard Chat Completions API (attachments not supported here) + if attachments: + yield "[Error] Attachments are only supported for Responses API-capable models." + return + # Prepare parameters req_params = { "model": config.model_name, @@ -175,7 +228,8 @@ async def stream_openai(messages: list[Message], config: LLMConfig) -> AsyncGene elif getattr(delta, 'refusal', None): yield f"[Refusal: {delta.refusal}]" -async def stream_google(messages: list[Message], config: LLMConfig) -> AsyncGenerator[str, None]: +async def stream_google(messages: list[Message], config: LLMConfig, attachments: List[Dict[str, Any]] | None = None) -> AsyncGenerator[str, None]: + attachments = attachments or [] # Use new Google GenAI SDK (google-genai) from google import genai from google.genai import types @@ -200,6 +254,34 @@ async def stream_google(messages: list[Message], config: LLMConfig) -> AsyncGene tools=tools ) + # If attachments present, send as a single generate_content call (non-streaming) + if attachments: + parts = [] + for att in attachments: + uri = att.get("uri") + mime = att.get("mime") or "application/octet-stream" + if uri: + try: + parts.append(types.Part.from_uri(uri, mime_type=mime)) + except Exception: + parts.append(types.Part(text=f"[file attached: {uri}]")) + for msg in messages: + parts.append(types.Part(text=msg.content)) + print(f"[gemini] sending attachments: {[att.get('uri') for att in attachments]}") + try: + response = await client.aio.models.generate_content( + model=config.model_name, + contents=[types.Content(role="user", parts=parts)], + config=gen_config + ) + if response and getattr(response, "text", None): + yield response.text + else: + yield "[Error] Gemini response returned no text." + except Exception as e: + yield f"[Error] Gemini call failed: {str(e)}" + return + # Prepare History # Extract last message as the prompt prompt_msg = "..." @@ -223,13 +305,6 @@ async def stream_google(messages: list[Message], config: LLMConfig) -> AsyncGene config=gen_config ) - # Streaming call - # In google-genai SDK, streaming is usually via send_message_stream - - # Check if send_message_stream exists, otherwise use send_message with stream=True (but error says no) - # Let's assume send_message_stream is the way. 
- - # Note: chat_session.send_message_stream returns an AsyncIterator (or a coroutine returning one) response_stream = await chat_session.send_message_stream(prompt_msg) async for chunk in response_stream: @@ -237,7 +312,13 @@ async def stream_google(messages: list[Message], config: LLMConfig) -> AsyncGene if chunk.text: yield chunk.text -async def llm_streamer(context: Context, user_prompt: str, config: LLMConfig) -> AsyncGenerator[str, None]: +async def llm_streamer( + context: Context, + user_prompt: str, + config: LLMConfig, + attachments: List[Dict[str, Any]] | None = None, + tools: List[Dict[str, Any]] | None = None, +) -> AsyncGenerator[str, None]: # 1. Merge Context + New User Prompt # We create a temporary list of messages for this inference messages_to_send = context.messages.copy() @@ -253,10 +334,10 @@ async def llm_streamer(context: Context, user_prompt: str, config: LLMConfig) -> # 2. Call Provider try: if config.provider == "openai": - async for chunk in stream_openai(messages_to_send, config): + async for chunk in stream_openai(messages_to_send, config, attachments, tools): yield chunk elif config.provider == "google": - async for chunk in stream_google(messages_to_send, config): + async for chunk in stream_google(messages_to_send, config, attachments): yield chunk else: yield f"Error: Unsupported provider {config.provider}" diff --git a/backend/data/test/files/1d344051-3b8e-41bc-8c57-15710383bd42 b/backend/data/test/files/1d344051-3b8e-41bc-8c57-15710383bd42 new file mode 100644 index 0000000..747674f Binary files /dev/null and b/backend/data/test/files/1d344051-3b8e-41bc-8c57-15710383bd42 differ diff --git a/backend/data/test/files/23f72737-1d5f-4cab-b68e-8c9010a8fdc4 b/backend/data/test/files/23f72737-1d5f-4cab-b68e-8c9010a8fdc4 new file mode 100644 index 0000000..a837816 Binary files /dev/null and b/backend/data/test/files/23f72737-1d5f-4cab-b68e-8c9010a8fdc4 differ diff --git a/backend/data/test/files/2d43e6a5-2823-4564-8beb-fef2721056f3 b/backend/data/test/files/2d43e6a5-2823-4564-8beb-fef2721056f3 new file mode 100644 index 0000000..c522464 Binary files /dev/null and b/backend/data/test/files/2d43e6a5-2823-4564-8beb-fef2721056f3 differ diff --git a/backend/data/test/files/58252c05-718e-4eea-a23d-56a64d03b4fc b/backend/data/test/files/58252c05-718e-4eea-a23d-56a64d03b4fc deleted file mode 100644 index a837816..0000000 Binary files a/backend/data/test/files/58252c05-718e-4eea-a23d-56a64d03b4fc and /dev/null differ diff --git a/backend/data/test/files/609ece1b-0894-4904-b363-d6e1c7a90be8 b/backend/data/test/files/609ece1b-0894-4904-b363-d6e1c7a90be8 new file mode 100644 index 0000000..f2d6870 Binary files /dev/null and b/backend/data/test/files/609ece1b-0894-4904-b363-d6e1c7a90be8 differ diff --git a/backend/data/test/files/dcf69ca5-2efa-4b8a-acec-3864a803f073 b/backend/data/test/files/dcf69ca5-2efa-4b8a-acec-3864a803f073 new file mode 100644 index 0000000..0b9b64e Binary files /dev/null and b/backend/data/test/files/dcf69ca5-2efa-4b8a-acec-3864a803f073 differ diff --git a/backend/data/test/files/index.json b/backend/data/test/files/index.json index d9e559f..537cbe1 100644 --- a/backend/data/test/files/index.json +++ b/backend/data/test/files/index.json @@ -1,11 +1,75 @@ [ { - "id": "58252c05-718e-4eea-a23d-56a64d03b4fc", + "id": "dcf69ca5-2efa-4b8a-acec-3864a803f073", + "name": "542proposal.pdf", + "size": 275191, + "mime": "application/pdf", + "created_at": 1765347496.7856553, + "provider": null, + "provider_file_id": null, + "openai_file_id": 
"file-UTunaRvEEufAK3vMPtC4oh", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [ + "untitled/node_1765410411339", + "untitled.json/node_1765320756261", + "untitled (2).json/node_1765320756261" + ] + }, + { + "id": "2d43e6a5-2823-4564-8beb-fef2721056f3", + "name": "ICLR_New (2).pdf", + "size": 4396443, + "mime": "application/pdf", + "created_at": 1765411275.0, + "provider": "openai", + "provider_file_id": "file-KF3avxznCxcDXnZxEEUhTo", + "openai_file_id": "file-KF3avxznCxcDXnZxEEUhTo", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [ + "untitled/node_1765410434473", + "untitled (2).json/node_1765411626468", + "untitled (2).json/node_1765412541501", + "untitled (2).json/node_1765413048314" + ] + }, + { + "id": "23f72737-1d5f-4cab-b68e-8c9010a8fdc4", "name": "survey_on_survey (3).pdf", "size": 263914, "mime": "application/pdf", - "created_at": 1765324835.7883003, + "created_at": 1765415182.5394356, + "provider": null, + "provider_file_id": null, + "openai_file_id": "file-M1aQQuGPqBYPg48Wf7JfNo", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [] + }, + { + "id": "609ece1b-0894-4904-b363-d6e1c7a90be8", + "name": "CV.pdf", + "size": 30337, + "mime": "application/pdf", + "created_at": 1765415665.079434, + "provider": null, + "provider_file_id": null, + "openai_file_id": "file-HB9LZHueJAaj9tNfx5QxvR", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [ + "untitled (2).json/quick_chat_temp" + ] + }, + { + "id": "1d344051-3b8e-41bc-8c57-15710383bd42", + "name": "hw4.pdf", + "size": 249853, + "mime": "application/pdf", + "created_at": 1765416241.6634274, "provider": null, - "provider_file_id": null + "provider_file_id": null, + "openai_file_id": "file-UiqdRSQiyhcp4bNbAcuy7j", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [ + "untitled (2).json/quick_chat_temp" + ] } ] \ No newline at end of file diff --git a/backend/data/test/files/vector_store.json b/backend/data/test/files/vector_store.json new file mode 100644 index 0000000..9b9d8a8 --- /dev/null +++ b/backend/data/test/files/vector_store.json @@ -0,0 +1 @@ +{"id": "vs_693910aa910081919231c2c6b6c69e1d"} \ No newline at end of file diff --git a/backend/data/test/projects/untitled (2).json b/backend/data/test/projects/untitled (2).json new file mode 100644 index 0000000..2751b9c --- /dev/null +++ b/backend/data/test/projects/untitled (2).json @@ -0,0 +1,730 @@ +{ + "version": 1, + "nodes": [ + { + "id": "node_1765320756261", + "type": "llmNode", + "position": { + "x": 100, + "y": 100 + }, + "data": { + "label": "PDF File Count", + "model": "gpt-5.1", + "temperature": 0.7, + "systemPrompt": "", + "userPrompt": "testing search_files. Please search for pdf files in this conversation and count the number.", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "messages": [ + { + "id": "msg_1765413079192_u", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "msg_1765413079192_a", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + } + ], + "traces": [], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. 
Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + } + ] + } + ], + "forkedTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-u", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-a", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + } + ] + } + ], + "mergedTraces": [], + "response": "I found **1 PDF file** in this conversation: `542proposal.pdf`. ", + "status": "success", + "inputs": 1, + "activeTraceIds": [], + "attachments": [ + { + "fileId": "8a29c29f-fc55-4197-ba61-ac6fa1b5d628", + "name": "507hw_.pdf", + "providerFileId": "file-9ptprToszH4K38Mg3Qu5B6", + "expiresAt": null + } + ], + "attachedFileIds": [ + "dcf69ca5-2efa-4b8a-acec-3864a803f073" + ], + "querySentAt": 1765413043481, + "responseReceivedAt": 1765413079192 + }, + "width": 200, + "height": 64, + "selected": false, + "dragging": false, + "positionAbsolute": { + "x": 109.22690128581917, + "y": 455.2354603148382 + } + }, + { + "id": "node_1765413048314", + "type": "llmNode", + "position": { + "x": 450, + "y": 100 + }, + "data": { + "label": "Pdf Files", + "model": "gpt-5.1", + "temperature": 0.7, + "systemPrompt": "", + "userPrompt": "what about now?", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "msg_1765415155872_u", + "role": "user", + "content": "what about now?" + }, + { + "id": "msg_1765415155872_a", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + } + ], + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. 
`542proposal.pdf` " + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "response": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` ", + "status": "success", + "inputs": 1, + "activeTraceIds": [ + "fork-node_1765320756261-1765413083937" + ], + "attachedFileIds": [ + "2d43e6a5-2823-4564-8beb-fef2721056f3" + ], + "querySentAt": 1765415098761, + "responseReceivedAt": 1765415155872 + }, + "width": 200, + "height": 64, + "selected": true, + "positionAbsolute": { + "x": 407.983813671746, + "y": 459.30622770179616 + }, + "dragging": false + }, + { + "id": "node_1765415177931", + "type": "llmNode", + "position": { + "x": 800, + "y": 100 + }, + "data": { + "label": "PDF Files", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "我来多做几轮测试。现在呢?", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [], + "attachedFileIds": [], + "response": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. 
`542proposal.pdf` ", + "status": "success", + "inputs": 1, + "querySentAt": 1765415177931, + "responseReceivedAt": 1765415177931 + }, + "width": 200, + "height": 64 + }, + { + "id": "node_1765415203978", + "type": "llmNode", + "position": { + "x": 1150, + "y": 100 + }, + "data": { + "label": "PDF Files", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "那么现在呢", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [], + "attachedFileIds": [], + "response": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. 
`542proposal.pdf` ", + "status": "success", + "inputs": 1, + "querySentAt": 1765415203978, + "responseReceivedAt": 1765415203978 + }, + "width": 200, + "height": 64 + }, + { + "id": "node_1765415227098", + "type": "llmNode", + "position": { + "x": 1500, + "y": 100 + }, + "data": { + "label": "Citation Markers", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "”fileciteturn0file0turn0file7“这一串字符是什么?", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" 
+ }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [], + "attachedFileIds": [], + "response": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。", + "status": "success", + "inputs": 1, + "querySentAt": 1765415227098, + "responseReceivedAt": 1765415227098 + }, + "width": 200, + "height": 64 + }, + { + "id": "node_1765415275416", + "type": "llmNode", + "position": { + "x": 1850, + "y": 100 + }, + "data": { + "label": "OpenAI Citation Rendering", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + } + ] + } + ], + "outgoingTraces": [], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [], + "attachedFileIds": [], + "response": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. 
**前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? \"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。", + "status": "success", + "inputs": 1, + "querySentAt": 1765415275416, + "responseReceivedAt": 1765415275416 + }, + "width": 226, + "height": 64 + } + ], + "edges": [ + { + "source": "node_1765320756261", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765413048314", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765320756261trace-fork-node_1765320756261-1765413083937-node_1765413048314input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765413048314", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415177931", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765413048314trace-fork-node_1765320756261-1765413083937-node_1765415177931input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765415177931", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415203978", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765415177931trace-fork-node_1765320756261-1765413083937-node_1765415203978input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765415203978", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415227098", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765415203978trace-fork-node_1765320756261-1765413083937-node_1765415227098input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765415227098", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415275416", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765415227098trace-fork-node_1765320756261-1765413083937-node_1765415275416input-0", + "data": { + "isMerged": false, + "colors": [] + } + } + ], + "viewport": { + "x": 83.99242028051987, + "y": 146.6529258302924, + "zoom": 0.8705505632961249 + }, + "theme": "light" +} \ No newline at end of file diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 8ae93c7..5477ff2 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -158,7 +158,9 @@ function Flow() { mergedTraces: [], response: '', status: 'idle', - inputs: 1 + inputs: 1, + attachedFileIds: [], + activeTraceIds: [] }, }); 
setMenu(null); diff --git a/frontend/src/components/LeftSidebar.tsx b/frontend/src/components/LeftSidebar.tsx index a75df39..aff2df8 100644 --- a/frontend/src/components/LeftSidebar.tsx +++ b/frontend/src/components/LeftSidebar.tsx @@ -2,7 +2,7 @@ import React, { useEffect, useMemo, useRef, useState, useCallback } from 'react' import { useReactFlow } from 'reactflow'; import { Folder, FileText, Archive, ChevronLeft, ChevronRight, Trash2, MessageSquare, - MoreVertical, Download, Upload, Plus, RefreshCw, Edit3 + MoreVertical, Download, Upload, Plus, RefreshCw, Edit3, Loader2 } from 'lucide-react'; import useFlowStore, { type FSItem, type BlueprintDocument, type FileMeta } from '../store/flowStore'; @@ -19,6 +19,7 @@ const LeftSidebar: React.FC = ({ isOpen, onToggle }) => { createNodeFromArchive, theme, files, + uploadingFileIds, projectTree, currentBlueprintPath, saveStatus, @@ -47,6 +48,9 @@ const LeftSidebar: React.FC = ({ isOpen, onToggle }) => { const [dragItem, setDragItem] = useState(null); const [showSaveStatus, setShowSaveStatus] = useState(false); const [expanded, setExpanded] = useState>(() => new Set(['.'])); + const [fileProvider, setFileProvider] = useState<'local' | 'openai' | 'google'>('local'); + const [openaiPurpose, setOpenaiPurpose] = useState('user_data'); + const [fileSearch, setFileSearch] = useState(''); const handleDragStart = (e: React.DragEvent, archiveId: string) => { e.dataTransfer.setData('archiveId', archiveId); @@ -235,7 +239,10 @@ const LeftSidebar: React.FC = ({ isOpen, onToggle }) => { let failed: string[] = []; for (const f of Array.from(list)) { try { - await uploadFile(f); + await uploadFile(f, { + provider: fileProvider, + purpose: fileProvider === 'openai' ? openaiPurpose : undefined, + }); ok += 1; } catch (e) { console.error(e); @@ -251,6 +258,13 @@ const LeftSidebar: React.FC = ({ isOpen, onToggle }) => { } }; + const filteredFiles = useMemo(() => { + const q = fileSearch.trim().toLowerCase(); + if (!q) return files; + // Only search local files; keep provider files out of filtered results + return files.filter(f => !f.provider && f.name.toLowerCase().includes(q)); + }, [files, fileSearch]); + const handleFilesInputChange = async (e: React.ChangeEvent) => { const files = e.target.files; if (files && files.length > 0) { @@ -610,14 +624,37 @@ const LeftSidebar: React.FC = ({ isOpen, onToggle }) => { Drag files here or click upload - {files.length === 0 ? ( +
+ setFileSearch(e.target.value)} + className={`flex-1 text-sm border rounded px-2 py-1 ${isDark ? 'bg-gray-800 border-gray-700 text-gray-100 placeholder-gray-500' : 'bg-white border-gray-200 text-gray-800 placeholder-gray-400'}`} + placeholder="Search files by name..." + /> + {fileSearch && ( + + )} +
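The `filteredFiles` memo added above returns every file when the search box is empty but, per its own comment, only matches local (provider-less) files once a query is typed. The same predicate as a standalone helper, assuming only the `name` and `provider` fields of `FileMeta` that this component already imports from the store:

```ts
import type { FileMeta } from '../store/flowStore';

// Same behaviour as the useMemo in LeftSidebar: an empty query returns all
// files; a non-empty query returns only local (provider-less) files whose
// name contains the query, case-insensitively.
export function filterLocalFilesByName(files: FileMeta[], query: string): FileMeta[] {
  const q = query.trim().toLowerCase();
  if (!q) return files;
  return files.filter((f) => !f.provider && f.name.toLowerCase().includes(q));
}
```

A call like `filterLocalFilesByName(files, fileSearch)` would mirror the inline memo; keeping OpenAI/Gemini-backed uploads out of name search is the deliberate choice the original comment calls out.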
+ + {files.length === 0 && (uploadingFileIds?.length || 0) === 0 ? (

No files uploaded yet.

+ ) : filteredFiles.length === 0 && (uploadingFileIds?.length || 0) === 0 ? ( +
+ +

No files match your search.

+
) : (
- {files.map(f => ( + {filteredFiles.map(f => (
= ({ isOpen, onToggle }) => { {formatSize(f.size)} • {new Date(f.created_at * 1000).toLocaleString()} + {f.provider && ( + + Provider: {f.provider === 'openai' ? 'OpenAI' : f.provider === 'google' ? 'Gemini' : f.provider} + + )}
))} + {uploadingFileIds && uploadingFileIds.length > 0 && ( +
+
+ + Uploading {uploadingFileIds.length} file{uploadingFileIds.length > 1 ? 's' : ''}… +
+
+ )} )} diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx index 3008ba3..a8dd82e 100644 --- a/frontend/src/components/Sidebar.tsx +++ b/frontend/src/components/Sidebar.tsx @@ -1,9 +1,9 @@ -import React, { useState, useEffect, useRef } from 'react'; +import React, { useState, useEffect, useRef, useMemo } from 'react'; import { useReactFlow } from 'reactflow'; import useFlowStore from '../store/flowStore'; -import type { NodeData, Trace, Message, MergedTrace, MergeStrategy } from '../store/flowStore'; +import type { NodeData, Trace, Message, MergedTrace, MergeStrategy, FileMeta } from '../store/flowStore'; import ReactMarkdown from 'react-markdown'; -import { Play, Settings, Info, Save, ChevronLeft, ChevronRight, Maximize2, Edit3, X, Check, FileText, MessageCircle, Send, GripVertical, GitMerge, Trash2, AlertCircle, Loader2, Navigation } from 'lucide-react'; +import { Play, Settings, Info, Save, ChevronLeft, ChevronRight, Maximize2, Edit3, X, Check, FileText, MessageCircle, Send, GripVertical, GitMerge, Trash2, AlertCircle, Loader2, Navigation, Upload, Search, Link } from 'lucide-react'; interface SidebarProps { isOpen: boolean; @@ -15,14 +15,21 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { const { nodes, edges, selectedNodeId, updateNodeData, getActiveContext, addNode, setSelectedNode, isTraceComplete, createQuickChatNode, theme, - createMergedTrace, updateMergedTrace, deleteMergedTrace, computeMergedMessages + createMergedTrace, updateMergedTrace, deleteMergedTrace, computeMergedMessages, + files, uploadFile, refreshFiles, addFileScope, removeFileScope, currentBlueprintPath, + saveCurrentBlueprint } = useFlowStore(); - const { setCenter } = useReactFlow(); + const { setCenter, getViewport } = useReactFlow(); const isDark = theme === 'dark'; const [activeTab, setActiveTab] = useState<'interact' | 'settings' | 'debug'>('interact'); const [streamBuffer, setStreamBuffer] = useState(''); const [streamingNodeId, setStreamingNodeId] = useState(null); // Track which node is streaming + // Attachments state + const [showAttachModal, setShowAttachModal] = useState(false); + const [attachSearch, setAttachSearch] = useState(''); + const settingsUploadRef = useRef(null); + // Response Modal & Edit states const [isModalOpen, setIsModalOpen] = useState(false); const [isEditing, setIsEditing] = useState(false); @@ -45,8 +52,14 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { const [quickChatEffort, setQuickChatEffort] = useState<'low' | 'medium' | 'high'>('medium'); const [quickChatNeedsDuplicate, setQuickChatNeedsDuplicate] = useState(false); const [quickChatWebSearch, setQuickChatWebSearch] = useState(true); + const [quickChatAttachedFiles, setQuickChatAttachedFiles] = useState([]); // File IDs for current message + const [quickChatSentFiles, setQuickChatSentFiles] = useState<{msgId: string, fileIds: string[]}[]>([]); // Files sent with messages + const [showQuickChatAttachModal, setShowQuickChatAttachModal] = useState(false); + const [quickChatAttachSearch, setQuickChatAttachSearch] = useState(''); + const [quickChatUploading, setQuickChatUploading] = useState(false); // Upload loading state const quickChatEndRef = useRef(null); const quickChatInputRef = useRef(null); + const quickChatUploadRef = useRef(null); // Merge Trace states const [showMergeModal, setShowMergeModal] = useState(false); @@ -95,6 +108,86 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { } }, [quickChatMessages]); + // Attachment 
helpers + const handleAttach = async (fileId: string) => { + if (!selectedNode) return; + const current = selectedNode.data.attachedFileIds || []; + if (!current.includes(fileId)) { + updateNodeData(selectedNode.id, { + attachedFileIds: [...current, fileId] + }); + // Add scope to file for filtering + const projectPath = currentBlueprintPath || 'untitled'; + const scope = `${projectPath}/${selectedNode.id}`; + try { + await addFileScope(fileId, scope); + } catch (e) { + console.error('Failed to add file scope:', e); + } + // Auto-save blueprint to persist attached files + if (currentBlueprintPath) { + saveCurrentBlueprint(currentBlueprintPath, getViewport()).catch(console.error); + } + } + setShowAttachModal(false); + }; + + const handleDetach = async (fileId: string) => { + if (!selectedNode) return; + const current = selectedNode.data.attachedFileIds || []; + updateNodeData(selectedNode.id, { + attachedFileIds: current.filter(id => id !== fileId) + }); + // Remove scope from file + const projectPath = currentBlueprintPath || 'untitled'; + const scope = `${projectPath}/${selectedNode.id}`; + try { + await removeFileScope(fileId, scope); + } catch (e) { + console.error('Failed to remove file scope:', e); + } + // Auto-save blueprint to persist detached files + if (currentBlueprintPath) { + saveCurrentBlueprint(currentBlueprintPath, getViewport()).catch(console.error); + } + }; + + const handleUploadAndAttach = async (e: React.ChangeEvent) => { + if (!e.target.files || e.target.files.length === 0 || !selectedNode) return; + const file = e.target.files[0]; + try { + // Determine provider based on node model + const model = selectedNode.data.model; + let provider: 'local' | 'openai' | 'google' = 'local'; + if (model.startsWith('gpt') || model.startsWith('o1') || model.startsWith('o3')) { + provider = 'openai'; + } else if (model.startsWith('gemini')) { + provider = 'google'; + } + + const meta = await uploadFile(file, { provider }); + handleAttach(meta.id); + } catch (err) { + alert(`Upload failed: ${(err as Error).message}`); + } finally { + e.target.value = ''; + } + }; + + // Filter files for attach modal + const filteredFilesToAttach = useMemo(() => { + const q = attachSearch.trim().toLowerCase(); + if (!q) return files; + return files.filter(f => f.name.toLowerCase().includes(q)); + }, [files, attachSearch]); + + // Filter files for Quick Chat attach modal + const filteredQuickChatFiles = useMemo(() => { + const q = quickChatAttachSearch.trim().toLowerCase(); + if (!q) return files; + return files.filter(f => f.name.toLowerCase().includes(q)); + }, [files, quickChatAttachSearch]); + if (!isOpen) { return (
= ({ isOpen, onToggle, onInteract }) => { // Use getActiveContext which respects the user's selected traces const context = getActiveContext(runningNodeId); + // Calculate scopes: all nodes in the current trace path + const projectPath = currentBlueprintPath || 'untitled'; + + // Compute all upstream node IDs by traversing edges backward + const traceNodeIds = new Set(); + traceNodeIds.add(runningNodeId); + + const visited = new Set(); + const queue = [runningNodeId]; + + while (queue.length > 0) { + const currentNodeId = queue.shift()!; + if (visited.has(currentNodeId)) continue; + visited.add(currentNodeId); + + // Find all incoming edges to this node + const incomingEdges = edges.filter(e => e.target === currentNodeId); + for (const edge of incomingEdges) { + const sourceNodeId = edge.source; + if (!visited.has(sourceNodeId)) { + traceNodeIds.add(sourceNodeId); + queue.push(sourceNodeId); + } + } + } + + // Build scopes for all nodes in the trace path + const scopes = Array.from(traceNodeIds).map(nodeId => `${projectPath}/${nodeId}`); + console.log('[file_search] trace scopes:', scopes); + + // If no prompt but has files, use a default prompt + const attachedFiles = selectedNode.data.attachedFileIds || []; + const effectivePrompt = runningPrompt?.trim() + ? runningPrompt + : attachedFiles.length > 0 + ? 'Please analyze the attached files.' + : ''; + try { const response = await fetch('http://localhost:8000/api/run_node_stream', { method: 'POST', @@ -168,7 +299,9 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { body: JSON.stringify({ node_id: runningNodeId, incoming_contexts: [{ messages: context }], - user_prompt: runningPrompt, + user_prompt: effectivePrompt, + attached_file_ids: attachedFiles, + scopes, merge_strategy: selectedNode.data.mergeStrategy || 'smart', config: { provider: selectedNode.data.model.includes('gpt') || selectedNode.data.model === 'o3' ? 
'openai' : 'google', @@ -415,6 +548,34 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { const hasResponse = !!selectedNode.data.response; const hasDraftPrompt = !!selectedNode.data.userPrompt && !hasResponse; + // Helper to extract node ID from message ID (format: nodeId-u or nodeId-a) + const getNodeIdFromMsgId = (msgId: string): string | null => { + if (!msgId) return null; + const parts = msgId.split('-'); + if (parts.length >= 2) { + // Remove last part (-u or -a) and rejoin + return parts.slice(0, -1).join('-'); + } + return null; + }; + + // Helper to build sentFiles from messages + const buildSentFilesFromMessages = (messages: Message[]): {msgId: string, fileIds: string[]}[] => { + const sentFiles: {msgId: string, fileIds: string[]}[] = []; + for (const msg of messages) { + if (msg.role === 'user' && msg.id) { + const nodeId = getNodeIdFromMsgId(msg.id); + if (nodeId) { + const node = nodes.find(n => n.id === nodeId); + if (node && node.data.attachedFileIds && node.data.attachedFileIds.length > 0) { + sentFiles.push({ msgId: msg.id, fileIds: node.data.attachedFileIds }); + } + } + } + } + return sentFiles; + }; + if (isNewTrace || !trace) { // Start a new trace from current node const initialMessages: Message[] = []; @@ -433,6 +594,7 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { messages: initialMessages }); setQuickChatMessages(initialMessages); + setQuickChatSentFiles(buildSentFilesFromMessages(initialMessages)); setQuickChatNeedsDuplicate(false); setQuickChatLastNodeId(selectedNode.id); } else { @@ -456,6 +618,7 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { messages: fullMessages }); setQuickChatMessages(fullMessages); + setQuickChatSentFiles(buildSentFilesFromMessages(fullMessages)); // Set last node ID: if current node has response, start from here. 
// Otherwise start from trace source (which is the last completed node) @@ -471,6 +634,58 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { setQuickChatOpen(false); setQuickChatTrace(null); setQuickChatMessages([]); + setQuickChatAttachedFiles([]); + setQuickChatSentFiles([]); + }; + + // Quick Chat file attachment helpers + const getQuickChatScope = () => { + const projectPath = currentBlueprintPath || 'untitled'; + return `${projectPath}/quick_chat_temp`; + }; + + const handleQuickChatAttach = async (fileId: string) => { + if (!quickChatAttachedFiles.includes(fileId)) { + setQuickChatAttachedFiles(prev => [...prev, fileId]); + // Add scope to file for filtering + try { + await addFileScope(fileId, getQuickChatScope()); + } catch (e) { + console.error('Failed to add file scope:', e); + } + } + setShowQuickChatAttachModal(false); + }; + + const handleQuickChatDetach = async (fileId: string) => { + setQuickChatAttachedFiles(prev => prev.filter(id => id !== fileId)); + // Remove scope from file + try { + await removeFileScope(fileId, getQuickChatScope()); + } catch (e) { + console.error('Failed to remove file scope:', e); + } + }; + + const handleQuickChatUpload = async (e: React.ChangeEvent) => { + if (!e.target.files || e.target.files.length === 0) return; + const file = e.target.files[0]; + setQuickChatUploading(true); + try { + const meta = await uploadFile(file, { provider: 'local' }); + setQuickChatAttachedFiles(prev => [...prev, meta.id]); + // Add scope to file for filtering + try { + await addFileScope(meta.id, getQuickChatScope()); + } catch (e) { + console.error('Failed to add file scope:', e); + } + } catch (err) { + alert(`Upload failed: ${(err as Error).message}`); + } finally { + e.target.value = ''; + setQuickChatUploading(false); + } }; // Open Quick Chat for a merged trace @@ -482,6 +697,16 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { const hasResponse = !!selectedNode.data.response; const hasDraftPrompt = !!selectedNode.data.userPrompt && !hasResponse; + // Helper to extract node ID from message ID (format: nodeId-u or nodeId-a) + const getNodeIdFromMsgId = (msgId: string): string | null => { + if (!msgId) return null; + const parts = msgId.split('-'); + if (parts.length >= 2) { + return parts.slice(0, -1).join('-'); + } + return null; + }; + // Build messages from merged trace const fullMessages: Message[] = [...merged.messages]; // Only include current node's content if it was sent @@ -492,6 +717,20 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { fullMessages.push({ id: `${selectedNode.id}-a`, role: 'assistant', content: selectedNode.data.response }); } + // Build sentFiles from messages + const sentFiles: {msgId: string, fileIds: string[]}[] = []; + for (const msg of fullMessages) { + if (msg.role === 'user' && msg.id) { + const nodeId = getNodeIdFromMsgId(msg.id); + if (nodeId) { + const node = nodes.find(n => n.id === nodeId); + if (node && node.data.attachedFileIds && node.data.attachedFileIds.length > 0) { + sentFiles.push({ msgId: msg.id, fileIds: node.data.attachedFileIds }); + } + } + } + } + // Create a pseudo-trace for the merged context setQuickChatTrace({ id: merged.id, @@ -500,6 +739,7 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { messages: fullMessages }); setQuickChatMessages(fullMessages); + setQuickChatSentFiles(sentFiles); setQuickChatNeedsDuplicate(false); // Merged traces don't duplicate setQuickChatOpen(true); @@ -718,19 +958,30 @@ const Sidebar: React.FC = ({ 
isOpen, onToggle, onInteract }) => { const activeTracesCheck = selectedNode ? checkActiveTracesComplete() : { complete: true }; const handleQuickChatSend = async () => { - if (!quickChatInput.trim() || !quickChatTrace || quickChatLoading || !selectedNode) return; + // Allow send if there's text OR attached files + const hasContent = quickChatInput.trim() || quickChatAttachedFiles.length > 0; + if (!hasContent || !quickChatTrace || quickChatLoading || !selectedNode) return; const userInput = quickChatInput; + const attachedFilesCopy = [...quickChatAttachedFiles]; + const msgId = `qc_${Date.now()}_u`; + const userMessage: Message = { - id: `qc_${Date.now()}_u`, + id: msgId, role: 'user', - content: userInput + content: userInput || '[Files attached]' }; + // Track sent files for display + if (attachedFilesCopy.length > 0) { + setQuickChatSentFiles(prev => [...prev, { msgId, fileIds: attachedFilesCopy }]); + } + // Add user message to display const messagesBeforeSend = [...quickChatMessages]; setQuickChatMessages(prev => [...prev, userMessage]); setQuickChatInput(''); + setQuickChatAttachedFiles([]); // Clear attached files after send setQuickChatLoading(true); // Store model at send time to avoid issues with model switching during streaming @@ -745,6 +996,10 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { const reasoningModels = ['gpt-5', 'gpt-5-chat-latest', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro', 'gpt-5.1', 'gpt-5.1-chat-latest', 'o3']; const isReasoning = reasoningModels.includes(modelAtSend); + // Build scopes for file search (Quick Chat uses a temp scope) + const projectPath = currentBlueprintPath || 'untitled'; + const scopes = [`${projectPath}/quick_chat_temp`]; + // Call LLM API with current messages as context const response = await fetch('http://localhost:8000/api/run_node_stream', { method: 'POST', @@ -752,7 +1007,9 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { body: JSON.stringify({ node_id: 'quick_chat_temp', incoming_contexts: [{ messages: messagesBeforeSend }], - user_prompt: userInput, + user_prompt: userInput || 'Please analyze the attached files.', + attached_file_ids: attachedFilesCopy, + scopes, merge_strategy: 'smart', config: { provider: isOpenAI ? 'openai' : 'google', @@ -807,6 +1064,7 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { temperature: isReasoning ? 1 : tempAtSend, reasoningEffort: effortAtSend, enableGoogleSearch: webSearchAtSend, + attachedFileIds: attachedFilesCopy, status: 'success', querySentAt: Date.now(), responseReceivedAt: Date.now() @@ -850,6 +1108,7 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { forkedTraces: [], mergedTraces: [], activeTraceIds: [], + attachedFileIds: attachedFilesCopy, response: fullResponse, status: 'success' as const, inputs: 1, @@ -1575,6 +1834,81 @@ const Sidebar: React.FC = ({ isOpen, onToggle, onInteract }) => { {activeTab === 'settings' && (
+ {/* Attachments Section */} +
+ + +
+ + + + +
+ + {(selectedNode.data.attachedFileIds || []).length === 0 ? ( +

+ No files attached. +

+ ) : ( +
+ {(selectedNode.data.attachedFileIds || []).map(id => { + const file = files.find(f => f.id === id); + if (!file) return null; + return ( +
+
+ + + {file.name} + +
+ +
+ ); + })} +
+ )} +
+
+
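The attach/detach handlers wired to this Attachments section key each attachment by a `projectPath/nodeId` composite and mirror it into the file's scope list via `addFileScope` / `removeFileScope`. A trimmed sketch of that bookkeeping, with the store actions passed in explicitly and the error handling and blueprint auto-save omitted:

```ts
// Composite scope key used when filtering file search; 'untitled' mirrors the
// fallback used in the handlers when no blueprint path is set.
export const scopeKey = (projectPath: string | null, nodeId: string) =>
  `${projectPath || 'untitled'}/${nodeId}`;

interface ScopeStore {
  addFileScope: (fileId: string, scope: string) => Promise<void>;
  removeFileScope: (fileId: string, scope: string) => Promise<void>;
}

// Attach: add the id to the node's list and tag the file with the node's scope.
export async function attachFile(
  store: ScopeStore,
  attached: string[],
  fileId: string,
  projectPath: string | null,
  nodeId: string
): Promise<string[]> {
  if (attached.includes(fileId)) return attached;
  await store.addFileScope(fileId, scopeKey(projectPath, nodeId));
  return [...attached, fileId];
}

// Detach is the mirror image: drop the id and remove the scope tag.
export async function detachFile(
  store: ScopeStore,
  attached: string[],
  fileId: string,
  projectPath: string | null,
  nodeId: string
): Promise<string[]> {
  await store.removeFileScope(fileId, scopeKey(projectPath, nodeId));
  return attached.filter((id) => id !== fileId);
}
```

The returned array is what the component writes back through `updateNodeData(selectedNode.id, { attachedFileIds })` before triggering `saveCurrentBlueprint`.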
{/* Input Area */}
+ {/* Attached Files Preview */} + {quickChatAttachedFiles.length > 0 && ( +
+
+ {quickChatAttachedFiles.map(fileId => { + const file = files.find(f => f.id === fileId); + if (!file) return null; + return ( +
+ + {file.name} + +
+ ); + })} +
+
+ )}
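Earlier in this file's diff, the run handler walks the flow graph backwards from the node being executed and turns every node on the path into a scope string before posting to `/api/run_node_stream` (logged as `[file_search] trace scopes`). Extracted as a pure function over a minimal edge shape, as a sketch of that traversal rather than a drop-in replacement for the handler:

```ts
interface EdgeLike {
  source: string;
  target: string;
}

// Collect the running node plus every upstream node reachable via incoming
// edges (breadth-first), then map each to a "projectPath/nodeId" scope.
export function collectTraceScopes(
  runningNodeId: string,
  edges: EdgeLike[],
  projectPath: string | null
): string[] {
  const root = projectPath || 'untitled';
  const visited = new Set<string>();
  const queue = [runningNodeId];

  while (queue.length > 0) {
    const current = queue.shift()!;
    if (visited.has(current)) continue;
    visited.add(current);
    for (const edge of edges) {
      if (edge.target === current && !visited.has(edge.source)) {
        queue.push(edge.source);
      }
    }
  }

  return Array.from(visited).map((nodeId) => `${root}/${nodeId}`);
}
```

The resulting array is posted as `scopes` alongside `attached_file_ids` in the run request body, matching the payload assembled in the handler above.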