Diffstat (limited to 'backend')
19 files changed, 3898 insertions, 58 deletions
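Not part of the diff: a minimal client-side sketch of the authentication flow this changeset introduces, assuming the backend is running locally on port 8000 and that the `requests` library is available (both assumptions are illustrative; the endpoint paths and payloads come from `backend/app/auth/routes.py` below).

```python
# Illustrative only -- not part of this changeset. Assumes the backend is
# reachable at http://localhost:8000 and that `requests` is installed.
import requests

BASE = "http://localhost:8000"

# Register a new account (see /api/auth/register in routes.py below).
requests.post(f"{BASE}/api/auth/register", json={
    "username": "alice",
    "email": "alice@example.com",
    "password": "secret123",
})

# Log in via the JSON variant and capture the JWT from the Token response.
token = requests.post(f"{BASE}/api/auth/login/json", json={
    "username": "alice",
    "password": "secret123",
}).json()["access_token"]

# Call an authenticated endpoint with the bearer token.
me = requests.get(
    f"{BASE}/api/auth/me",
    headers={"Authorization": f"Bearer {token}"},
).json()
print(me["username"], me["email"])
```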
diff --git a/backend/app/auth/__init__.py b/backend/app/auth/__init__.py new file mode 100644 index 0000000..8234b6f --- /dev/null +++ b/backend/app/auth/__init__.py @@ -0,0 +1,17 @@ +from .routes import router as auth_router +from .routes import get_current_user, get_current_user_optional +from .models import User, get_db, init_db +from .utils import Token, UserCreate, UserResponse + +__all__ = [ + 'auth_router', + 'get_current_user', + 'get_current_user_optional', + 'User', + 'get_db', + 'init_db', + 'Token', + 'UserCreate', + 'UserResponse', +] + diff --git a/backend/app/auth/models.py b/backend/app/auth/models.py new file mode 100644 index 0000000..8477ba2 --- /dev/null +++ b/backend/app/auth/models.py @@ -0,0 +1,44 @@ +import os +from sqlalchemy import Column, Integer, String, DateTime, Text, create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from datetime import datetime + +# Database configuration +DATA_ROOT = os.path.abspath(os.getenv("DATA_ROOT", os.path.join(os.getcwd(), "data"))) +DATABASE_PATH = os.path.join(DATA_ROOT, "users.db") +DATABASE_URL = f"sqlite:///{DATABASE_PATH}" + +engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False}) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) +Base = declarative_base() + + +class User(Base): + __tablename__ = "users" + + id = Column(Integer, primary_key=True, index=True) + username = Column(String(50), unique=True, index=True, nullable=False) + email = Column(String(100), unique=True, index=True, nullable=False) + hashed_password = Column(String(255), nullable=False) + created_at = Column(DateTime, default=datetime.utcnow) + is_active = Column(Integer, default=1) + # API Keys (stored encrypted in production, plain for simplicity here) + openai_api_key = Column(Text, nullable=True) + gemini_api_key = Column(Text, nullable=True) + + +def init_db(): + """Initialize database tables""" + os.makedirs(DATA_ROOT, exist_ok=True) + Base.metadata.create_all(bind=engine) + + +def get_db(): + """Dependency to get database session""" + db = SessionLocal() + try: + yield db + finally: + db.close() + diff --git a/backend/app/auth/routes.py b/backend/app/auth/routes.py new file mode 100644 index 0000000..3c906b5 --- /dev/null +++ b/backend/app/auth/routes.py @@ -0,0 +1,269 @@ +from fastapi import APIRouter, Depends, HTTPException, status +from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm +from sqlalchemy.orm import Session +from typing import Optional +from pydantic import BaseModel + +from .models import User, get_db +from .utils import ( + Token, UserCreate, UserLogin, UserResponse, + verify_password, get_password_hash, create_access_token, decode_token +) + +router = APIRouter(prefix="/api/auth", tags=["Authentication"]) + +# OAuth2 scheme for token extraction +oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login", auto_error=True) +oauth2_scheme_optional = OAuth2PasswordBearer(tokenUrl="/api/auth/login", auto_error=False) + + +async def get_current_user( + token: str = Depends(oauth2_scheme), + db: Session = Depends(get_db) +) -> User: + """ + Dependency: Validate JWT token and return current user. + Raises 401 if token is invalid or user not found. 
+ """ + username = decode_token(token) + if not username: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid or expired token", + headers={"WWW-Authenticate": "Bearer"}, + ) + + user = db.query(User).filter(User.username == username).first() + if not user: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="User not found", + headers={"WWW-Authenticate": "Bearer"}, + ) + + if not user.is_active: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="User account is disabled" + ) + + return user + + +async def get_current_user_optional( + token: Optional[str] = Depends(oauth2_scheme_optional), + db: Session = Depends(get_db) +) -> Optional[User]: + """ + Dependency: Try to get current user, but don't fail if not authenticated. + Returns None if no valid token. + """ + if not token: + return None + + username = decode_token(token) + if not username: + return None + + user = db.query(User).filter(User.username == username).first() + if not user or not user.is_active: + return None + + return user + + +@router.get("/check-username/{username}") +async def check_username(username: str, db: Session = Depends(get_db)): + """ + Check if a username is available. + """ + existing = db.query(User).filter(User.username == username).first() + return {"available": existing is None} + + +@router.get("/check-email/{email}") +async def check_email(email: str, db: Session = Depends(get_db)): + """ + Check if an email is available. + """ + existing = db.query(User).filter(User.email == email).first() + return {"available": existing is None} + + +@router.post("/register", response_model=UserResponse, status_code=status.HTTP_201_CREATED) +async def register(user_data: UserCreate, db: Session = Depends(get_db)): + """ + Register a new user account. + """ + # Check if username already exists + existing_user = db.query(User).filter(User.username == user_data.username).first() + if existing_user: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Username already registered" + ) + + # Check if email already exists + existing_email = db.query(User).filter(User.email == user_data.email).first() + if existing_email: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Email already registered" + ) + + # Validate password length + if len(user_data.password) < 6: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Password must be at least 6 characters" + ) + + # Create new user + user = User( + username=user_data.username, + email=user_data.email, + hashed_password=get_password_hash(user_data.password) + ) + db.add(user) + db.commit() + db.refresh(user) + + return user + + +@router.post("/login", response_model=Token) +async def login(form_data: OAuth2PasswordRequestForm = Depends(), db: Session = Depends(get_db)): + """ + Login with username and password, returns JWT token. 
+ """ + # Find user by username + user = db.query(User).filter(User.username == form_data.username).first() + + if not user: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + headers={"WWW-Authenticate": "Bearer"}, + ) + + if not verify_password(form_data.password, user.hashed_password): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + headers={"WWW-Authenticate": "Bearer"}, + ) + + if not user.is_active: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="User account is disabled" + ) + + # Create access token + access_token = create_access_token(data={"sub": user.username}) + + return { + "access_token": access_token, + "token_type": "bearer", + "username": user.username + } + + +@router.post("/login/json", response_model=Token) +async def login_json(user_data: UserLogin, db: Session = Depends(get_db)): + """ + Login with JSON body (alternative to form-data). + """ + # Find user by username + user = db.query(User).filter(User.username == user_data.username).first() + + if not user: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + ) + + if not verify_password(user_data.password, user.hashed_password): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + ) + + if not user.is_active: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="User account is disabled" + ) + + # Create access token + access_token = create_access_token(data={"sub": user.username}) + + return { + "access_token": access_token, + "token_type": "bearer", + "username": user.username + } + + +@router.get("/me", response_model=UserResponse) +async def get_me(current_user: User = Depends(get_current_user)): + """ + Get current authenticated user's info. + """ + return current_user + + +@router.get("/api-keys") +async def get_api_keys(current_user: User = Depends(get_current_user)): + """ + Get current user's API keys (masked for security). + """ + def mask_key(key: str | None) -> str: + if not key: + return "" + if len(key) <= 8: + return "*" * len(key) + return key[:4] + "*" * (len(key) - 8) + key[-4:] + + return { + "openai_api_key": mask_key(current_user.openai_api_key), + "gemini_api_key": mask_key(current_user.gemini_api_key), + "has_openai_key": bool(current_user.openai_api_key), + "has_gemini_key": bool(current_user.gemini_api_key), + } + + +class ApiKeysUpdate(BaseModel): + openai_api_key: Optional[str] = None + gemini_api_key: Optional[str] = None + + +@router.post("/api-keys") +async def update_api_keys( + keys: ApiKeysUpdate, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db) +): + """ + Update current user's API keys. + Pass empty string to clear a key, or omit to keep unchanged. + """ + if keys.openai_api_key is not None: + current_user.openai_api_key = keys.openai_api_key if keys.openai_api_key else None + + if keys.gemini_api_key is not None: + current_user.gemini_api_key = keys.gemini_api_key if keys.gemini_api_key else None + + db.commit() + + return {"message": "API keys updated successfully"} + + +@router.post("/logout") +async def logout(): + """ + Logout endpoint (client should discard the token). + JWT tokens are stateless, so this is just for API completeness. 
+ """ + return {"message": "Successfully logged out"} + diff --git a/backend/app/auth/utils.py b/backend/app/auth/utils.py new file mode 100644 index 0000000..5889279 --- /dev/null +++ b/backend/app/auth/utils.py @@ -0,0 +1,73 @@ +import os +import bcrypt +from datetime import datetime, timedelta +from typing import Optional +from jose import JWTError, jwt +from pydantic import BaseModel, EmailStr + +# Configuration - use environment variables in production +SECRET_KEY = os.getenv("JWT_SECRET_KEY", "contextflow-secret-key-change-in-production-2024") +ALGORITHM = "HS256" +ACCESS_TOKEN_EXPIRE_MINUTES = int(os.getenv("JWT_EXPIRE_MINUTES", "1440")) # 24 hours default + + +# Pydantic models for request/response +class Token(BaseModel): + access_token: str + token_type: str + username: str + + +class TokenData(BaseModel): + username: Optional[str] = None + + +class UserCreate(BaseModel): + username: str + email: EmailStr + password: str + + +class UserLogin(BaseModel): + username: str + password: str + + +class UserResponse(BaseModel): + id: int + username: str + email: str + created_at: datetime + is_active: int + + class Config: + from_attributes = True + + +def verify_password(plain_password: str, hashed_password: str) -> bool: + """Verify a password against its hash""" + return bcrypt.checkpw(plain_password.encode('utf-8'), hashed_password.encode('utf-8')) + + +def get_password_hash(password: str) -> str: + """Hash a password""" + return bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt()).decode('utf-8') + + +def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str: + """Create a JWT access token""" + to_encode = data.copy() + expire = datetime.utcnow() + (expires_delta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)) + to_encode.update({"exp": expire}) + return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM) + + +def decode_token(token: str) -> Optional[str]: + """Decode a JWT token and return the username""" + try: + payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) + username: str = payload.get("sub") + return username + except JWTError: + return None + diff --git a/backend/app/main.py b/backend/app/main.py index 48cb89f..c254652 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,15 +1,31 @@ -from fastapi import FastAPI, HTTPException +import asyncio +import tempfile +import time +from fastapi import FastAPI, HTTPException, Depends from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import StreamingResponse -from app.schemas import NodeRunRequest, NodeRunResponse, MergeStrategy, Role, Message, Context -from app.services.llm import llm_streamer +from fastapi.responses import StreamingResponse, FileResponse +from fastapi import UploadFile, File, Form +from pydantic import BaseModel +from app.schemas import NodeRunRequest, NodeRunResponse, MergeStrategy, Role, Message, Context, LLMConfig, ModelProvider, ReasoningEffort +from app.services.llm import llm_streamer, generate_title, get_openai_client +from app.auth import auth_router, get_current_user, get_current_user_optional, init_db, User, get_db +from app.auth.utils import get_password_hash from dotenv import load_dotenv import os +import json +import shutil +from typing import List, Literal, Optional +from uuid import uuid4 +from google import genai +from sqlalchemy.orm import Session load_dotenv() app = FastAPI(title="ContextFlow Backend") +# Include authentication router +app.include_router(auth_router) + app.add_middleware( CORSMiddleware, 
allow_origins=["*"], @@ -18,6 +34,195 @@ app.add_middleware( allow_headers=["*"], ) +# Initialize database on startup +@app.on_event("startup") +async def startup_event(): + """Initialize database and create default test user if not exists""" + init_db() + + # Create test user if not exists + from app.auth.models import SessionLocal + db = SessionLocal() + try: + existing = db.query(User).filter(User.username == "test").first() + if not existing: + test_user = User( + username="test", + email="test@contextflow.local", + hashed_password=get_password_hash("114514") + ) + db.add(test_user) + db.commit() + print("[startup] Created default test user (test/114514)") + else: + print("[startup] Test user already exists") + finally: + db.close() + +# --------- Project / Blueprint storage --------- +DATA_ROOT = os.path.abspath(os.getenv("DATA_ROOT", os.path.join(os.getcwd(), "data"))) +DEFAULT_USER = "test" +ARCHIVE_FILENAME = "archived_nodes.json" +VALID_FILE_PROVIDERS = {"local", "openai", "google"} +OPENAI_MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB limit per OpenAI docs +OPENAI_DEFAULT_FILE_PURPOSE = os.getenv("OPENAI_FILE_PURPOSE", "user_data") + +def get_user_api_key(user: User | None, provider: str) -> str | None: + """ + Get API key for a provider from user's saved settings. + Falls back to environment variable if user has no key set. + """ + if user: + if provider == "openai" and user.openai_api_key: + return user.openai_api_key + if provider in ("google", "gemini") and user.gemini_api_key: + return user.gemini_api_key + # Fallback to environment variables + if provider == "openai": + return os.getenv("OPENAI_API_KEY") + if provider in ("google", "gemini"): + return os.getenv("GOOGLE_API_KEY") + return None + +def ensure_user_root(user: str) -> str: + """ + Ensures the new data root structure: + data/<user>/projects + data/<user>/archive + """ + user_root = os.path.join(DATA_ROOT, user) + projects_root = os.path.join(user_root, "projects") + archive_root = os.path.join(user_root, "archive") + os.makedirs(projects_root, exist_ok=True) + os.makedirs(archive_root, exist_ok=True) + return user_root + + +def projects_root(user: str) -> str: + return os.path.join(ensure_user_root(user), "projects") + + +def archive_root(user: str) -> str: + return os.path.join(ensure_user_root(user), "archive") + + +def files_root(user: str) -> str: + root = os.path.join(ensure_user_root(user), "files") + os.makedirs(root, exist_ok=True) + return root + + +def migrate_legacy_layout(user: str): + """ + Migrate from legacy ./projects/<user> and legacy archive folders to the new data/<user>/ structure. 
+ """ + legacy_root = os.path.abspath(os.path.join(os.getcwd(), "projects", user)) + new_projects = projects_root(user) + if os.path.exists(legacy_root) and not os.listdir(new_projects): + try: + for name in os.listdir(legacy_root): + src = os.path.join(legacy_root, name) + dst = os.path.join(new_projects, name) + if not os.path.exists(dst): + shutil.move(src, dst) + except Exception: + pass + # migrate legacy archive (archived/ or .cf_archived/) + legacy_archives = [ + os.path.join(legacy_root, "archived", ARCHIVE_FILENAME), + os.path.join(legacy_root, ".cf_archived", ARCHIVE_FILENAME), + ] + new_archive_file = archived_path(user) + if not os.path.exists(new_archive_file): + for legacy in legacy_archives: + if os.path.exists(legacy): + os.makedirs(os.path.dirname(new_archive_file), exist_ok=True) + try: + shutil.move(legacy, new_archive_file) + except Exception: + pass + +def safe_path(user: str, relative_path: str) -> str: + root = projects_root(user) + norm = os.path.normpath(relative_path).lstrip(os.sep) + full = os.path.abspath(os.path.join(root, norm)) + if not full.startswith(root): + raise HTTPException(status_code=400, detail="Invalid path") + return full + +class FSItem(BaseModel): + name: str + path: str # path relative to user root + type: Literal["file", "folder"] + size: Optional[int] = None + mtime: Optional[float] = None + children: Optional[List["FSItem"]] = None + +FSItem.model_rebuild() + +def list_tree(user: str, relative_path: str = ".") -> List[FSItem]: + migrate_legacy_layout(user) + root = safe_path(user, relative_path) + items: List[FSItem] = [] + for name in sorted(os.listdir(root)): + full = os.path.join(root, name) + rel = os.path.relpath(full, projects_root(user)) + stat = os.stat(full) + if os.path.isdir(full): + items.append(FSItem( + name=name, + path=rel, + type="folder", + size=None, + mtime=stat.st_mtime, + children=list_tree(user, rel) + )) + else: + items.append(FSItem( + name=name, + path=rel, + type="file", + size=stat.st_size, + mtime=stat.st_mtime, + children=None + )) + return items + +class SaveBlueprintRequest(BaseModel): + user: str = DEFAULT_USER + path: str # relative path including filename.json + content: dict + +class RenameRequest(BaseModel): + user: str = DEFAULT_USER + path: str + new_name: Optional[str] = None + new_path: Optional[str] = None + +class FileMeta(BaseModel): + id: str + name: str + size: int + mime: str + created_at: float + provider: Optional[str] = None + provider_file_id: Optional[str] = None + openai_file_id: Optional[str] = None + openai_vector_store_id: Optional[str] = None + # Scopes for filtering: "project_path/node_id" composite keys + scopes: List[str] = [] + +class FolderRequest(BaseModel): + user: str = DEFAULT_USER + path: str # relative folder path + +class DeleteRequest(BaseModel): + user: str = DEFAULT_USER + path: str + is_folder: bool = False + +# ----------------------------------------------- + @app.get("/") def read_root(): return {"message": "ContextFlow Backend is running"} @@ -60,10 +265,23 @@ def smart_merge_messages(messages: list[Message]) -> list[Message]: return merged @app.post("/api/run_node_stream") -async def run_node_stream(request: NodeRunRequest): +async def run_node_stream( + request: NodeRunRequest, + current_user: User | None = Depends(get_current_user_optional) +): """ Stream the response from the LLM. 
""" + # Get API key from user settings if not provided in request + provider_name = request.config.provider.value if hasattr(request.config.provider, 'value') else str(request.config.provider) + if not request.config.api_key: + user_key = get_user_api_key(current_user, provider_name.lower()) + if user_key: + request.config.api_key = user_key + + # Get username for file operations + username = current_user.username if current_user else DEFAULT_USER + # 1. Concatenate all incoming contexts first raw_messages = [] for ctx in request.incoming_contexts: @@ -79,7 +297,819 @@ async def run_node_stream(request: NodeRunRequest): execution_context = Context(messages=final_messages) + tools: List[dict] = [] + attachments: List[dict] = [] + + if request.config.provider == ModelProvider.OPENAI: + vs_ids, debug_refs, filters = await prepare_openai_vector_search( + user=username, + attached_ids=request.attached_file_ids, + scopes=request.scopes, + llm_config=request.config, + ) + # Always enable file_search if vector store exists (even without explicit attachments) + # This allows nodes to access files attached in previous nodes of the trace + if not vs_ids: + # Try to get user's vector store anyway + try: + client = get_openai_client(request.config.api_key) + vs_id = await ensure_user_vector_store(username, client) + if vs_id: + vs_ids = [vs_id] + except Exception as e: + print(f"[warn] Could not get vector store: {e}") + + if vs_ids: + tool_def = {"type": "file_search", "vector_store_ids": vs_ids} + if filters: + tool_def["filters"] = filters + tools.append(tool_def) + print(f"[openai file_search] vs_ids={vs_ids} refs={debug_refs} filters={filters}") + elif request.config.provider == ModelProvider.GOOGLE: + attachments = await prepare_attachments( + user=username, + target_provider=request.config.provider, + attached_ids=request.attached_file_ids, + llm_config=request.config, + ) + return StreamingResponse( - llm_streamer(execution_context, request.user_prompt, request.config), + llm_streamer(execution_context, request.user_prompt, request.config, attachments, tools), media_type="text/event-stream" ) + +class TitleRequest(BaseModel): + user_prompt: str + response: str + +class TitleResponse(BaseModel): + title: str + +@app.post("/api/generate_title", response_model=TitleResponse) +async def generate_title_endpoint( + request: TitleRequest, + current_user: User | None = Depends(get_current_user_optional) +): + """ + Generate a short title for a Q-A pair using gpt-5-nano. + Returns 3-4 short English words summarizing the topic. + """ + api_key = get_user_api_key(current_user, "openai") + title = await generate_title(request.user_prompt, request.response, api_key) + return TitleResponse(title=title) + + +class SummarizeRequest(BaseModel): + content: str + model: str # Model to use for summarization + +class SummarizeResponse(BaseModel): + summary: str + +@app.post("/api/summarize", response_model=SummarizeResponse) +async def summarize_endpoint( + request: SummarizeRequest, + current_user: User | None = Depends(get_current_user_optional) +): + """ + Summarize the given content using the specified model. 
+ """ + from app.services.llm import summarize_content + openai_key = get_user_api_key(current_user, "openai") + gemini_key = get_user_api_key(current_user, "gemini") + summary = await summarize_content(request.content, request.model, openai_key, gemini_key) + return SummarizeResponse(summary=summary) + +# ---------------- Project / Blueprint APIs ---------------- +@app.get("/api/projects/tree", response_model=List[FSItem]) +def get_project_tree(user: str = DEFAULT_USER): + """ + List all files/folders for the user under the projects root. + """ + ensure_user_root(user) + return list_tree(user) + + +@app.post("/api/projects/create_folder") +def create_folder(req: FolderRequest): + """ + Create a folder (and parents) under the user's project root. + """ + try: + folder_path = safe_path(req.user, req.path) + os.makedirs(folder_path, exist_ok=True) + return {"ok": True} + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/save_blueprint") +def save_blueprint(req: SaveBlueprintRequest): + """ + Save a blueprint JSON to disk. + """ + try: + full_path = safe_path(req.user, req.path) + os.makedirs(os.path.dirname(full_path), exist_ok=True) + with open(full_path, "w", encoding="utf-8") as f: + json.dump(req.content, f, ensure_ascii=False, indent=2) + return {"ok": True} + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/file") +def read_blueprint(user: str = DEFAULT_USER, path: str = ""): + """ + Read a blueprint JSON file. + """ + if not path: + raise HTTPException(status_code=400, detail="path is required") + full_path = safe_path(user, path) + if not os.path.isfile(full_path): + raise HTTPException(status_code=404, detail="file not found") + try: + with open(full_path, "r", encoding="utf-8") as f: + data = json.load(f) + return {"content": data} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/download") +def download_blueprint(user: str = DEFAULT_USER, path: str = ""): + """ + Download a blueprint file. + """ + if not path: + raise HTTPException(status_code=400, detail="path is required") + full_path = safe_path(user, path) + if not os.path.isfile(full_path): + raise HTTPException(status_code=404, detail="file not found") + return FileResponse(full_path, filename=os.path.basename(full_path), media_type="application/json") + + +@app.post("/api/projects/rename") +def rename_item(req: RenameRequest): + """ + Rename or move a file or folder. + - If new_path is provided, it is treated as the target relative path (move). + - Else, new_name is used within the same directory. + """ + try: + src = safe_path(req.user, req.path) + if not os.path.exists(src): + raise HTTPException(status_code=404, detail="source not found") + if req.new_path: + dst = safe_path(req.user, req.new_path) + else: + if not req.new_name: + raise HTTPException(status_code=400, detail="new_name or new_path required") + base_dir = os.path.dirname(src) + dst = os.path.join(base_dir, req.new_name) + # Ensure still inside user root + safe_path(req.user, os.path.relpath(dst, ensure_user_root(req.user))) + os.rename(src, dst) + return {"ok": True} + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/delete") +def delete_item(req: DeleteRequest): + """ + Delete a file or folder. 
+ """ + try: + target = safe_path(req.user, req.path) + if not os.path.exists(target): + raise HTTPException(status_code=404, detail="not found") + if os.path.isdir(target): + if not req.is_folder: + # Prevent deleting folder accidentally unless flagged + raise HTTPException(status_code=400, detail="set is_folder=True to delete folder") + shutil.rmtree(target) + else: + os.remove(target) + return {"ok": True} + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) +# ---------------------------------------------------------- + +# --------------- Archived Nodes APIs ---------------------- +def archived_path(user: str) -> str: + root = archive_root(user) + return os.path.join(root, ARCHIVE_FILENAME) + +# ---------------- Files (uploads) ---------------- +def files_index_path(user: str) -> str: + return os.path.join(files_root(user), "index.json") + +def user_vector_store_path(user: str) -> str: + return os.path.join(files_root(user), "vector_store.json") + +async def ensure_user_vector_store(user: str, client=None) -> str: + """ + Ensure there is a vector store for the user (OpenAI). + Persist the id under data/<user>/files/vector_store.json. + """ + path = user_vector_store_path(user) + if client is None: + client = get_openai_client() + + # Try existing cached ID + if os.path.exists(path): + try: + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + vs_id_cached = data.get("id") + if vs_id_cached: + try: + await client.vector_stores.retrieve(vector_store_id=vs_id_cached) + return vs_id_cached + except Exception: + # Possibly deleted; recreate below + pass + except Exception: + pass + + # create new + vs = await client.vector_stores.create(name=f"{user}-vs") + vs_id = getattr(vs, "id", None) + if not vs_id: + raise HTTPException(status_code=500, detail="Failed to create vector store") + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump({"id": vs_id}, f) + return vs_id + +async def ensure_openai_file_and_index(user: str, meta: FileMeta, path: str, llm_config: Optional[LLMConfig] = None) -> tuple[str, str]: + """ + Ensure the file is uploaded to OpenAI Files and added to the user's vector store. + Returns (openai_file_id, vector_store_id). + """ + client = get_openai_client(llm_config.api_key if llm_config else None) + vs_id = await ensure_user_vector_store(user, client) + + file_id = meta.openai_file_id or (meta.provider_file_id if meta.provider == "openai" else None) + if not file_id: + with open(path, "rb") as f: + content = f.read() + resp = await client.files.create( + file=(meta.name or "upload.bin", content), + purpose="assistants", + ) + file_id = getattr(resp, "id", None) + if not file_id: + raise HTTPException(status_code=500, detail="OpenAI file upload returned no file_id") + + await add_file_to_vector_store(vs_id, file_id, client=client) + return file_id, vs_id + +async def remove_file_from_vector_store(vs_id: str, file_id: str, client=None): + if not vs_id or not file_id: + return + if client is None: + client = get_openai_client() + try: + await client.vector_stores.files.delete(vector_store_id=vs_id, file_id=file_id) + except Exception as e: + print(f"[warn] remove_file_from_vector_store failed: {e}") + +async def add_file_to_vector_store(vs_id: str, file_id: str, client=None): + """ + Add a file to vector store with file_id as attribute for filtering. + We use file_id as the attribute so we can filter by specific files at query time. 
+ """ + if client is None: + client = get_openai_client() + + # Use file_id as attribute for filtering + create_params = { + "vector_store_id": vs_id, + "file_id": file_id, + "attributes": {"file_id": file_id} # Enable filtering by file_id + } + + await client.vector_stores.files.create(**create_params) + # Poll until completed (limit capped at 100 per API spec) + for _ in range(20): + listing = await client.vector_stores.files.list(vector_store_id=vs_id, limit=100) + found = None + for item in getattr(listing, "data", []): + if getattr(item, "id", None) == file_id or getattr(item, "file_id", None) == file_id: + found = item + break + status = getattr(found, "status", None) if found else None + if status == "completed": + return + await asyncio.sleep(0.5) + # If not confirmed, still continue + return + +def load_files_index(user: str) -> List[FileMeta]: + path = files_index_path(user) + if not os.path.exists(path): + return [] + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + return [FileMeta(**item) for item in data] + + +def save_files_index(user: str, items: List[FileMeta]): + path = files_index_path(user) + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump([item.model_dump() for item in items], f, ensure_ascii=False, indent=2) + + +async def prepare_attachments( + user: str, + target_provider: str, + attached_ids: List[str], + llm_config: LLMConfig, +) -> list[dict]: + """ + For each attached file ID: + - If already uploaded to the target provider, reuse provider_file_id/uri. + - Otherwise, upload with the original filename (required by OpenAI). + Returns a list of dicts describing attachment references for the provider. + """ + if not attached_ids: + return [] + + items = load_files_index(user) + items_map = {item.id: item for item in items} + attachments: list[dict] = [] + + if isinstance(target_provider, ModelProvider): + provider_norm = target_provider.value.lower() + else: + provider_norm = str(target_provider).lower() + + for fid in attached_ids: + meta = items_map.get(fid) + if not meta: + print(f"[warn] Attached file id not found, skipping: {fid}") + continue + + path = os.path.join(files_root(user), fid) + if not os.path.exists(path): + raise HTTPException(status_code=404, detail=f"Attached file missing on disk: {meta.name}") + + if provider_norm == ModelProvider.OPENAI or provider_norm == "openai": + # Reuse provider file id if available + if meta.provider == "openai" and meta.provider_file_id: + attachments.append({ + "provider": "openai", + "file_id": meta.provider_file_id, + "name": meta.name, + "mime": meta.mime, + }) + continue + + # Upload to OpenAI with original filename + with open(path, "rb") as f: + content = f.read() + size = len(content) + if size > OPENAI_MAX_FILE_SIZE: + raise HTTPException(status_code=400, detail=f"File {meta.name} exceeds OpenAI 50MB limit") + + try: + client = get_openai_client(llm_config.api_key) + resp = await client.files.create( + file=(meta.name or "upload.bin", content), + purpose=OPENAI_DEFAULT_FILE_PURPOSE, + ) + openai_file_id = getattr(resp, "id", None) + if not openai_file_id: + raise HTTPException(status_code=500, detail="OpenAI file upload returned no file_id") + attachments.append({ + "provider": "openai", + "file_id": openai_file_id, + "name": meta.name, + "mime": meta.mime, + }) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"OpenAI upload failed: {str(e)}") + + elif provider_norm == 
ModelProvider.GOOGLE or provider_norm == "google": + # Reuse uri/name if available and looks like a URI + if meta.provider == "google" and meta.provider_file_id and "://" in meta.provider_file_id: + attachments.append({ + "provider": "google", + "uri": meta.provider_file_id, + "name": meta.name, + "mime": meta.mime, + }) + continue + + key = llm_config.api_key or os.getenv("GOOGLE_API_KEY") + if not key: + raise HTTPException(status_code=500, detail="Google API Key not found") + client = genai.Client(api_key=key) + + tmp_path = None + try: + with open(path, "rb") as f: + content = f.read() + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp.write(content) + tmp_path = tmp.name + + google_resp = await asyncio.to_thread( + client.files.upload, + file=tmp_path, + config={"mimeType": meta.mime or "application/octet-stream"}, + ) + google_name = getattr(google_resp, "name", None) + google_uri = getattr(google_resp, "uri", None) + + # Poll for ACTIVE and uri if missing + if google_name: + for _ in range(10): + try: + info = await asyncio.to_thread(client.files.get, name=google_name) + state = getattr(info, "state", None) + google_uri = getattr(info, "uri", google_uri) + if str(state).upper().endswith("ACTIVE") or state == "ACTIVE": + break + await asyncio.sleep(1) + except Exception: + await asyncio.sleep(1) + print(f"[google upload] name={google_name} uri={google_uri}") + + uri = google_uri or google_name + if not uri: + raise HTTPException(status_code=500, detail="Google upload returned no uri/name") + attachments.append({ + "provider": "google", + "uri": uri, + "name": meta.name, + "mime": meta.mime, + }) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Google upload failed: {str(e)}") + finally: + if tmp_path and os.path.exists(tmp_path): + os.remove(tmp_path) + + else: + raise HTTPException(status_code=400, detail=f"Unsupported provider for attachments: {target_provider}") + + # Debug log + print(f"[attachments] provider={provider_norm} count={len(attachments)} detail={[{'name': a.get('name'), 'id': a.get('file_id', a.get('uri'))} for a in attachments]}") + return attachments + + +async def prepare_openai_vector_search( + user: str, + attached_ids: List[str], + scopes: List[str], + llm_config: LLMConfig, +) -> tuple[List[str], List[dict], Optional[dict]]: + """ + Ensure all attached files are uploaded to OpenAI Files (purpose=assistants) and added to the user's vector store. + Returns (vector_store_ids, openai_file_refs_for_debug, filters). 
+ + Filtering logic: + - Include files whose scopes intersect with requested scopes + - ALSO include explicitly attached files (attached_ids) + - Deduplicate to avoid double-processing + - Filters are constructed using file_id attribute in vector store + """ + items = load_files_index(user) + items_map = {item.id: item for item in items} + + # Determine which files to include - combine scopes AND attached_ids + relevant_files_map: dict[str, FileMeta] = {} + + # First: add files matching scopes + if scopes: + for item in items: + if item.scopes and any(s in scopes for s in item.scopes): + relevant_files_map[item.id] = item + print(f"[file_search] scopes={scopes} matched_files={[f.name for f in relevant_files_map.values()]}") + + # Second: also add explicitly attached files (they should always be searchable) + if attached_ids: + for fid in attached_ids: + meta = items_map.get(fid) + if meta and fid not in relevant_files_map: + relevant_files_map[fid] = meta + print(f"[file_search] adding explicitly attached file: {meta.name}") + + relevant_files = list(relevant_files_map.values()) + + if not relevant_files: + return [], [], None + + changed = False + vs_ids: List[str] = [] + debug_refs: List[dict] = [] + file_ids_for_filter: List[str] = [] + + for meta in relevant_files: + path = os.path.join(files_root(user), meta.id) + if not os.path.exists(path): + print(f"[warn] Attached file missing on disk, skipping: {meta.id}") + continue + # Enforce 50MB OpenAI limit + file_size = os.path.getsize(path) + if file_size > OPENAI_MAX_FILE_SIZE: + print(f"[warn] File {meta.name} exceeds OpenAI 50MB limit, skipping") + continue + + openai_file_id, vs_id = await ensure_openai_file_and_index(user, meta, path, llm_config) + if meta.openai_file_id != openai_file_id or meta.openai_vector_store_id != vs_id: + meta.openai_file_id = openai_file_id + meta.openai_vector_store_id = vs_id + changed = True + vs_ids.append(vs_id) + debug_refs.append({"name": meta.name, "file_id": openai_file_id, "vs_id": vs_id}) + if openai_file_id: + file_ids_for_filter.append(openai_file_id) + + if changed: + save_files_index(user, list(items_map.values())) + + # deduplicate + vs_ids_unique = list({vid for vid in vs_ids if vid}) + + # Build filters to only search relevant files + filters = None + if file_ids_for_filter: + filters = {"type": "in", "key": "file_id", "value": file_ids_for_filter} + + return vs_ids_unique, debug_refs, filters + +# ------------------------------------------------- + +@app.get("/api/projects/archived") +def get_archived_nodes(user: str = DEFAULT_USER): + migrate_legacy_layout(user) + path = archived_path(user) + if not os.path.exists(path): + return {"archived": []} + try: + with open(path, "r", encoding="utf-8") as f: + return {"archived": json.load(f)} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/archived") +def save_archived_nodes(payload: dict): + user = payload.get("user", DEFAULT_USER) + data = payload.get("archived", []) + try: + path = archived_path(user) + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) + return {"ok": True} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/files") +def list_files(user: str = DEFAULT_USER): + migrate_legacy_layout(user) + items = load_files_index(user) + return {"files": [item.model_dump() for item in items]} + + +@app.post("/api/files/upload") +async def 
upload_file( + user: str = DEFAULT_USER, + file: UploadFile = File(...), + provider: str = Form("local"), + purpose: Optional[str] = Form(None), +): + migrate_legacy_layout(user) + items = load_files_index(user) + file_id = str(uuid4()) + dest_root = files_root(user) + dest_path = os.path.join(dest_root, file_id) + file_name = file.filename or "upload.bin" + provider_normalized = (provider or "local").lower() + if provider_normalized not in VALID_FILE_PROVIDERS: + raise HTTPException(status_code=400, detail="Unsupported provider") + + try: + content = await file.read() + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + size = len(content) + if provider_normalized == "openai" and size > OPENAI_MAX_FILE_SIZE: + raise HTTPException(status_code=400, detail="OpenAI provider limit: max 50MB per file") + + provider_file_id: Optional[str] = None + provider_created_at: Optional[float] = None + + if provider_normalized == "openai": + try: + client = get_openai_client() + upload_purpose = purpose or OPENAI_DEFAULT_FILE_PURPOSE + resp = await client.files.create( + file=(file_name, content), + purpose=upload_purpose, + ) + provider_file_id = getattr(resp, "id", None) + provider_created_at = getattr(resp, "created_at", None) + except Exception as e: + raise HTTPException(status_code=500, detail=f"OpenAI upload failed: {str(e)}") + elif provider_normalized == "google": + try: + key = os.getenv("GOOGLE_API_KEY") + if not key: + raise HTTPException(status_code=500, detail="Google API Key not found") + client = genai.Client(api_key=key) + # The Google GenAI SDK upload is synchronous; run in thread to avoid blocking the event loop. + tmp_path = None + try: + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp.write(content) + tmp_path = tmp.name + google_resp = await asyncio.to_thread( + client.files.upload, + file=tmp_path, + config={"mimeType": file.content_type or "application/octet-stream"}, + ) + google_name = getattr(google_resp, "name", None) + google_uri = getattr(google_resp, "uri", None) + + # Poll for ACTIVE and uri if missing + if google_name: + for _ in range(10): + try: + info = await asyncio.to_thread(client.files.get, name=google_name) + state = getattr(info, "state", None) + google_uri = getattr(info, "uri", google_uri) + if str(state).upper().endswith("ACTIVE") or state == "ACTIVE": + break + await asyncio.sleep(1) + except Exception: + await asyncio.sleep(1) + + provider_file_id = google_uri or google_name + finally: + if tmp_path and os.path.exists(tmp_path): + os.remove(tmp_path) + + provider_created_at = time.time() + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Google upload failed: {str(e)}") + + try: + os.makedirs(dest_root, exist_ok=True) + with open(dest_path, "wb") as f: + f.write(content) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + created_at = provider_created_at or os.path.getmtime(dest_path) + + meta = FileMeta( + id=file_id, + name=file_name, + size=size, + mime=file.content_type or "application/octet-stream", + created_at=created_at, + provider=provider_normalized if provider_normalized != "local" else None, + provider_file_id=provider_file_id, + openai_file_id=None, + openai_vector_store_id=None, + ) + + # Always try to index into OpenAI vector store (if <=50MB) + if size <= OPENAI_MAX_FILE_SIZE: + try: + openai_file_id, vs_id = await ensure_openai_file_and_index(user, meta, dest_path, None) + meta.openai_file_id = openai_file_id + 
meta.openai_vector_store_id = vs_id + if provider_normalized == "openai" and not meta.provider_file_id: + meta.provider_file_id = openai_file_id + except Exception as e: + print(f"[warn] OpenAI indexing failed for {file_name}: {e}") + else: + print(f"[warn] Skipping OpenAI indexing for {file_name}: exceeds 50MB") + + items.append(meta) + save_files_index(user, items) + return {"file": meta} + + +@app.get("/api/files/download") +def download_file(user: str = DEFAULT_USER, file_id: str = ""): + migrate_legacy_layout(user) + items = load_files_index(user) + meta = next((i for i in items if i.id == file_id), None) + if not meta: + raise HTTPException(status_code=404, detail="file not found") + path = os.path.join(files_root(user), file_id) + if not os.path.exists(path): + raise HTTPException(status_code=404, detail="file missing on disk") + return FileResponse(path, filename=meta.name, media_type=meta.mime) + + +@app.post("/api/files/delete") +async def delete_file(user: str = DEFAULT_USER, file_id: str = ""): + migrate_legacy_layout(user) + items = load_files_index(user) + meta = next((i for i in items if i.id == file_id), None) + if not meta: + raise HTTPException(status_code=404, detail="file not found") + + # Remove from vector store and OpenAI Files if present + if meta.openai_vector_store_id and meta.openai_file_id: + await remove_file_from_vector_store(meta.openai_vector_store_id, meta.openai_file_id) + if meta.provider == "openai" and meta.provider_file_id: + try: + client = get_openai_client() + await client.files.delete(meta.provider_file_id) + except Exception as e: + raise HTTPException(status_code=500, detail=f"OpenAI delete failed: {str(e)}") + if meta.provider == "google" and meta.provider_file_id: + try: + key = os.getenv("GOOGLE_API_KEY") + if not key: + raise HTTPException(status_code=500, detail="Google API Key not found") + client = genai.Client(api_key=key) + await asyncio.to_thread(client.files.delete, meta.provider_file_id) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Google delete failed: {str(e)}") + + path = os.path.join(files_root(user), file_id) + if os.path.exists(path): + os.remove(path) + items = [i for i in items if i.id != file_id] + save_files_index(user, items) + return {"ok": True} + + +class AddScopeRequest(BaseModel): + user: str = DEFAULT_USER + file_id: str + scope: str # "project_path/node_id" composite key + + +@app.post("/api/files/add_scope") +def add_file_scope(request: AddScopeRequest): + """ + Add a scope to a file's scopes list. + Called when user attaches a file to a node. + """ + migrate_legacy_layout(request.user) + items = load_files_index(request.user) + meta = next((i for i in items if i.id == request.file_id), None) + if not meta: + raise HTTPException(status_code=404, detail="file not found") + + if request.scope not in meta.scopes: + meta.scopes.append(request.scope) + save_files_index(request.user, items) + + return {"file": meta.model_dump()} + + +class RemoveScopeRequest(BaseModel): + user: str = DEFAULT_USER + file_id: str + scope: str + + +@app.post("/api/files/remove_scope") +def remove_file_scope(request: RemoveScopeRequest): + """ + Remove a scope from a file's scopes list. + Called when user detaches a file from a node. 
+ """ + migrate_legacy_layout(request.user) + items = load_files_index(request.user) + meta = next((i for i in items if i.id == request.file_id), None) + if not meta: + raise HTTPException(status_code=404, detail="file not found") + + if request.scope in meta.scopes: + meta.scopes.remove(request.scope) + save_files_index(request.user, items) + + return {"file": meta.model_dump()} diff --git a/backend/app/schemas.py b/backend/app/schemas.py index ac90bc1..54c0560 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -24,6 +24,11 @@ class ModelProvider(str, Enum): OPENAI = "openai" GOOGLE = "google" +class ReasoningEffort(str, Enum): + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + class LLMConfig(BaseModel): provider: ModelProvider model_name: str @@ -31,6 +36,8 @@ class LLMConfig(BaseModel): max_tokens: int = 1000 system_prompt: Optional[str] = None api_key: Optional[str] = None # Optional override, usually from env + enable_google_search: bool = False + reasoning_effort: ReasoningEffort = ReasoningEffort.MEDIUM # For OpenAI reasoning models class MergeStrategy(str, Enum): RAW = "raw" @@ -42,6 +49,10 @@ class NodeRunRequest(BaseModel): user_prompt: str config: LLMConfig merge_strategy: MergeStrategy = MergeStrategy.SMART + attached_file_ids: List[str] = Field(default_factory=list) + # Scopes for file_search filtering: ["project_path/node_id", ...] + # Contains all project/node combinations in the current trace + scopes: List[str] = Field(default_factory=list) class NodeRunResponse(BaseModel): node_id: str diff --git a/backend/app/services/llm.py b/backend/app/services/llm.py index 958ab4c..660a69d 100644 --- a/backend/app/services/llm.py +++ b/backend/app/services/llm.py @@ -1,21 +1,21 @@ import os -from typing import AsyncGenerator +from typing import AsyncGenerator, List, Dict, Any, Optional import openai import google.generativeai as genai from app.schemas import LLMConfig, Message, Role, Context -# Simple in-memory cache for clients to avoid re-initializing constantly +# Cache OpenAI clients by API key to avoid re-initializing constantly # In a real app, use dependency injection or singletons -_openai_client = None +_openai_clients: dict[str, openai.AsyncOpenAI] = {} def get_openai_client(api_key: str = None): - global _openai_client + global _openai_clients key = api_key or os.getenv("OPENAI_API_KEY") if not key: raise ValueError("OpenAI API Key not found") - if not _openai_client: - _openai_client = openai.AsyncOpenAI(api_key=key) - return _openai_client + if key not in _openai_clients: + _openai_clients[key] = openai.AsyncOpenAI(api_key=key) + return _openai_clients[key] def configure_google(api_key: str = None): key = api_key or os.getenv("GOOGLE_API_KEY") @@ -23,8 +23,15 @@ def configure_google(api_key: str = None): raise ValueError("Google API Key not found") genai.configure(api_key=key) -async def stream_openai(messages: list[Message], config: LLMConfig) -> AsyncGenerator[str, None]: +async def stream_openai( + messages: list[Message], + config: LLMConfig, + attachments: Optional[List[Dict[str, Any]]] = None, + tools: Optional[List[Dict[str, Any]]] = None, +) -> AsyncGenerator[str, None]: client = get_openai_client(config.api_key) + attachments = attachments or [] + tools = tools or [] # Convert internal Message schema to OpenAI format openai_messages = [] @@ -34,61 +41,290 @@ async def stream_openai(messages: list[Message], config: LLMConfig) -> AsyncGene for msg in messages: openai_messages.append({"role": msg.role.value, "content": msg.content}) - stream = 
await client.chat.completions.create( - model=config.model_name, - messages=openai_messages, - temperature=config.temperature, - max_tokens=config.max_tokens, - stream=True + # Models that ONLY support Responses API (no Chat Completions fallback) + responses_only_models = ['gpt-5-pro'] + + # Models that CAN use Responses API (and thus support web_search tool) + model_lower = config.model_name.lower() + responses_capable_models = [ + 'gpt-5', 'gpt-5-chat-latest', 'gpt-5-mini', 'gpt-5-nano', + 'gpt-5-pro', 'gpt-5.1', 'gpt-5.1-chat-latest', 'o3', + 'o1', 'o1-preview', 'o1-mini', + 'gpt-4o', 'gpt-4o-mini', 'gpt-4o-realtime', 'gpt-4o-mini-tts' + ] + + # Use Responses API if: + # 1. Model ONLY supports Responses API, OR + # 2. User wants web search AND model is capable of Responses API + # 3. Attachments are present (Responses supports input_file) + use_responses_api = ( + config.model_name in responses_only_models or + (config.enable_google_search and (config.model_name in responses_capable_models or model_lower.startswith("gpt-4o"))) or + (attachments and (config.model_name in responses_capable_models or model_lower.startswith("gpt-4o"))) or + (tools) ) + if use_responses_api: + # Debug: Confirm config reception + # yield f"[Debug: Config Search={config.enable_google_search}, Model={config.model_name}]\n" + + # Use new client.responses.create API with Polling Strategy + # Build Responses API input + input_messages = [] + for msg in openai_messages: + if msg['role'] == 'system': + continue # goes to instructions + # User messages use input_text, assistant messages use output_text + content_type = "input_text" if msg['role'] == 'user' else "output_text" + input_messages.append({ + "role": msg['role'], + "content": [ + { + "type": content_type, + "text": msg['content'] + } + ] + }) + + # Append attachments as separate user message (files only) + file_parts = [] + for att in attachments: + if att.get("provider") == "openai" and att.get("file_id"): + file_parts.append({ + "type": "input_file", + "file_id": att["file_id"] + }) + if file_parts: + input_messages.append({ + "role": "user", + "content": file_parts + }) + + resp_params = { + "model": config.model_name, + "input": input_messages, # Full conversation history + "stream": False, # Get full output in one call + "background": False, + "store": True, + "tool_choice": "auto", + } + if tools: + resp_params["tools"] = tools + resp_params["tool_choice"] = "auto" + # Optional: include results for debugging / citations + resp_params["include"] = ["file_search_call.results"] + + # Add reasoning effort (not supported by chat-latest models) + models_without_effort = ['gpt-5-chat-latest', 'gpt-5.1-chat-latest'] + if config.model_name not in models_without_effort: + resp_params["reasoning"] = {"effort": config.reasoning_effort.value} + + # Enable Web Search if requested (Reusing enable_google_search flag as generic web_search flag) + # IMPORTANT: Append to existing tools instead of overwriting + if config.enable_google_search: + if resp_params.get("tools"): + resp_params["tools"].append({"type": "web_search"}) + else: + resp_params["tools"] = [{"type": "web_search"}] + resp_params["tool_choice"] = "auto" + + if config.system_prompt: + resp_params["instructions"] = config.system_prompt + + # Debug: print final tools being sent + print(f"[responses debug] final tools: {resp_params.get('tools')}") + + # 1. Create Response (non-background) + initial_resp = await client.responses.create(**resp_params) + response_id = initial_resp.id + + # 2. 
Poll for Completion + import asyncio + for _ in range(300): + final_resp = await client.responses.retrieve(response_id) + + if final_resp.status == 'completed': + # Debug: log outputs and tool calls + try: + outs = getattr(final_resp, "output", []) + print(f"[responses debug] output items: {[getattr(o, 'type', None) for o in outs]}") + for o in outs: + if getattr(o, "type", None) == "file_search_call": + print(f"[responses debug] file_search_call: {o}") + except Exception as e: + print(f"[responses debug] failed to inspect output: {e}") + + found_content = False + if hasattr(final_resp, 'output'): + for out in final_resp.output: + out_type = getattr(out, 'type', None) + out_content = getattr(out, 'content', None) + print(f"[responses debug] output item: type={out_type}, content={out_content}") + + if out_type == 'message' and out_content: + for c in out_content: + c_type = getattr(c, 'type', None) + c_text = getattr(c, 'text', None) + print(f"[responses debug] content item: type={c_type}, text={c_text[:100] if c_text else None}...") + if c_type == 'output_text': + text_val = getattr(c, 'text', None) + if text_val: + print(f"[responses debug] YIELDING text: {text_val[:50]}...") + yield text_val + print(f"[responses debug] YIELDED successfully") + found_content = True + + if not found_content: + print(f"[responses debug] No content found! final_resp.output={final_resp.output}") + yield f"\n[Debug: Completed but no content extracted]" + return + + elif final_resp.status in ['failed', 'cancelled', 'expired']: + error_msg = getattr(final_resp, 'error', 'Unknown error') + yield f"\n[Error: Response generation {final_resp.status}: {error_msg}]" + return + + await asyncio.sleep(2) + + yield "\n[Error: Polling timed out]" + return + + # Standard Chat Completions API (attachments not supported here) + if attachments: + yield "[Error] Attachments are only supported for Responses API-capable models." + return + + # Prepare parameters + req_params = { + "model": config.model_name, + "messages": openai_messages, + "stream": True + } + + # Identify reasoning models + is_reasoning_model = config.model_name in [ + 'gpt-5', 'gpt-5-chat-latest', 'gpt-5-mini', 'gpt-5-nano', + 'gpt-5-pro', 'gpt-5.1', 'gpt-5.1-chat-latest', 'o3', + 'o1', 'o1-mini', 'o1-preview' + ] + + if is_reasoning_model: + # Reasoning models use max_completion_tokens + if config.max_tokens: + req_params["max_completion_tokens"] = config.max_tokens + # IMPORTANT: Reasoning models often DO NOT support 'temperature'. + # We skip adding it. + else: + req_params["max_tokens"] = config.max_tokens + req_params["temperature"] = config.temperature + + stream = await client.chat.completions.create(**req_params) + async for chunk in stream: - if chunk.choices[0].delta.content: - yield chunk.choices[0].delta.content + if chunk.choices and chunk.choices[0].delta: + delta = chunk.choices[0].delta + if delta.content: + yield delta.content + elif delta.tool_calls: + # If the model tries to call a tool (even if we didn't send any?) + # This shouldn't happen unless we sent tools. + # But let's notify the user. + # Or maybe it's just an empty delta at the start/end. 
+ pass + elif getattr(delta, 'refusal', None): + yield f"[Refusal: {delta.refusal}]" -async def stream_google(messages: list[Message], config: LLMConfig) -> AsyncGenerator[str, None]: - configure_google(config.api_key) - model = genai.GenerativeModel(config.model_name) +async def stream_google(messages: list[Message], config: LLMConfig, attachments: List[Dict[str, Any]] | None = None) -> AsyncGenerator[str, None]: + attachments = attachments or [] + # Use new Google GenAI SDK (google-genai) + from google import genai + from google.genai import types - # Google Generative AI history format: - # [{"role": "user", "parts": ["..."]}, {"role": "model", "parts": ["..."]}] - # System prompt is usually set on model init or prepended. + key = config.api_key or os.getenv("GOOGLE_API_KEY") + if not key: + raise ValueError("Google API Key not found") + + client = genai.Client(api_key=key) - history = [] - # If system prompt exists, we might prepend it to the first user message or use specific system instruction if supported - # Gemini 1.5 Pro supports system instructions. For simplicity, let's prepend to history if possible or context. + # Configure Tools (Google Search) + tools = None + if config.enable_google_search: + # Enable Google Search Grounding + tools = [types.Tool(google_search=types.GoogleSearch())] + + # Configure Generation + gen_config = types.GenerateContentConfig( + temperature=config.temperature, + max_output_tokens=config.max_tokens, + system_instruction=config.system_prompt, + tools=tools + ) - system_instruction = config.system_prompt - if system_instruction: - model = genai.GenerativeModel(config.model_name, system_instruction=system_instruction) + # If attachments present, send as a single generate_content call (non-streaming) + if attachments: + parts = [] + for att in attachments: + uri = att.get("uri") + mime = att.get("mime") or "application/octet-stream" + if uri: + try: + parts.append(types.Part.from_uri(uri, mime_type=mime)) + except Exception: + parts.append(types.Part(text=f"[file attached: {uri}]")) + for msg in messages: + parts.append(types.Part(text=msg.content)) + print(f"[gemini] sending attachments: {[att.get('uri') for att in attachments]}") + try: + response = await client.aio.models.generate_content( + model=config.model_name, + contents=[types.Content(role="user", parts=parts)], + config=gen_config + ) + if response and getattr(response, "text", None): + yield response.text + else: + yield "[Error] Gemini response returned no text." + except Exception as e: + yield f"[Error] Gemini call failed: {str(e)}" + return - # Convert messages - # Note: Gemini strictly requires user/model alternation in history usually. - # We will need to handle this. For MVP, we assume the input is clean or we blindly map. - for msg in messages: + # Prepare History + # Extract last message as the prompt + prompt_msg = "..." + history_msgs = messages + if messages and messages[-1].role == Role.USER: + prompt_msg = messages[-1].content + history_msgs = messages[:-1] + + history_content = [] + for msg in history_msgs: role = "user" if msg.role == Role.USER else "model" - history.append({"role": role, "parts": [msg.content]}) - - # The last message should be the prompt, strictly speaking, `chat.send_message` takes the new message - # But if we are treating everything as history... - # Let's separate the last user message as the prompt if possible. 
-async def llm_streamer(context: Context, user_prompt: str, config: LLMConfig) -> AsyncGenerator[str, None]:
+async def llm_streamer(
+    context: Context,
+    user_prompt: str,
+    config: LLMConfig,
+    attachments: List[Dict[str, Any]] | None = None,
+    tools: List[Dict[str, Any]] | None = None,
+) -> AsyncGenerator[str, None]:
     # 1. Merge Context + New User Prompt
     # We create a temporary list of messages for this inference
     messages_to_send = context.messages.copy()
@@ -104,13 +340,168 @@ async def llm_streamer(context: Context, user_prompt: str, config: LLMConfig) ->
     # 2. Call Provider
     try:
         if config.provider == "openai":
-            async for chunk in stream_openai(messages_to_send, config):
+            async for chunk in stream_openai(messages_to_send, config, attachments, tools):
                 yield chunk
         elif config.provider == "google":
-            async for chunk in stream_google(messages_to_send, config):
+            async for chunk in stream_google(messages_to_send, config, attachments):
                 yield chunk
         else:
             yield f"Error: Unsupported provider {config.provider}"
     except Exception as e:
         yield f"Error calling LLM: {str(e)}"
+
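# Usage sketch (hypothetical route, not shown in this diff): llm_streamer is an async
# generator, so a FastAPI endpoint can forward it directly as a streaming body.
from fastapi.responses import StreamingResponse

def stream_node_response(context, user_prompt, config, attachments=None, tools=None):
    generator = llm_streamer(context, user_prompt, config, attachments, tools)
    return StreamingResponse(generator, media_type="text/plain")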
-> "NYC Weather\"""" + + # Truncate to avoid token limits + truncated_prompt = user_prompt[:300] if len(user_prompt) > 300 else user_prompt + truncated_response = response[:300] if len(response) > 300 else response + + input_text = f"Question: {truncated_prompt}\n\nAnswer: {truncated_response}" + + try: + print(f"[generate_title] Called with prompt: {truncated_prompt[:50]}...") + + # Use Responses API for gpt-5-nano (synchronous, no background) + # Note: max_output_tokens includes reasoning tokens, so needs to be higher + resp = await client.responses.create( + model="gpt-5-nano", + input=input_text, + instructions=instructions, + max_output_tokens=500, # Higher to accommodate reasoning tokens + reasoning={"effort": "low"}, # Minimize reasoning for simple task + stream=False + ) + + print(f"[generate_title] Response status: {getattr(resp, 'status', 'unknown')}") + print(f"[generate_title] Response output: {getattr(resp, 'output', 'no output')}") + + # Response should be completed immediately (no polling needed) + if hasattr(resp, 'output'): + for out in resp.output: + if getattr(out, 'type', None) == 'message': + content = getattr(out, 'content', []) + for c in content: + if getattr(c, 'type', None) == 'output_text': + title = getattr(c, 'text', '').strip() + # Clean up + title = title.strip('"\'') + print(f"[generate_title] Extracted title: {title}") + if title: + return title + + print("[generate_title] No title found, returning default") + return "New Question" + + except Exception as e: + print(f"Title generation error: {e}") + return "New Question" + + +async def summarize_content(content: str, model: str, openai_api_key: str = None, gemini_api_key: str = None) -> str: + """ + Summarize the given content using the specified model. + Supports both OpenAI and Gemini models. + """ + instructions = """Summarize the following content concisely. +Keep the key points and main ideas. 
+async def summarize_content(content: str, model: str, openai_api_key: str = None, gemini_api_key: str = None) -> str:
+    """
+    Summarize the given content using the specified model.
+    Supports both OpenAI and Gemini models.
+    """
+    instructions = """Summarize the following content concisely.
+Keep the key points and main ideas.
+Output only the summary, no preamble."""
+
+    # Truncate very long content
+    max_content = 8000
+    if len(content) > max_content:
+        content = content[:max_content] + "\n\n[Content truncated...]"
+
+    try:
+        if model.startswith('gemini'):
+            # Use Gemini
+            from google import genai
+            from google.genai import types
+            import os
+
+            key = gemini_api_key or os.getenv("GOOGLE_API_KEY")
+            if not key:
+                return "Error: Google API Key not found"
+
+            client = genai.Client(api_key=key)
+
+            gen_config = types.GenerateContentConfig(
+                temperature=0.3,
+                max_output_tokens=1000,
+                system_instruction=instructions
+            )
+
+            response = await client.aio.models.generate_content(
+                model=model,
+                contents=content,
+                config=gen_config
+            )
+
+            return response.text or "No summary generated"
+
+        else:
+            # Use OpenAI
+            client = get_openai_client(openai_api_key)
+
+            # Check if the model needs the Responses API
+            responses_api_models = [
+                'gpt-5', 'gpt-5-chat-latest', 'gpt-5-mini', 'gpt-5-nano',
+                'gpt-5-pro', 'gpt-5.1', 'gpt-5.1-chat-latest', 'o3'
+            ]
+
+            if model in responses_api_models:
+                # Use Responses API
+                resp = await client.responses.create(
+                    model=model,
+                    input=content,
+                    instructions=instructions,
+                    max_output_tokens=2000,
+                    stream=False
+                )
+
+                if hasattr(resp, 'output'):
+                    for out in resp.output:
+                        if getattr(out, 'type', None) == 'message':
+                            for c in getattr(out, 'content', []):
+                                if getattr(c, 'type', None) == 'output_text':
+                                    return getattr(c, 'text', '') or "No summary generated"
+
+                return "No summary generated"
+            else:
+                # Use Chat Completions API
+                result = await client.chat.completions.create(
+                    model=model,
+                    messages=[
+                        {"role": "system", "content": instructions},
+                        {"role": "user", "content": content}
+                    ],
+                    max_tokens=1000,
+                    temperature=0.3
+                )
+
+                return result.choices[0].message.content or "No summary generated"
+
+    except Exception as e:
+        print(f"Summarization error: {e}")
+        return f"Error: {str(e)}"
diff --git a/backend/data/test/archive/archived_nodes.json b/backend/data/test/archive/archived_nodes.json
new file mode 100644
index 0000000..e74c8f8
--- /dev/null
+++ b/backend/data/test/archive/archived_nodes.json
@@ -0,0 +1,16 @@
+[
+  {
+    "id": "archive_1765417206000",
+    "label": "PDF File Count",
+    "model": "gpt-5.1",
+    "systemPrompt": "",
+    "temperature": 0.7,
+    "reasoningEffort": "medium",
+    "userPrompt": "testing search_files. Please search for pdf files in this conversation and count the number.",
+    "response": "I found **1 PDF file** in this conversation: `542proposal.pdf`. ",
+    "mergeStrategy": "smart",
+    "attachedFileIds": [
+      "dcf69ca5-2efa-4b8a-acec-3864a803f073"
+    ]
+  }
+]
\ No newline at end of file diff --git a/backend/data/test/files/1d344051-3b8e-41bc-8c57-15710383bd42 b/backend/data/test/files/1d344051-3b8e-41bc-8c57-15710383bd42 Binary files differnew file mode 100644 index 0000000..747674f --- /dev/null +++ b/backend/data/test/files/1d344051-3b8e-41bc-8c57-15710383bd42 diff --git a/backend/data/test/files/23f72737-1d5f-4cab-b68e-8c9010a8fdc4 b/backend/data/test/files/23f72737-1d5f-4cab-b68e-8c9010a8fdc4 Binary files differnew file mode 100644 index 0000000..a837816 --- /dev/null +++ b/backend/data/test/files/23f72737-1d5f-4cab-b68e-8c9010a8fdc4 diff --git a/backend/data/test/files/2d43e6a5-2823-4564-8beb-fef2721056f3 b/backend/data/test/files/2d43e6a5-2823-4564-8beb-fef2721056f3 Binary files differnew file mode 100644 index 0000000..c522464 --- /dev/null +++ b/backend/data/test/files/2d43e6a5-2823-4564-8beb-fef2721056f3 diff --git a/backend/data/test/files/609ece1b-0894-4904-b363-d6e1c7a90be8 b/backend/data/test/files/609ece1b-0894-4904-b363-d6e1c7a90be8 Binary files differnew file mode 100644 index 0000000..f2d6870 --- /dev/null +++ b/backend/data/test/files/609ece1b-0894-4904-b363-d6e1c7a90be8 diff --git a/backend/data/test/files/dcf69ca5-2efa-4b8a-acec-3864a803f073 b/backend/data/test/files/dcf69ca5-2efa-4b8a-acec-3864a803f073 Binary files differnew file mode 100644 index 0000000..0b9b64e --- /dev/null +++ b/backend/data/test/files/dcf69ca5-2efa-4b8a-acec-3864a803f073 diff --git a/backend/data/test/files/index.json b/backend/data/test/files/index.json new file mode 100644 index 0000000..537cbe1 --- /dev/null +++ b/backend/data/test/files/index.json @@ -0,0 +1,75 @@ +[ + { + "id": "dcf69ca5-2efa-4b8a-acec-3864a803f073", + "name": "542proposal.pdf", + "size": 275191, + "mime": "application/pdf", + "created_at": 1765347496.7856553, + "provider": null, + "provider_file_id": null, + "openai_file_id": "file-UTunaRvEEufAK3vMPtC4oh", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [ + "untitled/node_1765410411339", + "untitled.json/node_1765320756261", + "untitled (2).json/node_1765320756261" + ] + }, + { + "id": "2d43e6a5-2823-4564-8beb-fef2721056f3", + "name": "ICLR_New (2).pdf", + "size": 4396443, + "mime": "application/pdf", + "created_at": 1765411275.0, + "provider": "openai", + "provider_file_id": "file-KF3avxznCxcDXnZxEEUhTo", + "openai_file_id": "file-KF3avxznCxcDXnZxEEUhTo", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [ + "untitled/node_1765410434473", + "untitled (2).json/node_1765411626468", + "untitled (2).json/node_1765412541501", + "untitled (2).json/node_1765413048314" + ] + }, + { + "id": "23f72737-1d5f-4cab-b68e-8c9010a8fdc4", + "name": "survey_on_survey (3).pdf", + "size": 263914, + "mime": "application/pdf", + "created_at": 1765415182.5394356, + "provider": null, + "provider_file_id": null, + "openai_file_id": "file-M1aQQuGPqBYPg48Wf7JfNo", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [] + }, + { + "id": "609ece1b-0894-4904-b363-d6e1c7a90be8", + "name": "CV.pdf", + "size": 30337, + "mime": "application/pdf", + "created_at": 1765415665.079434, + "provider": null, + "provider_file_id": null, + "openai_file_id": "file-HB9LZHueJAaj9tNfx5QxvR", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [ + "untitled (2).json/quick_chat_temp" + ] + }, + { + "id": "1d344051-3b8e-41bc-8c57-15710383bd42", + "name": "hw4.pdf", + "size": 249853, + "mime": "application/pdf", + "created_at": 1765416241.6634274, + "provider": null, + 
"provider_file_id": null, + "openai_file_id": "file-UiqdRSQiyhcp4bNbAcuy7j", + "openai_vector_store_id": "vs_693910aa910081919231c2c6b6c69e1d", + "scopes": [ + "untitled (2).json/quick_chat_temp" + ] + } +]
\ No newline at end of file diff --git a/backend/data/test/files/vector_store.json b/backend/data/test/files/vector_store.json new file mode 100644 index 0000000..9b9d8a8 --- /dev/null +++ b/backend/data/test/files/vector_store.json @@ -0,0 +1 @@ +{"id": "vs_693910aa910081919231c2c6b6c69e1d"}
\ No newline at end of file diff --git a/backend/data/test/projects/untitled (2).json b/backend/data/test/projects/untitled (2).json new file mode 100644 index 0000000..5a00f0a --- /dev/null +++ b/backend/data/test/projects/untitled (2).json @@ -0,0 +1,1868 @@ +{ + "version": 1, + "nodes": [ + { + "id": "node_1765320756261", + "type": "llmNode", + "position": { + "x": 100, + "y": 100 + }, + "data": { + "label": "PDF File Count", + "model": "gpt-5.1", + "temperature": 0.7, + "systemPrompt": "", + "userPrompt": "testing search_files. Please search for pdf files in this conversation and count the number.", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "messages": [ + { + "id": "msg_1765413079192_u", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "msg_1765413079192_a", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + } + ], + "traces": [], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + } + ] + } + ], + "forkedTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-u", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-a", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + } + ] + } + ], + "mergedTraces": [], + "response": "I found **1 PDF file** in this conversation: `542proposal.pdf`. ", + "status": "success", + "inputs": 1, + "activeTraceIds": [], + "attachments": [ + { + "fileId": "8a29c29f-fc55-4197-ba61-ac6fa1b5d628", + "name": "507hw_.pdf", + "providerFileId": "file-9ptprToszH4K38Mg3Qu5B6", + "expiresAt": null + } + ], + "attachedFileIds": [ + "dcf69ca5-2efa-4b8a-acec-3864a803f073" + ], + "querySentAt": 1765413043481, + "responseReceivedAt": 1765413079192 + }, + "width": 200, + "height": 64, + "selected": false, + "dragging": false, + "positionAbsolute": { + "x": 109.22690128581917, + "y": 455.2354603148382 + } + }, + { + "id": "node_1765413048314", + "type": "llmNode", + "position": { + "x": 450, + "y": 100 + }, + "data": { + "label": "Pdf Files", + "model": "gpt-5.1", + "temperature": 0.7, + "systemPrompt": "", + "userPrompt": "what about now?", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "msg_1765415155872_u", + "role": "user", + "content": "what about now?" + }, + { + "id": "msg_1765415155872_a", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. 
`542proposal.pdf` " + } + ], + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "response": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` ", + "status": "success", + "inputs": 1, + "activeTraceIds": [ + "fork-node_1765320756261-1765413083937" + ], + "attachedFileIds": [ + "2d43e6a5-2823-4564-8beb-fef2721056f3" + ], + "querySentAt": 1765415098761, + "responseReceivedAt": 1765415155872 + }, + "width": 200, + "height": 64, + "selected": false, + "positionAbsolute": { + "x": 407.983813671746, + "y": 459.30622770179616 + }, + "dragging": false + }, + { + "id": "node_1765415177931", + "type": "llmNode", + "position": { + "x": 800, + "y": 100 + }, + "data": { + "label": "PDF Files", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "我来多做几轮测试。现在呢?", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. 
" + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [], + "attachedFileIds": [], + "response": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` ", + "status": "success", + "inputs": 1, + "querySentAt": 1765415177931, + "responseReceivedAt": 1765415177931 + }, + "width": 200, + "height": 64 + }, + { + "id": "node_1765415203978", + "type": "llmNode", + "position": { + "x": 1150, + "y": 100 + }, + "data": { + "label": "PDF Files", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "那么现在呢", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. 
`ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [], + "attachedFileIds": [], + "response": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` ", + "status": "success", + "inputs": 1, + "querySentAt": 1765415203978, + "responseReceivedAt": 1765415203978 + }, + "width": 200, + "height": 64 + }, + { + "id": "node_1765415227098", + "type": "llmNode", + "position": { + "x": 1500, + "y": 100 + }, + "data": { + "label": "Citation Markers", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "”fileciteturn0file0turn0file7“这一串字符是什么?", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. 
`ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [], + "attachedFileIds": [], + "response": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。", + "status": "success", + "inputs": 1, + "querySentAt": 1765415227098, + "responseReceivedAt": 1765415227098 + }, + "width": 200, + "height": 64 + }, + { + "id": "node_1765415275416", + "type": "llmNode", + "position": { + "x": 1850, + "y": 100 + }, + "data": { + "label": "OpenAI Citation Rendering", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" 
+ }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? 
\"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [ + "fork-node_1765320756261-1765413083937" + ], + "attachedFileIds": [], + "response": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? \"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。", + "status": "success", + "inputs": 1, + "querySentAt": 1765415275416, + "responseReceivedAt": 1765415275416 + }, + "width": 226, + "height": 64, + "selected": false, + "positionAbsolute": { + "x": 1851.1486983549971, + "y": 100 + }, + "dragging": false + }, + { + "id": "node_1765415709294", + "type": "llmNode", + "position": { + "x": 2200, + "y": 100 + }, + "data": { + "label": "Available PDFs", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "现在呢", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" 
+ }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? \"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" 
+ }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? \"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + }, + { + "id": "node_1765415709294-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765415709294-assistant", + "role": "assistant", + "content": "现在一共有 **2 个不同的 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n你之前提到的 `542proposal.pdf` 在当前这轮对话中已经不在可用文件列表里了,所以现在只看到这两个。" + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [], + "attachedFileIds": [], + "response": "现在一共有 **2 个不同的 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n你之前提到的 `542proposal.pdf` 在当前这轮对话中已经不在可用文件列表里了,所以现在只看到这两个。", + "status": "success", + "inputs": 1, + "querySentAt": 1765415709294, + "responseReceivedAt": 1765415709294 + }, + "width": 200, + "height": 64 + }, + { + "id": "node_1765415741441", + "type": "llmNode", + "position": { + "x": 2550, + "y": 100 + }, + "data": { + "label": "Accessible PDFs", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "再试试?不应该啊", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" 
+ }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? \"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + }, + { + "id": "node_1765415709294-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765415709294-assistant", + "role": "assistant", + "content": "现在一共有 **2 个不同的 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n你之前提到的 `542proposal.pdf` 在当前这轮对话中已经不在可用文件列表里了,所以现在只看到这两个。" + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. 
`ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? \"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + }, + { + "id": "node_1765415709294-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765415709294-assistant", + "role": "assistant", + "content": "现在一共有 **2 个不同的 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n你之前提到的 `542proposal.pdf` 在当前这轮对话中已经不在可用文件列表里了,所以现在只看到这两个。" + }, + { + "id": "node_1765415741441-user", + "role": "user", + "content": "再试试?不应该啊" + }, + { + "id": "node_1765415741441-assistant", + "role": "assistant", + "content": "现在这一轮里,我能看到并访问到的 PDF 文件有 **2 个**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n之前的 `542proposal.pdf` 当前这次工具返回的列表里已经没有了,所以现在只统计到这两个。" + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [ + "fork-node_1765320756261-1765413083937" + ], + "attachedFileIds": [], + "response": "现在这一轮里,我能看到并访问到的 PDF 文件有 **2 个**:\n\n1. `ICLR_New (2).pdf` \n2. 
`survey_on_survey (3).pdf` \n\n之前的 `542proposal.pdf` 当前这次工具返回的列表里已经没有了,所以现在只统计到这两个。", + "status": "success", + "inputs": 1, + "querySentAt": 1765415741441, + "responseReceivedAt": 1765415741441 + }, + "width": 200, + "height": 64, + "selected": false, + "dragging": false + }, + { + "id": "node_1765416202831", + "type": "llmNode", + "position": { + "x": 2850, + "y": 100 + }, + "data": { + "label": "CV PDF", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "现在呢", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? 
\"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + }, + { + "id": "node_1765415709294-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765415709294-assistant", + "role": "assistant", + "content": "现在一共有 **2 个不同的 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n你之前提到的 `542proposal.pdf` 在当前这轮对话中已经不在可用文件列表里了,所以现在只看到这两个。" + }, + { + "id": "node_1765415741441-user", + "role": "user", + "content": "再试试?不应该啊" + }, + { + "id": "node_1765415741441-assistant", + "role": "assistant", + "content": "现在这一轮里,我能看到并访问到的 PDF 文件有 **2 个**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n之前的 `542proposal.pdf` 当前这次工具返回的列表里已经没有了,所以现在只统计到这两个。" + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. 
**把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? \"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + }, + { + "id": "node_1765415709294-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765415709294-assistant", + "role": "assistant", + "content": "现在一共有 **2 个不同的 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n你之前提到的 `542proposal.pdf` 在当前这轮对话中已经不在可用文件列表里了,所以现在只看到这两个。" + }, + { + "id": "node_1765415741441-user", + "role": "user", + "content": "再试试?不应该啊" + }, + { + "id": "node_1765415741441-assistant", + "role": "assistant", + "content": "现在这一轮里,我能看到并访问到的 PDF 文件有 **2 个**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n之前的 `542proposal.pdf` 当前这次工具返回的列表里已经没有了,所以现在只统计到这两个。" + }, + { + "id": "node_1765416202831-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765416202831-assistant", + "role": "assistant", + "content": "现在我能看到的 PDF 文件一共有 **1 个**:\n\n1. `CV.pdf` " + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [], + "attachedFileIds": [ + "609ece1b-0894-4904-b363-d6e1c7a90be8" + ], + "response": "现在我能看到的 PDF 文件一共有 **1 个**:\n\n1. `CV.pdf` ", + "status": "success", + "inputs": 1, + "querySentAt": 1765416202831, + "responseReceivedAt": 1765416202831 + }, + "width": 200, + "height": 64, + "selected": false + }, + { + "id": "node_1765416232562", + "type": "llmNode", + "position": { + "x": 3150, + "y": 100 + }, + "data": { + "label": "CV PDF", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "现在呢", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. 
`ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? \"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + }, + { + "id": "node_1765415709294-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765415709294-assistant", + "role": "assistant", + "content": "现在一共有 **2 个不同的 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n你之前提到的 `542proposal.pdf` 在当前这轮对话中已经不在可用文件列表里了,所以现在只看到这两个。" + }, + { + "id": "node_1765415741441-user", + "role": "user", + "content": "再试试?不应该啊" + }, + { + "id": "node_1765415741441-assistant", + "role": "assistant", + "content": "现在这一轮里,我能看到并访问到的 PDF 文件有 **2 个**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n之前的 `542proposal.pdf` 当前这次工具返回的列表里已经没有了,所以现在只统计到这两个。" + }, + { + "id": "node_1765416202831-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765416202831-assistant", + "role": "assistant", + "content": "现在我能看到的 PDF 文件一共有 **1 个**:\n\n1. `CV.pdf` " + } + ] + } + ], + "outgoingTraces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" 
+ }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? \"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + }, + { + "id": "node_1765415709294-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765415709294-assistant", + "role": "assistant", + "content": "现在一共有 **2 个不同的 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n你之前提到的 `542proposal.pdf` 在当前这轮对话中已经不在可用文件列表里了,所以现在只看到这两个。" + }, + { + "id": "node_1765415741441-user", + "role": "user", + "content": "再试试?不应该啊" + }, + { + "id": "node_1765415741441-assistant", + "role": "assistant", + "content": "现在这一轮里,我能看到并访问到的 PDF 文件有 **2 个**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n之前的 `542proposal.pdf` 当前这次工具返回的列表里已经没有了,所以现在只统计到这两个。" + }, + { + "id": "node_1765416202831-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765416202831-assistant", + "role": "assistant", + "content": "现在我能看到的 PDF 文件一共有 **1 个**:\n\n1. `CV.pdf` " + }, + { + "id": "node_1765416232562-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765416232562-assistant", + "role": "assistant", + "content": "现在我能看到的 PDF 文件一共有 **1 个**:\n\n1. `CV.pdf` " + } + ] + } + ], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [ + "fork-node_1765320756261-1765413083937" + ], + "attachedFileIds": [], + "response": "现在我能看到的 PDF 文件一共有 **1 个**:\n\n1. 
`CV.pdf` ", + "status": "success", + "inputs": 1, + "querySentAt": 1765416232562, + "responseReceivedAt": 1765416232562 + }, + "width": 200, + "height": 64, + "selected": false, + "dragging": false + }, + { + "id": "node_1765416288572", + "type": "llmNode", + "position": { + "x": 3450, + "y": 100 + }, + "data": { + "label": "PDF 文件 去重", + "model": "gpt-5.1", + "temperature": 1, + "systemPrompt": "", + "userPrompt": "现在呢?", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "enableGoogleSearch": true, + "traces": [ + { + "id": "fork-node_1765320756261-1765413083937", + "sourceNodeId": "node_1765320756261", + "color": "hsl(163, 70%, 60%)", + "messages": [ + { + "id": "node_1765320756261-user", + "role": "user", + "content": "testing search_files. Please search for pdf files in this conversation and count the number." + }, + { + "id": "node_1765320756261-assistant", + "role": "assistant", + "content": "I found **1 PDF file** in this conversation: `542proposal.pdf`. " + }, + { + "id": "node_1765413048314-user", + "role": "user", + "content": "what about now?" + }, + { + "id": "node_1765413048314-assistant", + "role": "assistant", + "content": "There are now **2 PDF files** available in this conversation:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415177931-user", + "role": "user", + "content": "我来多做几轮测试。现在呢?" + }, + { + "id": "node_1765415177931-assistant", + "role": "assistant", + "content": "现在一共存在 **2 个 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `542proposal.pdf` " + }, + { + "id": "node_1765415203978-user", + "role": "user", + "content": "那么现在呢" + }, + { + "id": "node_1765415203978-assistant", + "role": "assistant", + "content": "现在一共有 **3 个 PDF 文件**:\n\n1. `survey_on_survey (3).pdf` fileciteturn0file0turn0file7 \n2. `ICLR_New (2).pdf` fileciteturn0file1turn0file8 \n3. `542proposal.pdf` " + }, + { + "id": "node_1765415227098-user", + "role": "user", + "content": "”fileciteturn0file0turn0file7“这一串字符是什么?" + }, + { + "id": "node_1765415227098-assistant", + "role": "assistant", + "content": "那一串 \n`fileciteturn0file0turn0file7` \n\n是我用来**标注文件来源的内部引用标记(citation marker)**,含义大概是:\n\n- `filecite`:说明这是一个“文件引用”的开始 \n- `turn0file0`、`turn0file7`:指向本对话中由你上传的具体文件(第 0 轮的第 0 个文件、第 7 个文件之类的内部编号) \n- ``:引用标记的结束符 \n\n这些标记主要是给系统用来“知道这句话是根据哪些文件内容回答的”,对你来说可以把它当成**文献引用**一样的东西,并不属于 PDF 文件的正文内容。" + }, + { + "id": "node_1765415275416-user", + "role": "user", + "content": "openai平时是怎么渲染这个引用字符的?我在自己搭建前端" + }, + { + "id": "node_1765415275416-assistant", + "role": "assistant", + "content": "简单说: \n这些 `filecite...` 不应该直接展示给用户看,前端要**拦截并自己渲染成“引用小标签”**。\n\n虽然 OpenAI 没有公开一份“官方前端实现规范”,但在 ChatGPT 里,大致是这样处理的:\n\n1. **模型返回的原始文本里带有这串标记**: \n `…… survey_on_survey (3).pdf fileciteturn0file0turn0file7` \n\n2. **前端在展示前做一次解析**: \n - 用正则把所有 `filecite ... ` 找出来 \n - 拆成若干个 id:`turn0file0`, `turn0file7` \n - 根据这些 id 去你自己的文件元数据表里拿:文件名、类型、页码链接等 \n\n3. **把这段标记替换为你想要的 UI 元素**,比如:\n - 一个小上标的引用号:`[1]`、`[2]` \n - 或一个带文件名的 tag / pill:`[survey_on_survey (3).pdf]` \n - 鼠标 hover / 点击后弹出:**文件名 + “打开文件 / 跳转到页码”按钮**\n\n示例实现思路(伪代码):\n\n```ts\nconst CITATION_RE = /filecite([^]+)/g;\n\nfunction renderWithCitations(text, fileMetaMap) {\n return text.replace(CITATION_RE, (match, inner) => {\n const ids = inner.split(\"\"); // [\"turn0file0\", \"turn0file7\"]\n // 这里你可以把 ids 映射成编号 [1] / [2],或直接显示文件名\n const label = \"[\" + ids.map(id => fileMetaMap[id]?.displayIndex ?? 
\"?\").join(\",\") + \"]\";\n // 实际前端中可以渲染成组件,这里用纯文本代替\n return label;\n });\n}\n```\n\n所以在自己搭前端时,可以按这个思路做:\n\n- 把 `filecite...` 当成**纯结构化标记** \n- 前端负责把它转换成:**引用编号 / tag / 弹窗等 UI** \n- 用户最终不会看到这些奇怪字符,只看到你设计的引用样式即可。" + }, + { + "id": "node_1765415709294-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765415709294-assistant", + "role": "assistant", + "content": "现在一共有 **2 个不同的 PDF 文件**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n你之前提到的 `542proposal.pdf` 在当前这轮对话中已经不在可用文件列表里了,所以现在只看到这两个。" + }, + { + "id": "node_1765415741441-user", + "role": "user", + "content": "再试试?不应该啊" + }, + { + "id": "node_1765415741441-assistant", + "role": "assistant", + "content": "现在这一轮里,我能看到并访问到的 PDF 文件有 **2 个**:\n\n1. `ICLR_New (2).pdf` \n2. `survey_on_survey (3).pdf` \n\n之前的 `542proposal.pdf` 当前这次工具返回的列表里已经没有了,所以现在只统计到这两个。" + }, + { + "id": "node_1765416202831-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765416202831-assistant", + "role": "assistant", + "content": "现在我能看到的 PDF 文件一共有 **1 个**:\n\n1. `CV.pdf` " + }, + { + "id": "node_1765416232562-user", + "role": "user", + "content": "现在呢" + }, + { + "id": "node_1765416232562-assistant", + "role": "assistant", + "content": "现在我能看到的 PDF 文件一共有 **1 个**:\n\n1. `CV.pdf` " + } + ] + } + ], + "outgoingTraces": [], + "forkedTraces": [], + "mergedTraces": [], + "activeTraceIds": [ + "fork-node_1765320756261-1765413083937" + ], + "attachedFileIds": [ + "1d344051-3b8e-41bc-8c57-15710383bd42" + ], + "response": "现在我能看到的 PDF 文件一共有 **2 个不同的 PDF 文件**(按文件 ID 去重后):\n\n1. `hw4.pdf` fileciteturn0file0turn0file1turn0file2turn0file4turn0file5 \n2. `CV.pdf` \n\n之前搜索结果里之所以出现多个 `hw4.pdf`,是因为它在检索时被拆成了多个内容片段,但它们指向的是**同一个文件**,所以只算 1 个。", + "status": "success", + "inputs": 1, + "querySentAt": 1765416288573, + "responseReceivedAt": 1765416288573 + }, + "width": 200, + "height": 64, + "selected": true, + "dragging": false + } + ], + "edges": [ + { + "source": "node_1765320756261", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765413048314", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765320756261trace-fork-node_1765320756261-1765413083937-node_1765413048314input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765413048314", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415177931", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765413048314trace-fork-node_1765320756261-1765413083937-node_1765415177931input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765415177931", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415203978", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765415177931trace-fork-node_1765320756261-1765413083937-node_1765415203978input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765415203978", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415227098", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765415203978trace-fork-node_1765320756261-1765413083937-node_1765415227098input-0", + "data": { + "isMerged": false, + "colors": 
[] + } + }, + { + "source": "node_1765415227098", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415275416", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765415227098trace-fork-node_1765320756261-1765413083937-node_1765415275416input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765415275416", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415709294", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765415275416trace-fork-node_1765320756261-1765413083937-node_1765415709294input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765415709294", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765415741441", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765415709294trace-fork-node_1765320756261-1765413083937-node_1765415741441input-0", + "data": { + "isMerged": false, + "colors": [] + } + }, + { + "source": "node_1765415741441", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765416202831", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765415741441trace-fork-node_1765320756261-1765413083937-node_1765416202831input-0", + "data": { + "isMerged": false, + "colors": [] + }, + "selected": false + }, + { + "source": "node_1765416202831", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765416232562", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765416202831trace-fork-node_1765320756261-1765413083937-node_1765416232562input-0", + "data": { + "isMerged": false, + "colors": [] + }, + "selected": false + }, + { + "source": "node_1765416232562", + "sourceHandle": "trace-fork-node_1765320756261-1765413083937", + "target": "node_1765416288572", + "targetHandle": "input-0", + "style": { + "stroke": "hsl(163, 70%, 60%)", + "strokeWidth": 2 + }, + "id": "reactflow__edge-node_1765416232562trace-fork-node_1765320756261-1765413083937-node_1765416288572input-0", + "data": { + "isMerged": false, + "colors": [] + }, + "selected": false + } + ], + "viewport": { + "x": -1084.9451137267076, + "y": 276.49831226111826, + "zoom": 0.5 + }, + "theme": "light" +}
\ No newline at end of file diff --git a/backend/data/test/projects/untitled.json b/backend/data/test/projects/untitled.json new file mode 100644 index 0000000..592b817 --- /dev/null +++ b/backend/data/test/projects/untitled.json @@ -0,0 +1,40 @@ +{ + "version": 1, + "nodes": [ + { + "id": "node_1765320756261", + "type": "llmNode", + "position": { + "x": 110.96800241241141, + "y": 456.9765614414304 + }, + "data": { + "label": "New Question", + "model": "gpt-5.1", + "temperature": 0.7, + "systemPrompt": "", + "userPrompt": "", + "mergeStrategy": "smart", + "reasoningEffort": "medium", + "messages": [], + "traces": [], + "outgoingTraces": [], + "forkedTraces": [], + "mergedTraces": [], + "response": "", + "status": "idle", + "inputs": 1, + "activeTraceIds": [] + }, + "width": 200, + "height": 64 + } + ], + "edges": [], + "viewport": { + "x": 412.49738834764497, + "y": 114.22860771179165, + "zoom": 0.5743491774985177 + }, + "theme": "light" +}
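Note: the `untitled.json` fixture above is a minimal saved project — one idle `llmNode` and no edges. As a purely hypothetical illustration of the shape of these project files (this helper is not part of the diff, and the name `summarize_project` and the statistics it reports are assumptions), a loader on the Python side could read the same fields like this:

```python
# Hypothetical helper, not part of this diff: load a saved project file and
# report basic graph statistics based on the JSON structure shown above.
import json
from pathlib import Path


def summarize_project(path: str) -> dict:
    """Load a project file and summarize its node/edge graph."""
    project = json.loads(Path(path).read_text(encoding="utf-8"))
    nodes = project.get("nodes", [])
    edges = project.get("edges", [])
    return {
        "version": project.get("version"),
        "node_count": len(nodes),
        "edge_count": len(edges),
        # Collect the models used by LLM nodes (e.g. "gpt-5.1" in the fixtures above).
        "models": sorted({n["data"].get("model") for n in nodes if n.get("type") == "llmNode"}),
    }


# Example against the fixture above:
# summarize_project("backend/data/test/projects/untitled.json")
# -> {"version": 1, "node_count": 1, "edge_count": 0, "models": ["gpt-5.1"]}
```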
\ No newline at end of file diff --git a/backend/data/users.db b/backend/data/users.db Binary files differnew file mode 100644 index 0000000..9630889 --- /dev/null +++ b/backend/data/users.db diff --git a/backend/requirements.txt b/backend/requirements.txt index 545f6b7..a9607fd 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,8 +1,13 @@ fastapi uvicorn -pydantic +pydantic[email] openai google-generativeai python-dotenv httpx +python-multipart +# Authentication +python-jose[cryptography] +passlib[bcrypt] +sqlalchemy |
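Note: the requirements change above adds `python-multipart` plus the auth stack — `python-jose[cryptography]`, `passlib[bcrypt]`, and `sqlalchemy` — and widens `pydantic` to `pydantic[email]` for email validation. The repository's actual helpers live in `backend/app/auth/utils.py`, which is not shown in this section; the sketch below only illustrates how these two libraries are commonly combined for password hashing and JWT handling. `SECRET_KEY`, the HS256 algorithm, and the 30-minute expiry are illustrative assumptions, not values taken from this repo.

```python
# Minimal sketch (assumptions noted above), showing typical use of the new deps.
from datetime import datetime, timedelta
from typing import Optional

from jose import JWTError, jwt             # python-jose[cryptography]
from passlib.context import CryptContext   # passlib[bcrypt]

SECRET_KEY = "change-me"   # assumption: a real deployment reads this from the environment
ALGORITHM = "HS256"

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")


def get_password_hash(password: str) -> str:
    # bcrypt-hash the plaintext password for storage in users.hashed_password
    return pwd_context.hash(password)


def verify_password(plain: str, hashed: str) -> bool:
    return pwd_context.verify(plain, hashed)


def create_access_token(username: str, expires_minutes: int = 30) -> str:
    # Encode the username as the JWT subject with an expiry claim
    payload = {"sub": username, "exp": datetime.utcnow() + timedelta(minutes=expires_minutes)}
    return jwt.encode(payload, SECRET_KEY, algorithm=ALGORITHM)


def decode_token(token: str) -> Optional[str]:
    # Return the username if the token is valid and unexpired, else None
    try:
        return jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]).get("sub")
    except JWTError:
        return None
```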
