Diffstat (limited to 'backend')
-rw-r--r--  backend/app/main.py              162
-rw-r--r--  backend/app/schemas.py            18
-rw-r--r--  backend/app/services/council.py  322
-rw-r--r--  backend/app/services/llm.py       29
4 files changed, 525 insertions, 6 deletions
diff --git a/backend/app/main.py b/backend/app/main.py
index d48ec89..9370a32 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -8,8 +8,9 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, FileResponse
 from fastapi import UploadFile, File, Form
 from pydantic import BaseModel
-from app.schemas import NodeRunRequest, NodeRunResponse, MergeStrategy, Role, Message, Context, LLMConfig, ModelProvider, ReasoningEffort
-from app.services.llm import llm_streamer, generate_title, get_openai_client, get_anthropic_client
+from app.schemas import NodeRunRequest, NodeRunResponse, MergeStrategy, Role, Message, Context, LLMConfig, ModelProvider, ReasoningEffort, CouncilRunRequest
+from app.services.llm import llm_streamer, generate_title, get_openai_client, get_anthropic_client, resolve_provider
+from app.services.council import council_event_stream
 from app.auth import auth_router, get_current_user, get_current_user_optional, init_db, User, get_db
 from app.auth.utils import get_password_hash
 from dotenv import load_dotenv
@@ -421,17 +422,19 @@ async def run_node_stream(
                 tools.append(tool_def)
             logger.debug("openai file_search: vs_ids=%s refs=%s filters=%s", vs_ids, debug_refs, filters)
     elif request.config.provider == ModelProvider.GOOGLE:
+        scoped_ids = resolve_scoped_file_ids(username, request.scopes, non_image_file_ids)
         attachments = await prepare_attachments(
             user=username,
             target_provider=request.config.provider,
-            attached_ids=non_image_file_ids,
+            attached_ids=scoped_ids,
             llm_config=request.config,
         )
     elif request.config.provider == ModelProvider.CLAUDE:
+        scoped_ids = resolve_scoped_file_ids(username, request.scopes, non_image_file_ids)
         attachments = await prepare_attachments(
             user=username,
             target_provider=request.config.provider,
-            attached_ids=non_image_file_ids,
+            attached_ids=scoped_ids,
             llm_config=request.config,
         )
 
@@ -442,6 +445,127 @@ async def run_node_stream(
         media_type="text/event-stream"
     )
 
+@app.post("/api/run_council_stream")
+async def run_council_stream(
+    request: CouncilRunRequest,
+    user: str = DEFAULT_USER,
+    current_user: User | None = Depends(get_current_user_optional),
+):
+    """
+    Run the 3-stage LLM Council and stream SSE events.
+    """
+    resolved = resolve_user(current_user, user)
+    username = resolved.username if resolved else DEFAULT_USER
+
+    # Merge incoming contexts (same logic as run_node_stream)
+    raw_messages = []
+    for ctx in request.incoming_contexts:
+        raw_messages.extend(ctx.messages)
+    if request.merge_strategy == MergeStrategy.SMART:
+        final_messages = smart_merge_messages(raw_messages)
+    else:
+        final_messages = raw_messages
+    execution_context = Context(messages=final_messages)
+
+    # Extract images from attached files
+    images, non_image_file_ids = extract_image_attachments(username, request.attached_file_ids)
+
+    openrouter_key = get_user_api_key(resolved, "openrouter")
+
+    # Build LLMConfig + attachments for each council member
+    member_configs: list[LLMConfig] = []
+    attachments_per_model: list[list[dict] | None] = []
+    tools_per_model: list[list[dict] | None] = []
+
+    for member in request.council_models:
+        provider = resolve_provider(member.model_name)
+        api_key = get_user_api_key(resolved, provider.value)
+
+        config = LLMConfig(
+            provider=provider,
+            model_name=member.model_name,
+            temperature=request.temperature,
+            system_prompt=request.system_prompt,
+            api_key=api_key,
+            reasoning_effort=request.reasoning_effort,
+        )
+        member_configs.append(config)
+
+        # Prepare provider-specific file attachments
+        tools: list[dict] = []
+        attachments: list[dict] = []
+
+        # For Google/Claude: resolve scope-based files so upstream attachments are visible
+        scoped_file_ids = resolve_scoped_file_ids(username, request.scopes, non_image_file_ids)
+
+        if provider == ModelProvider.OPENAI:
+            vs_ids, debug_refs, filters = await prepare_openai_vector_search(
+                user=username,
+                attached_ids=non_image_file_ids,
+                scopes=request.scopes,
+                llm_config=config,
+            )
+            if not vs_ids:
+                try:
+                    client = get_openai_client(config.api_key)
+                    vs_id = await ensure_user_vector_store(username, client)
+                    if vs_id:
+                        vs_ids = [vs_id]
+                except Exception:
+                    pass
+            if vs_ids:
+                tool_def = {"type": "file_search", "vector_store_ids": vs_ids}
+                if filters:
+                    tool_def["filters"] = filters
+                tools.append(tool_def)
+        elif provider == ModelProvider.GOOGLE:
+            attachments = await prepare_attachments(
+                user=username,
+                target_provider=provider,
+                attached_ids=scoped_file_ids,
+                llm_config=config,
+            )
+        elif provider == ModelProvider.CLAUDE:
+            attachments = await prepare_attachments(
+                user=username,
+                target_provider=provider,
+                attached_ids=scoped_file_ids,
+                llm_config=config,
+            )
+
+        attachments_per_model.append(attachments or None)
+        tools_per_model.append(tools or None)
+
+    # Build chairman config
+    chairman_provider = resolve_provider(request.chairman_model)
+    chairman_api_key = get_user_api_key(resolved, chairman_provider.value)
+    chairman_config = LLMConfig(
+        provider=chairman_provider,
+        model_name=request.chairman_model,
+        temperature=request.temperature,
+        system_prompt=request.system_prompt,
+        api_key=chairman_api_key,
+        reasoning_effort=request.reasoning_effort,
+    )
+
+    return StreamingResponse(
+        council_event_stream(
+            user_prompt=request.user_prompt,
+            context=execution_context,
+            member_configs=member_configs,
+            chairman_config=chairman_config,
+            attachments_per_model=attachments_per_model,
+            tools_per_model=tools_per_model,
+            openrouter_api_key=openrouter_key,
+            images=images,
+        ),
+        media_type="text/event-stream",
+    )
+
+
 class TitleRequest(BaseModel):
     user_prompt: str
     response: str
@@ -832,6 +956,36 @@ def save_files_index(user: str, items: List[FileMeta]):
         json.dump([item.model_dump() for item in items], f, ensure_ascii=False, indent=2)
 
+def resolve_scoped_file_ids(user: str, scopes: List[str], explicit_ids: List[str]) -> List[str]:
+    """
+    Resolve file IDs that are relevant to the given scopes (upstream nodes).
+    Combines scope-matched files with explicitly attached files.
+    This gives Google/Claude the same scope awareness that OpenAI gets via file_search.
+    """
+    if not scopes and not explicit_ids:
+        return []
+
+    items = load_files_index(user)
+    result_ids: dict[str, bool] = {}
+
+    # Add explicitly attached files first
+    for fid in explicit_ids:
+        result_ids[fid] = True
+
+    # Add files whose scopes intersect with requested scopes (skip images)
+    if scopes:
+        for item in items:
+            if item.id in result_ids:
+                continue
+            if item.mime in IMAGE_MIME_TYPES:
+                continue
+            if item.scopes and any(s in scopes for s in item.scopes):
+                result_ids[item.id] = True
+                logger.debug("resolve_scoped_file_ids: scope match %s -> %s", item.name, item.id)
+
+    return list(result_ids.keys())
+
+
 async def _check_google_file_active(uri_or_name: str, api_key: str = None) -> bool:
     """Check if a Google file reference is still ACTIVE (not expired)."""
     key = api_key or os.getenv("GOOGLE_API_KEY")
diff --git a/backend/app/schemas.py b/backend/app/schemas.py
index 8e5f12c..a527004 100644
--- a/backend/app/schemas.py
+++ b/backend/app/schemas.py
@@ -34,7 +34,7 @@ class LLMConfig(BaseModel):
     provider: ModelProvider
     model_name: str
     temperature: float = 0.7
-    max_tokens: int = 1000
+    max_tokens: Optional[int] = None
     system_prompt: Optional[str] = None
     api_key: Optional[str] = None  # Optional override, usually from env
     enable_google_search: bool = False
@@ -55,6 +55,22 @@ class NodeRunRequest(BaseModel):
     # Contains all project/node combinations in the current trace
     scopes: List[str] = Field(default_factory=list)
 
+class CouncilMemberConfig(BaseModel):
+    model_name: str  # e.g. "gpt-5", "claude-opus-4-6", "gemini-3-pro-preview"
+
+class CouncilRunRequest(BaseModel):
+    node_id: str
+    incoming_contexts: List[Context] = []
+    user_prompt: str
+    council_models: List[CouncilMemberConfig]  # 2-6 models
+    chairman_model: str  # Model name for synthesis
+    system_prompt: Optional[str] = None
+    temperature: float = 0.7
+    reasoning_effort: ReasoningEffort = ReasoningEffort.MEDIUM
+    merge_strategy: MergeStrategy = MergeStrategy.SMART
+    attached_file_ids: List[str] = Field(default_factory=list)
+    scopes: List[str] = Field(default_factory=list)
+
 class NodeRunResponse(BaseModel):
     node_id: str
     output_context: Context
diff --git a/backend/app/services/council.py b/backend/app/services/council.py
new file mode 100644
index 0000000..d177f44
--- /dev/null
+++ b/backend/app/services/council.py
@@ -0,0 +1,322 @@
+"""3-stage LLM Council orchestration for ContextFlow."""
+
+import asyncio
+import json
+import logging
+import re
+from collections import defaultdict
+from typing import AsyncGenerator, Dict, List, Any, Optional, Tuple
+
+from app.schemas import Context, LLMConfig
+from app.services.llm import query_model_full, llm_streamer
+
+logger = logging.getLogger("contextflow.council")
+
+
+async def stage1_collect_responses(
+    user_prompt: str,
+    context: Context,
+    configs: List[LLMConfig],
+    attachments_per_model: Optional[List[Optional[List[Dict[str, Any]]]]] = None,
+    tools_per_model: Optional[List[Optional[List[Dict[str, Any]]]]] = None,
+    openrouter_api_key: Optional[str] = None,
+    images: Optional[List[Dict[str, Any]]] = None,
+) -> AsyncGenerator[Dict[str, Any], None]:
+    """
+    Stage 1: Query all council member models in parallel.
+    Yields one result dict as each model completes; the caller
+    accumulates them and emits the corresponding SSE events.
+    """
+    async def _query_one(idx: int, config: LLMConfig) -> Dict[str, Any]:
+        atts = attachments_per_model[idx] if attachments_per_model else None
+        tls = tools_per_model[idx] if tools_per_model else None
+        try:
+            response = await query_model_full(
+                context, user_prompt, config,
+                attachments=atts, tools=tls,
+                openrouter_api_key=openrouter_api_key, images=images,
+            )
+            return {"model": config.model_name, "response": response}
+        except Exception as e:
+            logger.error("Council stage1 failed for %s: %s", config.model_name, e)
+            return {"model": config.model_name, "response": f"[Error: {e}]"}
+
+    # Launch all queries concurrently and yield each result as it completes.
+    tasks = [asyncio.ensure_future(_query_one(i, cfg)) for i, cfg in enumerate(configs)]
+    for coro in asyncio.as_completed(tasks):
+        yield await coro  # caller sends stage1_model_complete event
+
+
+def _build_ranking_prompt(user_query: str, stage1_results: List[Dict[str, Any]]) -> str:
+    """Build the anonymized ranking prompt for Stage 2."""
+    labels = [chr(65 + i) for i in range(len(stage1_results))]  # A, B, C, ...
+
+    responses_text = "\n\n".join([
+        f"Response {label}:\n{result['response']}"
+        for label, result in zip(labels, stage1_results)
+    ])
+
+    return f"""You are evaluating different responses to the following question:
+
+Question: {user_query}
+
+Here are the responses from different models (anonymized):
+
+{responses_text}
+
+Your task:
+1. First, evaluate each response individually. For each response, explain what it does well and what it does poorly.
+2. Then, at the very end of your response, provide a final ranking.
+
+IMPORTANT: Your final ranking MUST be formatted EXACTLY as follows:
+- Start with the line "FINAL RANKING:" (all caps, with colon)
+- Then list the responses from best to worst as a numbered list
+- Each line should be: number, period, space, then ONLY the response label (e.g., "1. Response A")
+- Do not add any other text or explanations in the ranking section
+
+Example of the correct format for your ENTIRE response:
+
+Response A provides good detail on X but misses Y...
+Response B is accurate but lacks depth on Z...
+Response C offers the most comprehensive answer...
+
+FINAL RANKING:
+1. Response C
+2. Response A
+3. Response B
+
+Now provide your evaluation and ranking:"""
+
+
+def parse_ranking_from_text(ranking_text: str) -> List[str]:
+    """Parse the FINAL RANKING section from the model's response."""
+    if "FINAL RANKING:" in ranking_text:
+        parts = ranking_text.split("FINAL RANKING:")
+        if len(parts) >= 2:
+            ranking_section = parts[1]
+            numbered_matches = re.findall(r'\d+\.\s*Response [A-Z]', ranking_section)
+            if numbered_matches:
+                return [re.search(r'Response [A-Z]', m).group() for m in numbered_matches]
+            matches = re.findall(r'Response [A-Z]', ranking_section)
+            return matches
+
+    matches = re.findall(r'Response [A-Z]', ranking_text)
+    return matches
+
+
+async def stage2_collect_rankings(
+    user_query: str,
+    stage1_results: List[Dict[str, Any]],
+    configs: List[LLMConfig],
+    openrouter_api_key: Optional[str] = None,
+) -> Tuple[List[Dict[str, Any]], Dict[str, str]]:
+    """
+    Stage 2: Each model ranks the anonymized responses.
+    Text-only prompts, no file attachments.
+    """
+    labels = [chr(65 + i) for i in range(len(stage1_results))]
+    label_to_model = {
+        f"Response {label}": result['model']
+        for label, result in zip(labels, stage1_results)
+    }
+
+    ranking_prompt = _build_ranking_prompt(user_query, stage1_results)
+    empty_context = Context(messages=[])
+
+    async def _rank_one(config: LLMConfig) -> Dict[str, Any]:
+        try:
+            response = await query_model_full(
+                empty_context, ranking_prompt, config,
+                openrouter_api_key=openrouter_api_key,
+            )
+            parsed = parse_ranking_from_text(response)
+            return {
+                "model": config.model_name,
+                "ranking": response,
+                "parsed_ranking": parsed,
+            }
+        except Exception as e:
+            logger.error("Council stage2 failed for %s: %s", config.model_name, e)
+            return {
+                "model": config.model_name,
+                "ranking": f"[Error: {e}]",
+                "parsed_ranking": [],
+            }
+
+    results = await asyncio.gather(*[_rank_one(cfg) for cfg in configs])
+    return list(results), label_to_model
+
+
+def calculate_aggregate_rankings(
+    stage2_results: List[Dict[str, Any]],
+    label_to_model: Dict[str, str],
+) -> List[Dict[str, Any]]:
+    """Calculate aggregate rankings across all models."""
+    model_positions: Dict[str, List[int]] = defaultdict(list)
+
+    for ranking in stage2_results:
+        parsed_ranking = ranking.get("parsed_ranking", [])
+        if not parsed_ranking:
+            parsed_ranking = parse_ranking_from_text(ranking.get("ranking", ""))
+        for position, label in enumerate(parsed_ranking, start=1):
+            if label in label_to_model:
+                model_name = label_to_model[label]
+                model_positions[model_name].append(position)
+
+    aggregate = []
+    for model, positions in model_positions.items():
+        if positions:
+            avg_rank = sum(positions) / len(positions)
+            aggregate.append({
+                "model": model,
+                "average_rank": round(avg_rank, 2),
+                "rankings_count": len(positions),
+            })
+
+    aggregate.sort(key=lambda x: x["average_rank"])
+    return aggregate
+
+
+def _build_chairman_prompt(
+    user_query: str,
+    stage1_results: List[Dict[str, Any]],
+    stage2_results: List[Dict[str, Any]],
+) -> str:
+    """Build the chairman synthesis prompt for Stage 3."""
+    stage1_text = "\n\n".join([
+        f"Model: {result['model']}\nResponse: {result['response']}"
+        for result in stage1_results
+    ])
+
+    stage2_text = "\n\n".join([
+        f"Model: {result['model']}\nRanking: {result['ranking']}"
+        for result in stage2_results
+    ])
+
+    return f"""You are the Chairman of an LLM Council. Multiple AI models have provided responses to a user's question, and then ranked each other's responses.
+
+Original Question: {user_query}
+
+STAGE 1 - Individual Responses:
+{stage1_text}
+
+STAGE 2 - Peer Rankings:
+{stage2_text}
+
+Your task as Chairman is to synthesize all of this information into a single, comprehensive, accurate answer to the user's original question. Consider:
+- The individual responses and their insights
+- The peer rankings and what they reveal about response quality
+- Any patterns of agreement or disagreement
+
+Provide a clear, well-reasoned final answer that represents the council's collective wisdom:"""
+
+
+async def stage3_stream_synthesis(
+    user_query: str,
+    stage1_results: List[Dict[str, Any]],
+    stage2_results: List[Dict[str, Any]],
+    chairman_config: LLMConfig,
+    openrouter_api_key: Optional[str] = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Stage 3: Chairman synthesizes the final answer. Streams text chunks.
+    """
+    chairman_prompt = _build_chairman_prompt(user_query, stage1_results, stage2_results)
+    empty_context = Context(messages=[])
+
+    async for chunk in llm_streamer(
+        empty_context, chairman_prompt, chairman_config,
+        openrouter_api_key=openrouter_api_key,
+    ):
+        yield chunk
+
+
+def _sse_event(data: dict) -> str:
+    """Format a dict as an SSE data line."""
+    return f"data: {json.dumps(data)}\n\n"
+
+
+async def council_event_stream(
+    user_prompt: str,
+    context: Context,
+    member_configs: List[LLMConfig],
+    chairman_config: LLMConfig,
+    attachments_per_model: Optional[List[Optional[List[Dict[str, Any]]]]] = None,
+    tools_per_model: Optional[List[Optional[List[Dict[str, Any]]]]] = None,
+    openrouter_api_key: Optional[str] = None,
+    images: Optional[List[Dict[str, Any]]] = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Master orchestrator yielding SSE JSON events through the 3-stage council process.
+    """
+    # === Stage 1 ===
+    yield _sse_event({"type": "stage1_start"})
+
+    stage1_results: List[Dict[str, Any]] = []
+    async for result in stage1_collect_responses(
+        user_prompt, context, member_configs,
+        attachments_per_model=attachments_per_model,
+        tools_per_model=tools_per_model,
+        openrouter_api_key=openrouter_api_key,
+        images=images,
+    ):
+        stage1_results.append(result)
+        yield _sse_event({
+            "type": "stage1_model_complete",
+            "data": {"model": result["model"], "response": result["response"]},
+        })
+
+    yield _sse_event({"type": "stage1_complete", "data": stage1_results})
+
+    if not stage1_results:
+        yield _sse_event({
+            "type": "error",
+            "data": {"message": "All council models failed to respond."},
+        })
+        return
+
+    # === Stage 2 ===
+    yield _sse_event({"type": "stage2_start"})
+
+    stage2_results, label_to_model = await stage2_collect_rankings(
+        user_prompt, stage1_results, member_configs,
+        openrouter_api_key=openrouter_api_key,
+    )
+    aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
+
+    yield _sse_event({
+        "type": "stage2_complete",
+        "data": {
+            "rankings": stage2_results,
+            "label_to_model": label_to_model,
+            "aggregate_rankings": aggregate_rankings,
+        },
+    })
+
+    # === Stage 3 (streamed) ===
+    yield _sse_event({"type": "stage3_start"})
+
+    full_response = ""
+    async for chunk in stage3_stream_synthesis(
+        user_prompt, stage1_results, stage2_results, chairman_config,
+        openrouter_api_key=openrouter_api_key,
+    ):
+        full_response += chunk
+        yield _sse_event({"type": "stage3_chunk", "data": {"chunk": chunk}})
+
+    yield _sse_event({
+        "type": "stage3_complete",
+        "data": {"model": chairman_config.model_name, "response": full_response},
+    })
+
+    yield _sse_event({"type": "complete"})
diff --git a/backend/app/services/llm.py b/backend/app/services/llm.py
index 7efdce0..c22ada3 100644
--- a/backend/app/services/llm.py
+++ b/backend/app/services/llm.py
@@ -4,7 +4,7 @@ from typing import AsyncGenerator, List, Dict, Any, Optional
 import openai
 import google.generativeai as genai
 import anthropic
-from app.schemas import LLMConfig, Message, Role, Context
+from app.schemas import LLMConfig, Message, Role, Context, ModelProvider
 
 logger = logging.getLogger("contextflow.llm")
 
@@ -599,6 +599,33 @@ async def llm_streamer(
         yield f"Error calling LLM: {primary_error} (OpenRouter fallback also failed: {fallback_error})"
 
 
+def resolve_provider(model_name: str) -> ModelProvider:
+    """Determine the provider from a model name string."""
+    name = model_name.lower()
+    if name.startswith("claude"):
+        return ModelProvider.CLAUDE
+    if name.startswith("gemini"):
+        return ModelProvider.GOOGLE
+    # Default to OpenAI for gpt-*, o1, o3, etc.
+    return ModelProvider.OPENAI
+
+
+async def query_model_full(
+    context: Context,
+    user_prompt: str,
+    config: LLMConfig,
+    attachments=None,
+    tools=None,
+    openrouter_api_key=None,
+    images=None,
+) -> str:
+    """Collect the full response from llm_streamer (non-streaming wrapper)."""
+    chunks = []
+    async for chunk in llm_streamer(context, user_prompt, config, attachments, tools, openrouter_api_key, images):
+        chunks.append(chunk)
+    return "".join(chunks)
+
+
 async def generate_title(user_prompt: str, response: str, api_key: str = None) -> str:
     """
     Generate a short title (3-4 words) for a Q-A pair using gpt-5-nano.

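And a worked example of the Stage 2 bookkeeping, runnable as a plain script from backend/ (it only exercises the pure helpers in the new module, no network or API keys; the ranking text and model names are made up). Lower average_rank is better, and ties keep insertion order because Python's sort is stable:

# Demonstrates parse_ranking_from_text + calculate_aggregate_rankings on canned data.
from app.services.council import calculate_aggregate_rankings, parse_ranking_from_text

ranking_text = """Response A is thorough but meanders.
Response B is terse yet accurate.

FINAL RANKING:
1. Response B
2. Response A
"""
# The parser keys on the "FINAL RANKING:" sentinel and the "Response X" labels.
assert parse_ranking_from_text(ranking_text) == ["Response B", "Response A"]

# Two voters, both placing Response B first.
label_to_model = {"Response A": "gpt-5", "Response B": "claude-opus-4-6"}
stage2_results = [
    {"model": "gpt-5", "parsed_ranking": ["Response B", "Response A"]},
    {"model": "claude-opus-4-6", "parsed_ranking": ["Response B", "Response A"]},
]
print(calculate_aggregate_rankings(stage2_results, label_to_model))
# -> [{'model': 'claude-opus-4-6', 'average_rank': 1.0, 'rankings_count': 2},
#     {'model': 'gpt-5', 'average_rank': 2.0, 'rankings_count': 2}]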