Diffstat (limited to 'backend')
-rw-r--r--  backend/app/main.py              162
-rw-r--r--  backend/app/schemas.py            18
-rw-r--r--  backend/app/services/council.py  322
-rw-r--r--  backend/app/services/llm.py       29
4 files changed, 525 insertions, 6 deletions
diff --git a/backend/app/main.py b/backend/app/main.py
index d48ec89..9370a32 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -8,8 +8,9 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, FileResponse
 from fastapi import UploadFile, File, Form
 from pydantic import BaseModel
-from app.schemas import NodeRunRequest, NodeRunResponse, MergeStrategy, Role, Message, Context, LLMConfig, ModelProvider, ReasoningEffort
-from app.services.llm import llm_streamer, generate_title, get_openai_client, get_anthropic_client
+from app.schemas import NodeRunRequest, NodeRunResponse, MergeStrategy, Role, Message, Context, LLMConfig, ModelProvider, ReasoningEffort, CouncilRunRequest
+from app.services.llm import llm_streamer, generate_title, get_openai_client, get_anthropic_client, resolve_provider
+from app.services.council import council_event_stream
 from app.auth import auth_router, get_current_user, get_current_user_optional, init_db, User, get_db
 from app.auth.utils import get_password_hash
 from dotenv import load_dotenv
@@ -421,17 +422,19 @@ async def run_node_stream(
                 tools.append(tool_def)
             logger.debug("openai file_search: vs_ids=%s refs=%s filters=%s", vs_ids, debug_refs, filters)
     elif request.config.provider == ModelProvider.GOOGLE:
+        scoped_ids = resolve_scoped_file_ids(username, request.scopes, non_image_file_ids)
         attachments = await prepare_attachments(
             user=username,
             target_provider=request.config.provider,
-            attached_ids=non_image_file_ids,
+            attached_ids=scoped_ids,
             llm_config=request.config,
         )
     elif request.config.provider == ModelProvider.CLAUDE:
+        scoped_ids = resolve_scoped_file_ids(username, request.scopes, non_image_file_ids)
         attachments = await prepare_attachments(
             user=username,
             target_provider=request.config.provider,
-            attached_ids=non_image_file_ids,
+            attached_ids=scoped_ids,
             llm_config=request.config,
         )
 
@@ -442,6 +445,127 @@ async def run_node_stream(
         media_type="text/event-stream"
     )
 
+@app.post("/api/run_council_stream")
+async def run_council_stream(
+    request: CouncilRunRequest,
+    user: str = DEFAULT_USER,
+    current_user: User | None = Depends(get_current_user_optional),
+):
+    """
+    Run the 3-stage LLM Council and stream SSE events.
+    """
+    resolved = resolve_user(current_user, user)
+    username = resolved.username if resolved else DEFAULT_USER
+
+    # Merge incoming contexts (same logic as run_node_stream)
+    raw_messages = []
+    for ctx in request.incoming_contexts:
+        raw_messages.extend(ctx.messages)
+    if request.merge_strategy == MergeStrategy.SMART:
+        final_messages = smart_merge_messages(raw_messages)
+    else:
+        final_messages = raw_messages
+    execution_context = Context(messages=final_messages)
+
+    # Extract images from attached files
+    images, non_image_file_ids = extract_image_attachments(username, request.attached_file_ids)
+
+    openrouter_key = get_user_api_key(resolved, "openrouter")
+
+    # Build LLMConfig + attachments for each council member
+    member_configs: list[LLMConfig] = []
+    attachments_per_model: list[list[dict] | None] = []
+    tools_per_model: list[list[dict] | None] = []
+
+    for member in request.council_models:
+        provider = resolve_provider(member.model_name)
+        api_key = get_user_api_key(resolved, provider.value)
+
+        config = LLMConfig(
+            provider=provider,
+            model_name=member.model_name,
+            temperature=request.temperature,
+            system_prompt=request.system_prompt,
+            api_key=api_key,
+            reasoning_effort=request.reasoning_effort,
+        )
+        member_configs.append(config)
+
+        # Prepare provider-specific file attachments
+        tools: list[dict] = []
+        attachments: list[dict] = []
+
+        # For Google/Claude: resolve scope-based files so upstream attachments are visible
+        scoped_file_ids = resolve_scoped_file_ids(username, request.scopes, non_image_file_ids)
+
+        if provider == ModelProvider.OPENAI:
+            vs_ids, debug_refs, filters = await prepare_openai_vector_search(
+                user=username,
+                attached_ids=non_image_file_ids,
+                scopes=request.scopes,
+                llm_config=config,
+            )
+            if not vs_ids:
+                try:
+                    client = get_openai_client(config.api_key)
+                    vs_id = await ensure_user_vector_store(username, client)
+                    if vs_id:
+                        vs_ids = [vs_id]
+                except Exception:
+                    pass
+            if vs_ids:
+                tool_def = {"type": "file_search", "vector_store_ids": vs_ids}
+                if filters:
+                    tool_def["filters"] = filters
+                tools.append(tool_def)
+        elif provider == ModelProvider.GOOGLE:
+            attachments = await prepare_attachments(
+                user=username,
+                target_provider=provider,
+                attached_ids=scoped_file_ids,
+                llm_config=config,
+            )
+        elif provider == ModelProvider.CLAUDE:
+            attachments = await prepare_attachments(
+                user=username,
+                target_provider=provider,
+                attached_ids=scoped_file_ids,
+                llm_config=config,
+            )
+
+        attachments_per_model.append(attachments or None)
+        tools_per_model.append(tools or None)
+
+    # Build chairman config
+    chairman_provider = resolve_provider(request.chairman_model)
+    chairman_api_key = get_user_api_key(resolved, chairman_provider.value)
+    chairman_config = LLMConfig(
+        provider=chairman_provider,
+        model_name=request.chairman_model,
+        temperature=request.temperature,
+        system_prompt=request.system_prompt,
+        api_key=chairman_api_key,
+        reasoning_effort=request.reasoning_effort,
+    )
+
+    return StreamingResponse(
+        council_event_stream(
+            user_prompt=request.user_prompt,
+            context=execution_context,
+            member_configs=member_configs,
+            chairman_config=chairman_config,
+            attachments_per_model=attachments_per_model,
+            tools_per_model=tools_per_model,
+            openrouter_api_key=openrouter_key,
+            images=images,
+        ),
+        media_type="text/event-stream",
+    )
+
+
 class TitleRequest(BaseModel):
     user_prompt: str
     response: str
@@ -832,6 +956,36 @@ def save_files_index(user: str, items: List[FileMeta]):
         json.dump([item.model_dump() for item in items], f, ensure_ascii=False, indent=2)
 
+def resolve_scoped_file_ids(user: str, scopes: List[str], explicit_ids: List[str]) -> List[str]:
+    """
+    Resolve file IDs that are relevant to the given scopes (upstream nodes).
+    Combines scope-matched files with explicitly attached files.
+    This gives Google/Claude the same scope awareness that OpenAI gets via file_search.
+    """
+    if not scopes and not explicit_ids:
+        return []
+
+    items = load_files_index(user)
+    result_ids: dict[str, bool] = {}
+
+    # Add explicitly attached files first
+    for fid in explicit_ids:
+        result_ids[fid] = True
+
+    # Add files whose scopes intersect with requested scopes (skip images)
+    if scopes:
+        for item in items:
+            if item.id in result_ids:
+                continue
+            if item.mime in IMAGE_MIME_TYPES:
+                continue
+            if item.scopes and any(s in scopes for s in item.scopes):
+                result_ids[item.id] = True
+                logger.debug("resolve_scoped_file_ids: scope match %s -> %s", item.name, item.id)
+
+    return list(result_ids.keys())
+
+
 async def _check_google_file_active(uri_or_name: str, api_key: str = None) -> bool:
     """Check if a Google file reference is still ACTIVE (not expired)."""
     key = api_key or os.getenv("GOOGLE_API_KEY")
diff --git a/backend/app/schemas.py b/backend/app/schemas.py
index 8e5f12c..a527004 100644
--- a/backend/app/schemas.py
+++ b/backend/app/schemas.py
@@ -34,7 +34,7 @@ class LLMConfig(BaseModel):
     provider: ModelProvider
     model_name: str
     temperature: float = 0.7
-    max_tokens: int = 1000
+    max_tokens: Optional[int] = None
     system_prompt: Optional[str] = None
     api_key: Optional[str] = None  # Optional override, usually from env
     enable_google_search: bool = False
@@ -55,6 +55,22 @@ class NodeRunRequest(BaseModel):
     # Contains all project/node combinations in the current trace
     scopes: List[str] = Field(default_factory=list)
 
+class CouncilMemberConfig(BaseModel):
+    model_name: str  # e.g. "gpt-5", "claude-opus-4-6", "gemini-3-pro-preview"
+
+class CouncilRunRequest(BaseModel):
+    node_id: str
+    incoming_contexts: List[Context] = []
+    user_prompt: str
+    council_models: List[CouncilMemberConfig]  # 2-6 models
+    chairman_model: str  # Model name for synthesis
+    system_prompt: Optional[str] = None
+    temperature: float = 0.7
+    reasoning_effort: ReasoningEffort = ReasoningEffort.MEDIUM
+    merge_strategy: MergeStrategy = MergeStrategy.SMART
+    attached_file_ids: List[str] = Field(default_factory=list)
+    scopes: List[str] = Field(default_factory=list)
+
 class NodeRunResponse(BaseModel):
     node_id: str
     output_context: Context
diff --git a/backend/app/services/council.py b/backend/app/services/council.py
new file mode 100644
index 0000000..d177f44
--- /dev/null
+++ b/backend/app/services/council.py
@@ -0,0 +1,322 @@
+"""3-stage LLM Council orchestration for ContextFlow."""
+
+import asyncio
+import json
+import logging
+import re
+from collections import defaultdict
+from typing import AsyncGenerator, Dict, List, Any, Optional, Tuple
+
+from app.schemas import Context, LLMConfig
+from app.services.llm import query_model_full, llm_streamer
+
+logger = logging.getLogger("contextflow.council")
+
+
+async def stage1_collect_responses(
+    user_prompt: str,
+    context: Context,
+    configs: List[LLMConfig],
+    attachments_per_model: Optional[List[Optional[List[Dict[str, Any]]]]] = None,
+    tools_per_model: Optional[List[Optional[List[Dict[str, Any]]]]] = None,
+    openrouter_api_key: Optional[str] = None,
+    images: Optional[List[Dict[str, Any]]] = None,
+) -> AsyncGenerator[Dict[str, Any], None]:
+    """
+    Stage 1: Query all council member models in parallel.
+    Yields one result dict as each model completes; the caller
+    accumulates them and emits the corresponding SSE events.
+    """
+    async def _query_one(idx: int, config: LLMConfig) -> Dict[str, Any]:
+        atts = attachments_per_model[idx] if attachments_per_model else None
+        tls = tools_per_model[idx] if tools_per_model else None
+        try:
+            response = await query_model_full(
+                context, user_prompt, config,
+                attachments=atts, tools=tls,
+                openrouter_api_key=openrouter_api_key, images=images,
+            )
+            return {"model": config.model_name, "response": response}
+        except Exception as e:
+            logger.error("Council stage1 failed for %s: %s", config.model_name, e)
+            return {"model": config.model_name, "response": f"[Error: {e}]"}
+
+    # Launch all queries concurrently and yield each result as it completes.
+    tasks = [asyncio.ensure_future(_query_one(i, cfg)) for i, cfg in enumerate(configs)]
+    for coro in asyncio.as_completed(tasks):
+        yield await coro  # caller sends stage1_model_complete event
+
+
+def _build_ranking_prompt(user_query: str, stage1_results: List[Dict[str, Any]]) -> str:
+    """Build the anonymized ranking prompt for Stage 2."""
+    labels = [chr(65 + i) for i in range(len(stage1_results))]  # A, B, C, ...
+
+    responses_text = "\n\n".join([
+        f"Response {label}:\n{result['response']}"
+        for label, result in zip(labels, stage1_results)
+    ])
+
+    return f"""You are evaluating different responses to the following question:
+
+Question: {user_query}
+
+Here are the responses from different models (anonymized):
+
+{responses_text}
+
+Your task:
+1. First, evaluate each response individually. For each response, explain what it does well and what it does poorly.
+2. Then, at the very end of your response, provide a final ranking.
+
+IMPORTANT: Your final ranking MUST be formatted EXACTLY as follows:
+- Start with the line "FINAL RANKING:" (all caps, with colon)
+- Then list the responses from best to worst as a numbered list
+- Each line should be: number, period, space, then ONLY the response label (e.g., "1. Response A")
+- Do not add any other text or explanations in the ranking section
+
+Example of the correct format for your ENTIRE response:
+
+Response A provides good detail on X but misses Y...
+Response B is accurate but lacks depth on Z...
+Response C offers the most comprehensive answer...
+
+FINAL RANKING:
+1. Response C
+2. Response A
+3. Response B
+
+Now provide your evaluation and ranking:"""
+
+
+def parse_ranking_from_text(ranking_text: str) -> List[str]:
+    """Parse the FINAL RANKING section from the model's response."""
+    if "FINAL RANKING:" in ranking_text:
+        parts = ranking_text.split("FINAL RANKING:")
+        if len(parts) >= 2:
+            ranking_section = parts[1]
+            numbered_matches = re.findall(r'\d+\.\s*Response [A-Z]', ranking_section)
+            if numbered_matches:
+                return [re.search(r'Response [A-Z]', m).group() for m in numbered_matches]
+            matches = re.findall(r'Response [A-Z]', ranking_section)
+            return matches
+
+    matches = re.findall(r'Response [A-Z]', ranking_text)
+    return matches
+
+
+async def stage2_collect_rankings(
+    user_query: str,
+    stage1_results: List[Dict[str, Any]],
+    configs: List[LLMConfig],
+    openrouter_api_key: Optional[str] = None,
+) -> Tuple[List[Dict[str, Any]], Dict[str, str]]:
+    """
+    Stage 2: Each model ranks the anonymized responses.
+    Text-only prompts, no file attachments.
+    """
+    labels = [chr(65 + i) for i in range(len(stage1_results))]
+    label_to_model = {
+        f"Response {label}": result['model']
+        for label, result in zip(labels, stage1_results)
+    }
+
+    ranking_prompt = _build_ranking_prompt(user_query, stage1_results)
+    empty_context = Context(messages=[])
+
+    async def _rank_one(config: LLMConfig) -> Dict[str, Any]:
+        try:
+            response = await query_model_full(
+                empty_context, ranking_prompt, config,
+                openrouter_api_key=openrouter_api_key,
+            )
+            parsed = parse_ranking_from_text(response)
+            return {
+                "model": config.model_name,
+                "ranking": response,
+                "parsed_ranking": parsed,
+            }
+        except Exception as e:
+            logger.error("Council stage2 failed for %s: %s", config.model_name, e)
+            return {
+                "model": config.model_name,
+                "ranking": f"[Error: {e}]",
+                "parsed_ranking": [],
+            }
+
+    results = await asyncio.gather(*[_rank_one(cfg) for cfg in configs])
+    return list(results), label_to_model
+
+
+def calculate_aggregate_rankings(
+    stage2_results: List[Dict[str, Any]],
+    label_to_model: Dict[str, str],
+) -> List[Dict[str, Any]]:
+    """Calculate aggregate rankings across all models."""
+    model_positions: Dict[str, List[int]] = defaultdict(list)
+
+    for ranking in stage2_results:
+        parsed_ranking = ranking.get("parsed_ranking", [])
+        if not parsed_ranking:
+            parsed_ranking = parse_ranking_from_text(ranking.get("ranking", ""))
+        for position, label in enumerate(parsed_ranking, start=1):
+            if label in label_to_model:
+                model_name = label_to_model[label]
+                model_positions[model_name].append(position)
+
+    aggregate = []
+    for model, positions in model_positions.items():
+        if positions:
+            avg_rank = sum(positions) / len(positions)
+            aggregate.append({
+                "model": model,
+                "average_rank": round(avg_rank, 2),
+                "rankings_count": len(positions),
+            })
+
+    aggregate.sort(key=lambda x: x["average_rank"])
+    return aggregate
+
+
+def _build_chairman_prompt(
+    user_query: str,
+    stage1_results: List[Dict[str, Any]],
+    stage2_results: List[Dict[str, Any]],
+) -> str:
+    """Build the chairman synthesis prompt for Stage 3."""
+    stage1_text = "\n\n".join([
+        f"Model: {result['model']}\nResponse: {result['response']}"
+        for result in stage1_results
+    ])
+
+    stage2_text = "\n\n".join([
+        f"Model: {result['model']}\nRanking: {result['ranking']}"
+        for result in stage2_results
+    ])
+
+    return f"""You are the Chairman of an LLM Council. Multiple AI models have provided responses to a user's question, and then ranked each other's responses.
+
+Original Question: {user_query}
+
+STAGE 1 - Individual Responses:
+{stage1_text}
+
+STAGE 2 - Peer Rankings:
+{stage2_text}
+
+Your task as Chairman is to synthesize all of this information into a single, comprehensive, accurate answer to the user's original question. Consider:
+- The individual responses and their insights
+- The peer rankings and what they reveal about response quality
+- Any patterns of agreement or disagreement
+
+Provide a clear, well-reasoned final answer that represents the council's collective wisdom:"""
+
+
+async def stage3_stream_synthesis(
+    user_query: str,
+    stage1_results: List[Dict[str, Any]],
+    stage2_results: List[Dict[str, Any]],
+    chairman_config: LLMConfig,
+    openrouter_api_key: Optional[str] = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Stage 3: Chairman synthesizes the final answer. Streams text chunks.
+    """
+    chairman_prompt = _build_chairman_prompt(user_query, stage1_results, stage2_results)
+    empty_context = Context(messages=[])
+
+    async for chunk in llm_streamer(
+        empty_context, chairman_prompt, chairman_config,
+        openrouter_api_key=openrouter_api_key,
+    ):
+        yield chunk
+
+
+def _sse_event(data: dict) -> str:
+    """Format a dict as an SSE data line."""
+    return f"data: {json.dumps(data)}\n\n"
+
+
+async def council_event_stream(
+    user_prompt: str,
+    context: Context,
+    member_configs: List[LLMConfig],
+    chairman_config: LLMConfig,
+    attachments_per_model: Optional[List[Optional[List[Dict[str, Any]]]]] = None,
+    tools_per_model: Optional[List[Optional[List[Dict[str, Any]]]]] = None,
+    openrouter_api_key: Optional[str] = None,
+    images: Optional[List[Dict[str, Any]]] = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Master orchestrator yielding SSE JSON events through the 3-stage council process.
+    """
+    # === Stage 1 ===
+    yield _sse_event({"type": "stage1_start"})
+
+    stage1_results: List[Dict[str, Any]] = []
+    async for result in stage1_collect_responses(
+        user_prompt, context, member_configs,
+        attachments_per_model=attachments_per_model,
+        tools_per_model=tools_per_model,
+        openrouter_api_key=openrouter_api_key,
+        images=images,
+    ):
+        stage1_results.append(result)
+        yield _sse_event({
+            "type": "stage1_model_complete",
+            "data": {"model": result["model"], "response": result["response"]},
+        })
+
+    yield _sse_event({"type": "stage1_complete", "data": stage1_results})
+
+    if not stage1_results:
+        yield _sse_event({
+            "type": "error",
+            "data": {"message": "All council models failed to respond."},
+        })
+        return
+
+    # === Stage 2 ===
+    yield _sse_event({"type": "stage2_start"})
+
+    stage2_results, label_to_model = await stage2_collect_rankings(
+        user_prompt, stage1_results, member_configs,
+        openrouter_api_key=openrouter_api_key,
+    )
+    aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
+
+    yield _sse_event({
+        "type": "stage2_complete",
+        "data": {
+            "rankings": stage2_results,
+            "label_to_model": label_to_model,
+            "aggregate_rankings": aggregate_rankings,
+        },
+    })
+
+    # === Stage 3 (streamed) ===
+    yield _sse_event({"type": "stage3_start"})
+
+    full_response = ""
+    async for chunk in stage3_stream_synthesis(
+        user_prompt, stage1_results, stage2_results, chairman_config,
+        openrouter_api_key=openrouter_api_key,
+    ):
+        full_response += chunk
+        yield _sse_event({"type": "stage3_chunk", "data": {"chunk": chunk}})
+
+    yield _sse_event({
+        "type": "stage3_complete",
+        "data": {"model": chairman_config.model_name, "response": full_response},
+    })
+
+    yield _sse_event({"type": "complete"})
diff --git a/backend/app/services/llm.py b/backend/app/services/llm.py
index 7efdce0..c22ada3 100644
--- a/backend/app/services/llm.py
+++ b/backend/app/services/llm.py
@@ -4,7 +4,7 @@ from typing import AsyncGenerator, List, Dict, Any, Optional
 import openai
 import google.generativeai as genai
 import anthropic
-from app.schemas import LLMConfig, Message, Role, Context
+from app.schemas import LLMConfig, Message, Role, Context, ModelProvider
 
 logger = logging.getLogger("contextflow.llm")
 
@@ -599,6 +599,33 @@ async def llm_streamer(
         yield f"Error calling LLM: {primary_error} (OpenRouter fallback also failed: {fallback_error})"
 
 
+def resolve_provider(model_name: str) -> ModelProvider:
+    """Determine the provider from a model name string."""
+    name = model_name.lower()
+    if name.startswith("claude"):
+        return ModelProvider.CLAUDE
+    if name.startswith("gemini"):
+        return ModelProvider.GOOGLE
+    # Default to OpenAI for gpt-*, o1, o3, etc.
+    return ModelProvider.OPENAI
+
+
+async def query_model_full(
+    context: Context,
+    user_prompt: str,
+    config: LLMConfig,
+    attachments=None,
+    tools=None,
+    openrouter_api_key=None,
+    images=None,
+) -> str:
+    """Collect the full response from llm_streamer (non-streaming wrapper)."""
+    chunks = []
+    async for chunk in llm_streamer(context, user_prompt, config, attachments, tools, openrouter_api_key, images):
+        chunks.append(chunk)
+    return "".join(chunks)
+
+
 async def generate_title(user_prompt: str, response: str, api_key: str = None) -> str:
     """
     Generate a short title (3-4 words) for a Q-A pair using gpt-5-nano.

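And a worked example of the Stage 2 bookkeeping, runnable as a plain script from backend/ (it only exercises the pure helpers in the new module, no network or API keys; the ranking text and model names are made up). Lower average_rank is better, and ties keep insertion order because Python's sort is stable:

# Demonstrates parse_ranking_from_text + calculate_aggregate_rankings on canned data.
from app.services.council import calculate_aggregate_rankings, parse_ranking_from_text

ranking_text = """Response A is thorough but meanders.
Response B is terse yet accurate.

FINAL RANKING:
1. Response B
2. Response A
"""
# The parser keys on the "FINAL RANKING:" sentinel and the "Response X" labels.
assert parse_ranking_from_text(ranking_text) == ["Response B", "Response A"]

# Two voters, both placing Response B first.
label_to_model = {"Response A": "gpt-5", "Response B": "claude-opus-4-6"}
stage2_results = [
    {"model": "gpt-5", "parsed_ranking": ["Response B", "Response A"]},
    {"model": "claude-opus-4-6", "parsed_ranking": ["Response B", "Response A"]},
]
print(calculate_aggregate_rankings(stage2_results, label_to_model))
# -> [{'model': 'claude-opus-4-6', 'average_rank': 1.0, 'rankings_count': 2},
#     {'model': 'gpt-5', 'average_rank': 2.0, 'rankings_count': 2}]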