Skip to main content
AI agent that analyzes uploaded PDF and DOCX documents based on user prompts.

Code

"""Document Analyzer Agent — analyzes uploaded PDF/DOCX documents based on a user prompt.

Features:
- Works with Bindu A2A FilePart messages
- Supports PDF and DOCX
- Prompt-driven analysis
- Multi-file support
"""

from bindu.penguin.bindufy import bindufy
from agno.agent import Agent
from agno.models.openrouter import OpenRouter
from dotenv import load_dotenv

import os
import io
import base64
from pypdf import PdfReader
from docx import Document

# Load variables from a .env file before the agent is built: the API key
# below is read from the environment while this module is being imported.
load_dotenv()

# System instructions that restrict the model to the supplied document text.
_ANALYST_INSTRUCTIONS = """
You are an advanced document analysis assistant.

Your job is to analyze uploaded documents and answer the user's prompt
based ONLY on the document content.

Guidelines:
- Carefully read the document text
- Extract relevant insights requested in the prompt
- Be structured and clear
- If the prompt asks for research insights, provide:
  - methodology
  - research gap
  - key findings
  - conclusions
- If the prompt asks for summary, provide concise bullet points
- Do not hallucinate information outside the document
"""

# OpenRouter-hosted model; the key may be None if OPENROUTER_API_KEY is unset.
_analysis_model = OpenRouter(
    id="arcee-ai/trinity-large-preview:free",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)

# Module-level agent reused by every call to handler().
agent = Agent(instructions=_ANALYST_INSTRUCTIONS, model=_analysis_model)

# Document Parsing
def extract_text_from_pdf(file_bytes):
    """Extract the text of every readable page from an in-memory PDF.

    Args:
        file_bytes: Raw bytes of the PDF file.

    Returns:
        The text of all pages that yielded text, joined with newlines.
        Pages that produce no text, or whose extraction raises, are skipped,
        so the result can be an empty string.

    Raises:
        ValueError: If the bytes cannot be opened as a PDF. The parser's
            original exception is chained as ``__cause__``.
    """
    try:
        reader = PdfReader(io.BytesIO(file_bytes))
    except Exception as e:
        # Chain the parser error so the root cause stays in the traceback.
        raise ValueError(f"Invalid PDF file: {e}") from e

    page_texts = []
    for page in reader.pages:
        try:
            page_text = page.extract_text()
        except Exception:
            # Deliberate best effort: one unreadable page must not discard
            # the text already recovered from the other pages.
            continue
        if page_text:
            page_texts.append(page_text)

    return "\n".join(page_texts)

def extract_text_from_docx(file_bytes):
    """Return the paragraph text of an in-memory DOCX file.

    Only ``doc.paragraphs`` is read; each paragraph's text becomes one line
    of the result. Errors raised while opening the file propagate unchanged.
    """
    stream = io.BytesIO(file_bytes)
    document = Document(stream)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)

def extract_document_text(file_bytes, mime_type):
    """Extract text from a document, choosing the parser by MIME type.

    The MIME type is matched case-insensitively and any parameters after a
    ``;`` (for example ``application/pdf; charset=binary``) are ignored, so
    equivalent spellings of a supported type are all accepted.

    Args:
        file_bytes: Raw bytes of the uploaded document.
        mime_type: Declared MIME type of the document; may be ``None``.

    Returns:
        The text extracted by the PDF or DOCX parser.

    Raises:
        ValueError: If the MIME type is not PDF or DOCX. The message echoes
            the value exactly as the caller supplied it.
    """
    # Reduce "Type/Subtype; param=value" to a bare lowercase "type/subtype".
    media_type = str(mime_type or "").split(";", 1)[0].strip().lower()

    if media_type == "application/pdf":
        return extract_text_from_pdf(file_bytes)

    if media_type == (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    ):
        return extract_text_from_docx(file_bytes)

    raise ValueError(f"Unsupported file type: {mime_type}")

# FilePart processing
def get_file_bytes(part):
    """Extract the raw file bytes from a FilePart.

    The payload is read from ``part["file"]["bytes"]``; when that entry is
    missing, ``None`` or empty, ``part["file"]["data"]`` is used instead.
    String payloads are treated as base64 and decoded; any other payload is
    returned unchanged.

    Args:
        part: A FilePart mapping with a ``"file"`` entry.

    Returns:
        The file content. An empty payload yields ``b""`` rather than ``None``.

    Raises:
        KeyError: If ``part`` has no ``"file"`` entry.
        ValueError: If the file entry has neither a ``"bytes"`` nor a
            ``"data"`` key, or if a string payload is not valid base64.
    """
    file_info = part["file"]

    if "bytes" not in file_info and "data" not in file_info:
        raise ValueError("Unsupported file part format")

    # Prefer "bytes" but fall back to "data" when "bytes" is null or empty;
    # the trailing b"" keeps the return type consistent for empty payloads.
    data = file_info.get("bytes") or file_info.get("data") or b""

    if isinstance(data, str):
        return base64.b64decode(data)

    return data

# Handler
def handler(messages: list[dict]):
    """Analyze the documents attached to the user messages with the LLM agent.

    Receives task.history — a list of A2A Message objects.
    Each message has: role, parts[], kind, messageId, contextId, taskId
    Each part has: kind="text"|"file", and either text or file.bytes+mimeType

    Only messages whose role is ``"user"`` (or missing) are considered. The
    last text part seen becomes the prompt; every file part is parsed and the
    results are concatenated in order. A file that cannot be parsed, or that
    yields no text, contributes an ``Error processing file: ...`` note instead
    of content.

    Args:
        messages: The conversation history as a list of message dicts.

    Returns:
        A plain string when there are no messages or no document could be
        parsed (including the per-file error notes, if any); otherwise the
        value returned by ``agent.run``.
    """
    if not messages:
        return "No messages received."

    prompt = ""
    extracted_docs = []
    # Counts files that produced real text, so error notes alone never count
    # as a document.
    parsed_count = 0

    for msg in messages:
        role = msg.get("role")
        if role is not None and role != "user":
            continue

        for part in msg.get("parts") or []:
            kind = part.get("kind")

            if kind == "text":
                prompt = part.get("text", "")

            elif kind == "file":
                try:
                    file_info = part.get("file") or {}
                    b64_data = file_info.get("bytes") or file_info.get("data")
                    mime_type = file_info.get("mimeType", "")

                    if not b64_data:
                        raise ValueError("No file data found")

                    file_bytes = (
                        base64.b64decode(b64_data)
                        if isinstance(b64_data, str)
                        else b64_data
                    )
                    doc_text = extract_document_text(file_bytes, mime_type)

                    # E.g. an image-only PDF: nothing for the model to read.
                    if not doc_text.strip():
                        raise ValueError("No extractable text found in document")

                except Exception as e:
                    # Boundary catch: the parsers raise many exception types
                    # and one bad upload must not abort the whole request.
                    extracted_docs.append(f"Error processing file: {e}")
                else:
                    extracted_docs.append(doc_text)
                    parsed_count += 1

    if parsed_count == 0:
        if extracted_docs:
            # Every upload failed: report why instead of asking the model to
            # analyze error messages as if they were document content.
            failures = "\n".join(extracted_docs)
            return f"No valid document found in the messages.\n{failures}"
        return "No valid document found in the messages."

    combined_document = "\n\n".join(extracted_docs)
    result = agent.run(input=f"""
User Prompt:
{prompt}

Document Content:
{combined_document}

Provide analysis based on the prompt.
""")
    return result

# Bindu config
# Where the agent is served.
# NOTE(review): the CORS origin is presumably the local frontend dev server;
# confirm against the frontend setup.
_deployment = {
    "url": "http://localhost:3773",
    "expose": True,
    "cors_origins": ["http://localhost:5173"],
}

# Settings passed to bindufy() together with the handler.
config = {
    "author": "vyomrohila@gmail.com",
    "name": "document_analyzer_agent",
    "description": "AI agent that analyzes uploaded PDF or DOCX documents based on a user prompt.",
    "deployment": _deployment,
    "skills": ["skills/document-processing"],
    "enable_system_message": False,
}

# Start the agent only when this file is run as a script, not on import.
if __name__ == "__main__":
    bindufy(config, handler)

How It Works

Document Processing
  • extract_text_from_pdf: Parses PDF files using pypdf library
  • extract_text_from_docx: Extracts text from DOCX files
  • Handles file bytes and base64 decoding
  • Multi-file support with combined analysis
A2A Message Handling
  • Processes Bindu A2A FilePart messages
  • Handles both text prompts and file uploads
  • Role-based message filtering (user messages only)
  • Robust error handling for malformed data
Analysis Capabilities
  • Prompt-driven document analysis
  • Research insights extraction (methodology, gaps, findings)
  • Concise summarization capabilities
  • Model is instructed to answer from the document content only (to limit hallucination)
File Support
  • PDF documents via pypdf reader
  • DOCX documents via python-docx
  • Parser selection and validation based on the file part's declared MIME type
  • Automatic text extraction and processing

Run

uv run examples/specialized/document-analyzer.py
Try: Upload a research paper and ask “Summarize the key findings and methodology”. To use the web UI, go to frontend and run npm run dev, then open http://localhost:5173 and chat with the document analyzer.