AI agent that analyzes uploaded PDF and DOCX documents based on user prompts.

Code

Create document-analyzer.py with the code below, or save it directly from your editor.
"""Document Analyzer Agent — analyzes uploaded PDF/DOCX documents based on a user prompt.

Features:
- Works with Bindu A2A FilePart messages
- Supports PDF and DOCX
- Prompt-driven analysis
- Multi-file support
"""

from bindu.penguin.bindufy import bindufy
from agno.agent import Agent
from agno.models.openrouter import OpenRouter
from dotenv import load_dotenv

import os
import io
import base64
from pypdf import PdfReader
from docx import Document

load_dotenv()

# Define LLM agent
agent = Agent(
    instructions="""
You are an advanced document analysis assistant.

Your job is to analyze uploaded documents and answer the user's prompt
based ONLY on the document content.

Guidelines:
- Carefully read the document text
- Extract relevant insights requested in the prompt
- Be structured and clear
- If the prompt asks for research insights, provide:
  - methodology
  - research gap
  - key findings
  - conclusions
- If the prompt asks for summary, provide concise bullet points
- Do not hallucinate information outside the document
""",
    model=OpenRouter(
        id="arcee-ai/trinity-large-preview:free",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    ),
)

# Document Parsing
def extract_text_from_pdf(file_bytes):
    """Extract text from pdf bytes"""
    try:
        reader = PdfReader(io.BytesIO(file_bytes))
    except Exception as e:
        raise ValueError(f"Invalid PDF file: {str(e)}")
    text = []

    for page in reader.pages:
        try:
            page_text = page.extract_text()
            if page_text:
                text.append(page_text)
        except Exception:
            continue

    return "\n".join(text)

def extract_text_from_docx(file_bytes):
    """Extract text from docx bytes"""
    doc = Document(io.BytesIO(file_bytes))
    return "\n".join([p.text for p in doc.paragraphs])

def extract_document_text(file_bytes, mime_type):
    """Parse document according to their mime type"""
    if mime_type == "application/pdf":
        return extract_text_from_pdf(file_bytes)

    if mime_type in [
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    ]:
        return extract_text_from_docx(file_bytes)

    raise ValueError(f"Unsupported file type: {mime_type}")

# FilePart processing
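# Note: the handler below decodes FilePart payloads inline; this helper wraps the same decoding for standalone reuse.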
def get_file_bytes(part):
    """Extract file bytes from FilePart"""
    file_info = part["file"]

    if "bytes" in file_info:
        data = file_info["bytes"]
    elif "data" in file_info:
        data = file_info["data"]
    else:
        raise ValueError("Unsupported file part format")

    if isinstance(data, str):
        return base64.b64decode(data)

    return data

# Handler
def handler(messages: list[dict]):
    """
    Receives task.history — a list of A2A Message objects.
    Each message has: role, parts[], kind, messageId, contextId, taskId
    Each part has: kind="text"|"file", and either text or file.bytes+mimeType
    """
    if not messages:
        return "No messages received."

    prompt = ""
    extracted_docs = []

    for msg in messages:
        role = msg.get("role")
        if role is not None and role != "user":
            continue

        parts = msg.get("parts") or []
        for part in parts:
            if part.get("kind") == "text":
                prompt = part.get("text", "")

            elif part.get("kind") == "file":
                try:
                    file_info = part.get("file", {})
                    b64_data = file_info.get("bytes") or file_info.get("data")
                    mime_type = file_info.get("mimeType", "")

                    if not b64_data:
                        raise ValueError("No file data found")

                    file_bytes = (
                        base64.b64decode(b64_data)
                        if isinstance(b64_data, str)
                        else b64_data
                    )
                    doc_text = extract_document_text(file_bytes, mime_type)
                    extracted_docs.append(doc_text)

                except Exception as e:
                    extracted_docs.append(f"Error processing file: {str(e)}")

    if not extracted_docs:
        return "No valid document found in the messages."

    combined_document = "\n\n".join(extracted_docs)
    result = agent.run(input=f"""
User Prompt:
{prompt}

Document Content:
{combined_document}

Provide analysis based on the prompt.
""")
    return result.content  # return the response text so every handler path yields a string

# Bindu config
config = {
    "author" : "vyomrohila@gmail.com",
    "name" : "document_analyzer_agent",
    "description": "AI agent that analyzes uploaded PDF or DOCX documents based on a user prompt.",
    "deployment": {
        "url": "http://localhost:3773",
        "expose": True,
        "cors_origins": ["http://localhost:5173"],
    },
    "skills": ["skills/document-processing"],
    "enable_system_message": False,
}

if __name__ == "__main__":
    bindufy(config, handler)

Skill Configuration

Create skills/document-processing/skill.yaml:
# Document Analysis Skill
# Analyze documents (PDF or DOCX) based on custom user prompts to extract insights

# Basic Metadata
id: document-analysis-v1
name: document-analysis
version: 1.1.0
author: your.email@example.com

# Description
description: |
  Analyze PDF and DOCX documents based on custom user prompts.
  Extracts targeted insights such as summaries, key information,
  important dates, financial data, legal clauses, action items,
  or structured knowledge from any document.

  Supports a wide range of documents including reports, contracts,
  manuals, resumes, research papers, meeting notes, and business documents.

# Tags and Modes
tags:
  - document
  - analysis
  - pdf
  - docx
  - summarization
  - knowledge-extraction
  - nlp

input_modes:
  - application/pdf
  - application/vnd.openxmlformats-officedocument.wordprocessingml.document

output_modes:
  - text/plain
  - application/json

# Example Queries
examples:
  - "Summarize the main points of this document"
  - "Extract the key information from this file"
  - "Provide a short summary of each section"
  - "List all important dates mentioned in the document"
  - "Identify the main topics discussed"
  - "Extract all action items or tasks mentioned"
  - "List any names, organizations, or locations referenced"
  - "Highlight the most important insights from the document"
  - "Explain the document in simple terms"
  - "Provide a structured outline of the document"
  - "Extract any numbers, statistics, or financial data"
  - "Identify any instructions or procedures mentioned"
  - "List conclusions or recommendations"
  - "Find any deadlines or time-sensitive information"
  - "Provide key takeaways from the document"

# Detailed Capabilities
capabilities_detail:

  document_preprocessing:
    supported: true
    features:
      - text_cleaning
      - header_footer_removal
      - page_number_removal
      - whitespace_normalization
      - duplicate_line_removal

  document_analysis:
    supported: true
    types:
      - topic_identification
      - key_information_extraction
      - entity_extraction
      - important_date_detection
      - instruction_identification
    output_formats:
      - structured_report
      - bullet_points
      - section_wise_summary

  document_summarization:
    supported: true
    types:
      - executive_summary
      - section_wise_summary
      - key_points_extraction
      - simplified_explanation
    customizable: true
    prompt_driven: true

  contract_and_legal_analysis:
    supported: true
    types:
      - clause_extraction
      - obligation_identification
      - risk_flagging
      - party_identification
      - date_and_deadline_extraction

  financial_document_analysis:
    supported: true
    types:
      - figure_extraction
      - trend_identification
      - ratio_analysis
      - anomaly_flagging

  multi_document_analysis:
    supported: true
    operations:
      - document_comparison
      - cross_document_summary
      - duplicate_information_detection
      - consistency_check

  custom_prompt_analysis:
    supported: true
    description: "User provides any free-form analytical prompt; agent tailors response accordingly"
    web_search_enrichment: true

# Requirements
requirements:
  packages:
    - pypdf>=3.0.0
    - python-docx>=1.1.0
  system: []
  min_memory_mb: 256

# Performance Metrics
performance:
  avg_processing_time_ms: 3000
  avg_time_per_page_ms: 300
  max_file_size_mb: 50
  max_pages: 500
  concurrent_requests: 5
  memory_per_request_mb: 256
  timeout_per_page_seconds: 30
  scalability: horizontal

# Tool Restrictions
allowed_tools:
  - Read
  - Write
  - WebSearch

# Rich Documentation
documentation:
  overview: |
    This agent analyzes PDF and DOCX documents based on a custom user-provided prompt.
    Instead of performing generic summarization, it adapts its analysis to the user's
    request — whether extracting key information, identifying tasks, summarizing
    sections, or pulling structured insights from a document.

    It uses an OpenRouter-hosted model (the agent code configures arcee-ai/trinity-large-preview) for advanced language understanding and
    optionally enriches analysis with web search to validate references or
    provide additional context.

  use_cases:
    when_to_use:
      - User uploads a document and wants a summary
      - User wants important information extracted from a report
      - User wants tasks, instructions, or recommendations identified
      - User wants important dates, numbers, or entities extracted
      - User wants a structured outline of a document
      - User wants to compare multiple documents

    when_not_to_use:
      - PDF form filling (use pdf-processing agent)
      - Table extraction only (use pdf-processing agent)
      - PDF merging, splitting, or editing (use pdf-manipulator agent)
      - Image extraction from documents (use pdf-image-extractor agent)
      - Real-time document streaming (not supported)

  input_structure: |
    Accepts one or more document files with a custom analytical prompt:

    {
      "files": [
        {
          "name": "document.pdf",
          "mime_type": "application/pdf",
          "data": "<base64_encoded_bytes>"
        }
      ],
      "prompt": "Summarize key points and extract important dates",
      "options": {
        "web_search": true,
        "cite_sections": true,
        "output_format": "structured"
      }
    }

  file_constraints:
    - Max size: 50MB
    - Max pages: 500
    - Formats: PDF 1.0–2.0, DOCX (Office Open XML)

  output_format: |
    Structured Analysis:
    {
      "success": true,
      "analysis": {
        "prompt": "Summarize key points and extract important dates",
        "sections": [
          {
            "heading": "Key Summary",
            "content": "The document discusses...",
            "citations": ["Page 3"],
            "confidence": 0.93
          },
          {
            "heading": "Important Dates",
            "content": "March 15, 2026 – Submission deadline...",
            "citations": ["Page 5"],
            "confidence": 0.91
          }
        ]
      },
      "metadata": {
        "filename": "document.pdf",
        "total_pages": 12,
        "processing_time_ms": 3200,
        "web_search_used": true
      }
    }

  error_handling:
    - "Unsupported file type: Returns error with list of supported formats"
    - "Empty or corrupted file: Returns validation error with details"
    - "Prompt too vague: Agent asks clarifying question before proceeding"
    - "Document exceeds size limit: Returns error with file size constraints"
    - "Web search unavailable: Falls back to document-only analysis"
    - "Timeout: Returns partial analysis with notice of truncation"

  best_practices:
    for_developers:
      - "Encourage users to provide clear and focused prompts"
      - "Use cite_sections option to ground answers in document evidence"
      - "Enable web search when contextual knowledge is required"
      - "Handle large documents carefully with truncation warnings"
      - "Cache analysis results for repeated queries"

    for_orchestrators:
      - "Route to pdf-processing if table extraction is required"
      - "Chain with question-answering agent for follow-up queries"
      - "Use file hash to prevent repeated processing"
      - "Monitor token usage for large documents"
      - "Implement retry logic for large document processing"

  installation: |
    Required packages:
    pip install pypdf python-docx

    No system-level dependencies required for standard PDF/DOCX text extraction.
    For scanned PDFs requiring OCR, chain with pdf-processing skill.

  versioning:
    - version: "1.0.0"
      date: "2025-03-06"
      changes: "Initial release with prompt-driven document analysis"
    - version: "1.1.0"
      date: "2026-03-06"
      changes: "Added preprocessing, multi-document analysis, and confidence scoring"

# Assessment fields for skill negotiation
assessment:
  keywords:
    - analyze
    - analysis
    - document
    - summarize
    - summary
    - extract
    - insights
    - review
    - key points
    - information
    - tasks
    - dates
    - statistics

  specializations:
    - domain: document_analysis
      confidence_boost: 0.4
    - domain: contract_review
      confidence_boost: 0.3
    - domain: financial_analysis
      confidence_boost: 0.3
    - domain: custom_prompt_analysis
      confidence_boost: 0.4

  anti_patterns:
    - "fill form"
    - "form filling"
    - "merge pdf"
    - "split pdf"
    - "edit pdf"
    - "create pdf"
    - "generate pdf"
    - "extract images"
    - "convert pdf"

  complexity_indicators:
    simple:
      - "summarize"
      - "what is this about"
      - "key points"
    medium:
      - "extract information"
      - "identify topics"
      - "list dates"
      - "find numbers"
    complex:
      - "cross reference"
      - "compare documents"
      - "multi-section analysis"
      - "audit trail"

How It Works

Document Processing
  • extract_text_from_pdf: Parses PDF files using pypdf library
  • extract_text_from_docx: Extracts text from DOCX files
  • Handles file bytes and base64 decoding
  • Multi-file support with combined analysis
A2A Message Handling
  • Processes Bindu A2A FilePart messages
  • Handles both text prompts and file uploads (see the sketch below)
  • Role-based message filtering (user messages only)
  • Robust error handling for malformed data
Analysis Capabilities
  • Prompt-driven document analysis
  • Research insights extraction (methodology, gaps, findings)
  • Concise summarization capabilities
  • Content-based responses only (no hallucination)
File Support
  • PDF documents via pypdf reader
  • DOCX documents via python-docx
  • MIME type detection and validation
  • Automatic text extraction and processing
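
To exercise this end to end without the frontend, you can call the handler directly with a synthetic A2A-style message. The sketch below is illustrative only: it assumes the agent file is saved as document_analyzer.py (so it can be imported), that a sample.pdf sits next to the script, and that OPENROUTER_API_KEY is set in the environment. The part layout mirrors the fields the handler reads (parts[].kind, file.bytes, file.mimeType).

"""Local smoke test for the document analyzer handler (illustrative sketch)."""

import base64
from pathlib import Path

from document_analyzer import handler  # assumes the agent file is importable under this name

# Base64-encode a local PDF the same way a client would before sending it
pdf_b64 = base64.b64encode(Path("sample.pdf").read_bytes()).decode("utf-8")

# Build one user message shaped like the A2A history entries the handler expects
messages = [
    {
        "role": "user",
        "kind": "message",
        "parts": [
            {"kind": "text", "text": "Summarize the main points of this document"},
            {
                "kind": "file",
                "file": {
                    "name": "sample.pdf",
                    "mimeType": "application/pdf",
                    "bytes": pdf_b64,
                },
            },
        ],
    }
]

# The handler decodes the file, extracts its text, and runs the LLM analysis
print(handler(messages))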

Dependencies

uv init
uv add bindu agno python-dotenv pypdf python-docx

Environment Setup

Create a .env file:
OPENROUTER_API_KEY=your_openrouter_api_key_here

Run

uv run document-analyzer.py
Examples:
  • “Analyze the uploaded contract and extract all key obligations”
  • “Review the terms and conditions document and highlight any unusual clauses”
  • “Extract key dates and deadlines from the legal agreement”

Example API Calls

{
  "jsonrpc": "2.0",
  "method": "message/send",
  "params": {
    "message": {
      "role": "user",
      "kind": "message",
      "messageId": "9f11c870-5616-49ad-b187-d93cbb100001",
      "contextId": "9f11c870-5616-49ad-b187-d93cbb100002",
      "taskId": "9f11c870-5616-49ad-b187-d93cbb100003",
      "parts": [
        {
          "kind": "text",
          "text": "Analyze the uploaded contract and extract all key obligations"
        }
      ]
    },
     "skillId": "document-analysis-v1",
    "configuration": {
      "acceptedOutputModes": ["application/json"]
    }
  },
  "id": "9f11c870-5616-49ad-b187-d93cbb100003"
}

{
  "jsonrpc": "2.0",
  "method": "tasks/get",
  "params": {
    "taskId": "9f11c870-5616-49ad-b187-d93cbb100003"
  },
  "id": "9f11c870-5616-49ad-b187-d93cbb100004"
}
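
The requests above carry only text. To upload a document, the client adds a file part next to the text part. The sketch below posts such a request with Python's requests library; it assumes the Bindu deployment accepts JSON-RPC POSTs at its root URL (http://localhost:3773), that sample.pdf exists locally, and that fresh IDs are generated per request. The file part follows the fields the handler reads (kind, file.bytes, file.mimeType).

"""Send a document to the agent over JSON-RPC (illustrative sketch)."""

import base64
import uuid
from pathlib import Path

import requests

pdf_b64 = base64.b64encode(Path("sample.pdf").read_bytes()).decode("utf-8")

payload = {
    "jsonrpc": "2.0",
    "method": "message/send",
    "params": {
        "message": {
            "role": "user",
            "kind": "message",
            "messageId": str(uuid.uuid4()),
            "contextId": str(uuid.uuid4()),
            "taskId": str(uuid.uuid4()),
            "parts": [
                {"kind": "text", "text": "Analyze the uploaded contract and extract all key obligations"},
                {
                    "kind": "file",
                    "file": {
                        "name": "sample.pdf",
                        "mimeType": "application/pdf",
                        "bytes": pdf_b64,
                    },
                },
            ],
        },
        "skillId": "document-analysis-v1",
        "configuration": {"acceptedOutputModes": ["application/json"]},
    },
    "id": str(uuid.uuid4()),
}

# POST to the deployment URL from the Bindu config; adjust if your endpoint differs
response = requests.post("http://localhost:3773", json=payload, timeout=120)
print(response.json())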

Frontend Setup

# Clone the Bindu repository
git clone https://github.com/GetBindu/Bindu

# Navigate to frontend directory
cd frontend

# Install dependencies
npm install

# Start frontend development server
npm run dev
Open http://localhost:5173 and chat with the document analyzer.