Code
Create `document-analyzer.py` with the code below, or save it directly from your editor.
"""Document Analyzer Agent — analyzes uploaded PDF/DOCX documents based on a user prompt.
Features:
- Works with Bindu A2A FilePart messages
- Supports PDF and DOCX
- Prompt-driven analysis
- Multi-file support
"""
from bindu.penguin.bindufy import bindufy
from agno.agent import Agent
from agno.models.openrouter import OpenRouter
from dotenv import load_dotenv
import os
import io
import base64
from pypdf import PdfReader
from docx import Document
load_dotenv()
# Define LLM agent
# Module-level LLM agent: prompt-driven document analysis.
# The instructions constrain the model to answer ONLY from the supplied
# document text; `handler` below passes prompt + extracted text to agent.run.
agent = Agent(
    instructions = """
You are an advanced document analysis assistant.
Your job is to analyze uploaded documents and answer the user's prompt
based ONLY on the document content.
Guidelines:
- Carefully read the document text
- Extract relevant insights requested in the prompt
- Be structured and clear
- If the prompt asks for research insights, provide:
- methodology
- research gap
- key findings
- conclusions
- If the prompt asks for summary, provide concise bullet points
- Do not hallucinate information outside the document
""",
    # Free-tier model served through OpenRouter; the API key is loaded
    # from .env via load_dotenv() above (None if the variable is unset).
    model = OpenRouter(
        id = "arcee-ai/trinity-large-preview:free",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    ),
)
# Document Parsing
def extract_text_from_pdf(file_bytes):
    """Return all extractable text from a PDF given as raw bytes.

    Pages whose extraction fails are skipped silently (best-effort);
    a file that pypdf cannot open at all raises ValueError.
    """
    try:
        reader = PdfReader(io.BytesIO(file_bytes))
    except Exception as e:
        raise ValueError(f"Invalid PDF file: {str(e)}")
    collected = []
    for pdf_page in reader.pages:
        try:
            content = pdf_page.extract_text()
        except Exception:
            continue
        if content:
            collected.append(content)
    return "\n".join(collected)
def extract_text_from_docx(file_bytes):
    """Return the paragraph text of a DOCX file given as raw bytes.

    Raises:
        ValueError: if the bytes are not a readable DOCX document.
            Mirrors the error contract of extract_text_from_pdf, so
            callers can uniformly catch ValueError for bad uploads
            (previously a corrupt DOCX leaked a raw python-docx error).

    NOTE(review): only top-level paragraphs are extracted; text inside
    tables or headers/footers is not included — confirm that is intended.
    """
    try:
        doc = Document(io.BytesIO(file_bytes))
    except Exception as e:
        raise ValueError(f"Invalid DOCX file: {str(e)}")
    return "\n".join(p.text for p in doc.paragraphs)
def extract_document_text(file_bytes, mime_type):
    """Dispatch text extraction according to the document's MIME type.

    Supports PDF and DOCX; any other MIME type raises ValueError.
    """
    docx_mime = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )
    if mime_type == "application/pdf":
        return extract_text_from_pdf(file_bytes)
    if mime_type == docx_mime:
        return extract_text_from_docx(file_bytes)
    raise ValueError(f"Unsupported file type: {mime_type}")
# FilePart processing
def get_file_bytes(part):
    """Return the raw bytes carried by an A2A FilePart.

    The payload may live under "bytes" or "data" ("bytes" wins when both
    are present). A str payload is treated as base64 and decoded; bytes
    pass through unchanged. Raises ValueError when neither key exists.
    """
    info = part["file"]
    for key in ("bytes", "data"):
        if key in info:
            payload = info[key]
            break
    else:
        raise ValueError("Unsupported file part format")
    if isinstance(payload, str):
        return base64.b64decode(payload)
    return payload
# Handler
def handler(messages: list[dict]):
    """Analyze uploaded documents from the A2A task history.

    Receives task.history — a list of A2A Message objects.
    Each message has: role, parts[], kind, messageId, contextId, taskId
    Each part has: kind="text"|"file", and either text or file.bytes+mimeType

    Collects every text part from user messages as the prompt and every
    file part as a document, then asks the LLM agent to analyze the
    combined content. (Previously, multiple text parts silently
    overwrote each other and only the last survived.)

    Returns a status string on early exit, otherwise the agent.run result.
    """
    if not messages:
        return "No messages received."
    prompt_parts = []
    extracted_docs = []
    for msg in messages:
        role = msg.get("role")
        # Only user messages contribute; messages with no role at all
        # are still processed (matches the original leniency).
        if role is not None and role != "user":
            continue
        for part in msg.get("parts") or []:
            kind = part.get("kind")
            if kind == "text":
                text = part.get("text", "")
                if text:
                    prompt_parts.append(text)
            elif kind == "file":
                try:
                    # Delegate decoding to get_file_bytes instead of
                    # duplicating its bytes/data + base64 logic inline.
                    file_bytes = get_file_bytes(part)
                    mime_type = part.get("file", {}).get("mimeType", "")
                    extracted_docs.append(
                        extract_document_text(file_bytes, mime_type)
                    )
                except Exception as e:
                    # Best-effort: record the failure inline so the agent
                    # (and the user) can see which file was unreadable.
                    extracted_docs.append(f"Error processing file: {str(e)}")
    if not extracted_docs:
        return "No valid document found in the messages."
    prompt = "\n".join(prompt_parts)
    combined_document = "\n\n".join(extracted_docs)
    result = agent.run(input=f"""
User Prompt:
{prompt}
Document Content:
{combined_document}
Provide analysis based on the prompt.
""")
    return result
# Bindu config
config = {
"author" : "vyomrohila@gmail.com",
"name" : "document_analyzer_agent",
"description": "AI agent that analyzes uploaded PDF or DOCX documents based on a user prompt.",
"deployment": {
"url": "http://localhost:3773",
"expose": True,
"cors_origins": ["http://localhost:5173"],
},
"skills": ["skills/document-processing"],
"enable_system_message": False,
}
if __name__ == "__main__":
bindufy(config, handler)
Skill Configuration
Create `skills/document-processing/skill.yaml`:
# Document Analysis Skill
# Analyze documents (PDF or DOCX) based on custom user prompts to extract insights
# Basic Metadata
id: document-analysis-v1
name: document-analysis
version: 1.1.0
author: your.email@example.com
# Description
description: |
Analyze PDF and DOCX documents based on custom user prompts.
Extracts targeted insights such as summaries, key information,
important dates, financial data, legal clauses, action items,
or structured knowledge from any document.
Supports a wide range of documents including reports, contracts,
manuals, resumes, research papers, meeting notes, and business documents.
# Tags and Modes
tags:
- document
- analysis
- pdf
- docx
- summarization
- knowledge-extraction
- nlp
input_modes:
- application/pdf
- application/vnd.openxmlformats-officedocument.wordprocessingml.document
output_modes:
- text/plain
- application/json
# Example Queries
examples:
- "Summarize the main points of this document"
- "Extract the key information from this file"
- "Provide a short summary of each section"
- "List all important dates mentioned in the document"
- "Identify the main topics discussed"
- "Extract all action items or tasks mentioned"
- "List any names, organizations, or locations referenced"
- "Highlight the most important insights from the document"
- "Explain the document in simple terms"
- "Provide a structured outline of the document"
- "Extract any numbers, statistics, or financial data"
- "Identify any instructions or procedures mentioned"
- "List conclusions or recommendations"
- "Find any deadlines or time-sensitive information"
- "Provide key takeaways from the document"
# Detailed Capabilities
capabilities_detail:
document_preprocessing:
supported: true
features:
- text_cleaning
- header_footer_removal
- page_number_removal
- whitespace_normalization
- duplicate_line_removal
document_analysis:
supported: true
types:
- topic_identification
- key_information_extraction
- entity_extraction
- important_date_detection
- instruction_identification
output_formats:
- structured_report
- bullet_points
- section_wise_summary
document_summarization:
supported: true
types:
- executive_summary
- section_wise_summary
- key_points_extraction
- simplified_explanation
customizable: true
prompt_driven: true
contract_and_legal_analysis:
supported: true
types:
- clause_extraction
- obligation_identification
- risk_flagging
- party_identification
- date_and_deadline_extraction
financial_document_analysis:
supported: true
types:
- figure_extraction
- trend_identification
- ratio_analysis
- anomaly_flagging
multi_document_analysis:
supported: true
operations:
- document_comparison
- cross_document_summary
- duplicate_information_detection
- consistency_check
custom_prompt_analysis:
supported: true
description: "User provides any free-form analytical prompt; agent tailors response accordingly"
web_search_enrichment: true
# Requirements
requirements:
packages:
- pypdf>=3.0.0
- python-docx>=1.1.0
system: []
min_memory_mb: 256
# Performance Metrics
performance:
avg_processing_time_ms: 3000
avg_time_per_page_ms: 300
max_file_size_mb: 50
max_pages: 500
concurrent_requests: 5
memory_per_request_mb: 256
timeout_per_page_seconds: 30
scalability: horizontal
# Tool Restrictions
allowed_tools:
- Read
- Write
- WebSearch
# Rich Documentation
documentation:
overview: |
This agent analyzes PDF and DOCX documents based on a custom user-provided prompt.
Instead of performing generic summarization, it adapts its analysis to the user's
request — whether extracting key information, identifying tasks, summarizing
sections, or pulling structured insights from a document.
It uses the `arcee-ai/trinity-large-preview` model via OpenRouter for
advanced language understanding and optionally enriches analysis with
web search to validate references or provide additional context.
use_cases:
when_to_use:
- User uploads a document and wants a summary
- User wants important information extracted from a report
- User wants tasks, instructions, or recommendations identified
- User wants important dates, numbers, or entities extracted
- User wants a structured outline of a document
- User wants to compare multiple documents
when_not_to_use:
- PDF form filling (use pdf-processing agent)
- Table extraction only (use pdf-processing agent)
- PDF merging, splitting, or editing (use pdf-manipulator agent)
- Image extraction from documents (use pdf-image-extractor agent)
- Real-time document streaming (not supported)
input_structure: |
Accepts one or more document files with a custom analytical prompt:
{
"files": [
{
"name": "document.pdf",
"mime_type": "application/pdf",
"data": "<base64_encoded_bytes>"
}
],
"prompt": "Summarize key points and extract important dates",
"options": {
"web_search": true,
"cite_sections": true,
"output_format": "structured"
}
}
file_constraints:
- Max size: 50MB
- Max pages: 500
- Formats: PDF 1.0–2.0, DOCX (Office Open XML)
output_format: |
Structured Analysis:
{
"success": true,
"analysis": {
"prompt": "Summarize key points and extract important dates",
"sections": [
{
"heading": "Key Summary",
"content": "The document discusses...",
"citations": ["Page 3"],
"confidence": 0.93
},
{
"heading": "Important Dates",
"content": "March 15, 2026 – Submission deadline...",
"citations": ["Page 5"],
"confidence": 0.91
}
]
},
"metadata": {
"filename": "document.pdf",
"total_pages": 12,
"processing_time_ms": 3200,
"web_search_used": true
}
}
error_handling:
- "Unsupported file type: Returns error with list of supported formats"
- "Empty or corrupted file: Returns validation error with details"
- "Prompt too vague: Agent asks clarifying question before proceeding"
- "Document exceeds size limit: Returns error with file size constraints"
- "Web search unavailable: Falls back to document-only analysis"
- "Timeout: Returns partial analysis with notice of truncation"
best_practices:
for_developers:
- "Encourage users to provide clear and focused prompts"
- "Use cite_sections option to ground answers in document evidence"
- "Enable web search when contextual knowledge is required"
- "Handle large documents carefully with truncation warnings"
- "Cache analysis results for repeated queries"
for_orchestrators:
- "Route to pdf-processing if table extraction is required"
- "Chain with question-answering agent for follow-up queries"
- "Use file hash to prevent repeated processing"
- "Monitor token usage for large documents"
- "Implement retry logic for large document processing"
installation: |
Required packages:
pip install pypdf python-docx
No system-level dependencies required for standard PDF/DOCX text extraction.
For scanned PDFs requiring OCR, chain with pdf-processing skill.
versioning:
- version: "1.0.0"
date: "2025-03-06"
changes: "Initial release with prompt-driven document analysis"
- version: "1.1.0"
date: "2026-03-06"
changes: "Added preprocessing, multi-document analysis, and confidence scoring"
# Assessment fields for skill negotiation
assessment:
keywords:
- analyze
- analysis
- document
- summarize
- summary
- extract
- insights
- review
- key points
- information
- tasks
- dates
- statistics
specializations:
- domain: document_analysis
confidence_boost: 0.4
- domain: contract_review
confidence_boost: 0.3
- domain: financial_analysis
confidence_boost: 0.3
- domain: custom_prompt_analysis
confidence_boost: 0.4
anti_patterns:
- "fill form"
- "form filling"
- "merge pdf"
- "split pdf"
- "edit pdf"
- "create pdf"
- "generate pdf"
- "extract images"
- "convert pdf"
complexity_indicators:
simple:
- "summarize"
- "what is this about"
- "key points"
medium:
- "extract information"
- "identify topics"
- "list dates"
- "find numbers"
complex:
- "cross reference"
- "compare documents"
- "multi-section analysis"
- "audit trail"
How It Works
Document Processing
- extract_text_from_pdf: Parses PDF files using the pypdf library
- extract_text_from_docx: Extracts text from DOCX files
- Handles file bytes and base64 decoding
- Multi-file support with combined analysis
- Processes Bindu A2A FilePart messages
- Handles both text prompts and file uploads
- Role-based message filtering (user messages only)
- Robust error handling for malformed data
- Prompt-driven document analysis
- Research insights extraction (methodology, gaps, findings)
- Concise summarization capabilities
- Content-based responses only (no hallucination)
- PDF documents via pypdf reader
- DOCX documents via python-docx
- MIME type detection and validation
- Automatic text extraction and processing
Dependencies
uv init
uv add bindu agno python-dotenv pypdf python-docx
Environment Setup
Create a `.env` file:
OPENROUTER_API_KEY=your_openrouter_api_key_here
Run
uv run document-analyzer.py
- “Analyze the uploaded contract and extract all key obligations”
- “Review the terms and conditions document and highlight any unusual clauses”
- “Extract key dates and deadlines from the legal agreement”
Example API Calls
Message Send Request
{
"jsonrpc": "2.0",
"method": "message/send",
"params": {
"message": {
"role": "user",
"kind": "message",
"messageId": "9f11c870-5616-49ad-b187-d93cbb100001",
"contextId": "9f11c870-5616-49ad-b187-d93cbb100002",
"taskId": "9f11c870-5616-49ad-b187-d93cbb100003",
"parts": [
{
"kind": "text",
"text": "Analyze the uploaded contract and extract all key obligations"
}
]
},
"skillId": "document-analysis-v1",
"configuration": {
"acceptedOutputModes": ["application/json"]
}
},
"id": "9f11c870-5616-49ad-b187-d93cbb100003"
}
Task Get Request
{
"jsonrpc": "2.0",
"method": "tasks/get",
"params": {
"taskId": "9f11c870-5616-49ad-b187-d93cbb100003"
},
"id": "9f11c870-5616-49ad-b187-d93cbb100004"
}
Frontend Setup
# Clone the Bindu repository
git clone https://github.com/GetBindu/Bindu
# Navigate to frontend directory
cd frontend
# Install dependencies
npm install
# Start frontend development server
npm run dev