Create `web_scraping_agent.py` with the code below, or save it directly from your editor. (The underscore form matches the usage line in the script's docstring and keeps the module importable in Python.)
"""Web Scraping AI Agent (Local & Cloud SDK).

Crawls web pages, extracts structured data, cleans and formats outputs,
and prepares datasets for analysis or integration.

Features:
- ScrapeGraph AI for intelligent structured extraction
- Mem0 for persistent memory (dedup, extraction profiles)
- OpenRouter (openai/gpt-oss-120b) for synthesis and formatting
- Local run mode + Bindu Cloud SDK deployment

Usage: python web_scraping_agent.py
Environment: Requires SCRAPEGRAPH_API_KEY, MEM0_API_KEY, OPENROUTER_API_KEY in .env file
"""

import os

from dotenv import load_dotenv

# Load .env before the SDK imports below so any import-time configuration
# they read (API keys, endpoints) is already in the environment.
load_dotenv()

from bindu.penguin.bindufy import bindufy
from agno.agent import Agent
from agno.models.openrouter import OpenRouter
from agno.tools.scrapegraph import ScrapeGraphTools
from agno.tools.mem0 import Mem0Tools

# Initialize the web scraping agent.
agent = Agent(
    instructions=(
        "You are a web scraping assistant. Given a URL and an optional extraction prompt, "
        "use ScrapeGraph to extract structured data from the page. Clean and format the output "
        "into JSON. Use memory to avoid re-scraping URLs you have already processed and to "
        # Fixed: original had a stray space before the period ("preferences .").
        "remember extraction preferences."
    ),
    model=OpenRouter(
        id="openai/gpt-oss-120b",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    ),
    tools=[
        ScrapeGraphTools(api_key=os.getenv("SCRAPEGRAPH_API_KEY")),
        Mem0Tools(api_key=os.getenv("MEM0_API_KEY")),
    ],
)

# Agent configuration for Bindu.
config = {
    "author": "bindu.builder@getbindu.com",
    "name": "web_scraping_agent",
    "description": (
        "AI-enabled web scraping agent that collects, structures, and processes "
        "data from websites for analysis and automation."
    ),
    "deployment": {
        "url": "http://localhost:3773",
        "expose": True,
        "cors_origins": ["http://localhost:5173"],
    },
    "skills": ["skills/web-scraping-skill"],
}


def handler(messages: list[dict[str, str]]) -> str:
    """Process incoming messages and return the agent response.

    Args:
        messages: List of message dictionaries containing conversation history.

    Returns:
        Extracted and structured data from the requested web page, or a
        usage prompt when no actionable input is available.
    """
    # Guard clauses: nothing to do without a message carrying actual content.
    if not messages:
        return "Please provide a URL and an extraction prompt."

    last = messages[-1]
    latest = last.get("content", "") if isinstance(last, dict) else str(last)
    if not latest:
        # An empty content string would waste a model call — ask for input instead.
        return "Please provide a URL and an extraction prompt."

    result = agent.run(input=latest)
    # The run result shape varies by SDK version; prefer .content, then .response.
    if hasattr(result, "content"):
        return result.content
    if hasattr(result, "response"):
        return result.response
    return str(result)


if __name__ == "__main__":
    # Bindu-fy the agent — converts it to a discoverable, interoperable Bindu agent.
    bindufy(config, handler)
# Fetch the Bindu sources and bring up the frontend dev server.

# Clone the Bindu repository
git clone https://github.com/GetBindu/Bindu

# Navigate to frontend directory
cd frontend

# Install dependencies
npm install

# Start frontend development server
npm run dev