tools/search_tools.py import requests from langchain.tools import Tool from typing import Optional, Dict, Any import json import time import logging from bs4 import BeautifulSoup
# Set the root logging level to INFO when this module is imported, then create
# the module-level logger used by the search classes below.
# NOTE(review): calling basicConfig() from an importable module changes logging
# for the whole process — confirm this is intended rather than left to the app.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class BaiduSearchTool:
    """Scrape Baidu web search result pages and render them as plain text.

    ``search`` is the public entry point. It always returns a string: either
    a numbered list of results or a fixed fallback notice when no results
    could be obtained after all attempts.
    """

    # Retry policy and request timeout; override on a subclass or instance.
    MAX_ATTEMPTS = 3
    RETRY_DELAY_SECONDS = 1
    REQUEST_TIMEOUT_SECONDS = 30

    # CSS selectors tried, in priority order, to locate result containers.
    _CONTAINER_SELECTORS = (
        'div.result',
        'div.c-container',
        'div[class*="result"]',
        'div[class*="c-container"]',
        'div.contentLeft',
    )
    # CSS selectors tried, in order, to locate a result's title element.
    _TITLE_SELECTORS = ('h3', 'h3 a', 'a[data-click]', '.t a', '.c-title a')
    # CSS selectors tried, in order, to locate a result's summary element.
    _SUMMARY_SELECTORS = ('.c-abstract', '.c-span-last', '.c-gap', '.content-right_8Zs40')

    def __init__(self):
        self.base_url = "https://www.baidu.com/s"
        # Browser-like headers sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        }

    def search(self, query: str, num_results: int = 5) -> str:
        """Search Baidu for *query*, retrying on failure.

        Args:
            query: Search keywords. A blank query skips the network entirely
                and yields the fallback notice.
            num_results: Maximum number of results to include.

        Returns:
            The formatted result list, or the fallback notice if every
            attempt ended in an HTTP error, an exception, or an empty page.
        """
        if not query or not str(query).strip():
            # Nothing to search for: avoid pointless requests and sleeps.
            return self._get_fallback_result(query)

        params = {
            'wd': query,
            'rn': num_results,
            'ie': 'utf-8',
            'oq': query,
        }
        for attempt in range(1, self.MAX_ATTEMPTS + 1):
            try:
                response = requests.get(
                    self.base_url,
                    params=params,
                    headers=self.headers,
                    timeout=self.REQUEST_TIMEOUT_SECONDS,
                )
                if response.status_code == 200:
                    # Decide success from the structured entries, not by
                    # looking for marker words in rendered text (which also
                    # contains the user's query and page titles).
                    entries = self._extract_results(response.text, num_results)
                    if entries:
                        return self._format_results(query, entries)
                    logger.warning("百度搜索结果为空,尝试第 %s 次重试", attempt)
                else:
                    logger.warning(
                        "百度搜索HTTP错误 %s,尝试第 %s 次重试",
                        response.status_code,
                        attempt,
                    )
            except Exception as e:
                # Best-effort boundary: network and parser failures both
                # count as a failed attempt; callers always get a string.
                logger.warning("百度搜索异常 %s,尝试第 %s 次重试", e, attempt)

            if attempt < self.MAX_ATTEMPTS:
                time.sleep(self.RETRY_DELAY_SECONDS)

        return self._get_fallback_result(query)

    def _parse_baidu_results_robust(self, html: str, query: str, num_results: int) -> str:
        """Parse a Baidu result page into display text.

        Never raises. Returns the formatted result list, a "no results"
        message when nothing could be extracted, or an error message when
        parsing itself failed.
        """
        try:
            entries = self._extract_results(html, num_results)
        except Exception as e:
            logger.error("解析百度搜索结果时出错: %s", e)
            return f"解析百度搜索结果时出错: {str(e)}"
        if entries:
            return self._format_results(query, entries)
        return f"未找到关于 '{query}' 的搜索结果,请尝试其他关键词"

    def _extract_results(self, html: str, num_results: int) -> list:
        """Return up to *num_results* ``(title, summary)`` tuples from *html*.

        Raises whatever the HTML parser raises; a failure on one individual
        container is logged at debug level and that container is skipped.
        """
        soup = BeautifulSoup(html, 'html.parser')

        containers = []
        for selector in self._CONTAINER_SELECTORS:
            containers.extend(soup.select(selector))
        # The selectors overlap (e.g. 'div.result' and 'div[class*="result"]'),
        # so the same element can be collected several times.
        containers = self._dedupe_by_identity(containers)

        if not containers:
            # Looser fallback: any div/table whose class mentions a keyword.
            containers = soup.find_all(
                ['div', 'table'],
                class_=lambda x: x and any(
                    keyword in str(x) for keyword in ['result', 'c-', 'content']
                ),
            )

        limit = max(num_results, 0)
        entries = []
        for container in containers:
            # Stop on collected entries, not containers inspected, so a
            # skipped container is replaced by the next usable one.
            if len(entries) >= limit:
                break
            try:
                entries.append(self._extract_entry(container))
            except Exception as e:
                logger.debug("解析单个搜索结果时出错: %s", e)
        return entries

    def _extract_entry(self, container) -> tuple:
        """Return the ``(title, summary)`` pair for one result container."""
        title = ""
        title_node = None
        for selector in self._TITLE_SELECTORS:
            title_node = container.select_one(selector)
            if title_node is not None:
                title = title_node.get_text().strip()
                break
        if not title:
            # No usable title element: fall back to the first link's text.
            link = container.find('a')
            if link is not None:
                title_node = link
                title = link.get_text().strip()
            title = title or "无标题"

        summary = ""
        for selector in self._SUMMARY_SELECTORS:
            summary_node = container.select_one(selector)
            if summary_node is not None:
                summary = summary_node.get_text().strip()
                break
        if not summary:
            described = container.find(
                ['div', 'span'],
                class_=lambda x: x and any(
                    keyword in str(x) for keyword in ['abstract', 'content', 'desc']
                ),
            )
            if described is not None:
                summary = described.get_text().strip()
        if not summary:
            # Last resort: use the container's own text. Detach the node that
            # supplied the title first so the summary does not repeat it.
            if title_node is not None:
                title_node.extract()
            text = container.get_text().strip()
            summary = text[:100] + "..." if text else "无内容摘要"

        return title, summary

    @staticmethod
    def _dedupe_by_identity(items) -> list:
        """Return *items* without repeated objects, keeping first-seen order.

        Compares by identity rather than equality, so two distinct elements
        that merely compare equal are both kept.
        """
        seen = set()
        unique = []
        for item in items:
            if id(item) not in seen:
                seen.add(id(item))
                unique.append(item)
        return unique

    @staticmethod
    def _format_results(query: str, entries) -> str:
        """Render ``(title, summary)`` pairs as a list numbered from 1."""
        numbered = [
            f"{position}. {title}\n {summary}"
            for position, (title, summary) in enumerate(entries, start=1)
        ]
        return f"百度搜索 '{query}' 的结果:\n" + "\n\n".join(numbered)

    def _get_fallback_result(self, query: str) -> str:
        """Return the fixed notice shown when no search results are available."""
        return (
            f"搜索 '{query}' 的结果(网络搜索暂时不可用): 建议您: 1. 检查网络连接 2. 尝试使用更具体的关键词 3. 稍后重试\n"
            "当前提供的信息基于模型训练数据,可能不是最新的。"
        )
class ReliableSearchTool:
    """Search front end that forwards every query to a ``BaiduSearchTool``."""

    def __init__(self):
        # Single backend; created once and reused for every search call.
        self.baidu_tool = BaiduSearchTool()

    def search(self, query: str, num_results: int = 5) -> str:
        """Run *query* through the Baidu backend and return its text output.

        Args:
            query: Search keywords.
            num_results: Maximum number of results requested from the backend.

        Returns:
            Whatever string the backend's ``search`` method returns.
        """
        backend = self.baidu_tool
        return backend.search(query, num_results)
def create_search_tools():
    """Build the list of search ``Tool`` objects.

    Both tools call the ``search`` method of one shared
    ``ReliableSearchTool`` instance; they differ only in name and
    description.

    Returns:
        A two-element list: the ``web_search`` tool, then ``baidu_search``.
    """
    backend = ReliableSearchTool()
    tool_specs = (
        ("web_search", "使用网络搜索引擎获取最新信息。输入:搜索查询字符串"),
        ("baidu_search", "使用百度获取内容。输入:搜索查询字符串"),
    )
    return [
        Tool(name=tool_name, func=backend.search, description=tool_description)
        for tool_name, tool_description in tool_specs
    ]
# Module-level tool list, built once when this module is imported.
search_tools = create_search_tools()
|