Agent Function Calling-动态SQL生成与数据库查询

实现一个基于自然语言查询的智能数据库交互系统,结合了OpenAI的Function Calling功能与SQLite数据库操作,核心功能是将用户的中文问题自动转换为SQL查询并返回结构化结果。该项目代码为构建自然语言驱动的数据分析系统提供了基础框架,可扩展应用于电商报表、CRM系统等场景。

from openai import OpenAI
import pymysql
import json
from dotenv import load_dotenv
load_dotenv()
client = OpenAI()

database_schema_string = """
-- 创建班级表
CREATE TABLE Classes (
class_id INT PRIMARY KEY,
class_name VARCHAR(100) NOT NULL
);

-- 创建学生表
CREATE TABLE Students (
student_id INT PRIMARY KEY,
name VARCHAR(100) NOT NULL,
class_id INT,
FOREIGN KEY (class_id) REFERENCES Classes(class_id)
);

-- 创建成绩表
CREATE TABLE Scores (
score_id INT PRIMARY KEY,
student_id INT,
subject VARCHAR(100) NOT NULL,
score FLOAT NOT NULL,
FOREIGN KEY (student_id) REFERENCES Students(student_id)
);
"""

connection = pymysql.connect(
host='127.0.0.1',
port=3306,
user='root',
password='12345678',
database='ALLLM',
charset='utf8mb4' # 添加推荐的字符集参数
)

cursor = connection.cursor()

def get_sql_completion(messages, model="gpt-4o-mini"):
response = client.chat.completions.create(
model=model,
messages=messages,
temperature=0,
tools=[{
"type": "function",
"function": {
"name": "ask_database",
"description": "使用这个函数来回答有关业务的用户问题。输出应该是一个完整的SQL查询",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": f"""
SQL查询提取信息以回答用户的问题。
SQL应该使用这个数据库架构来编写:
{database_schema_string}
查询应以纯文本形式返回,而不是JSON格式。
查询应仅包含MySQL支持的语法.
""",
}
},
"required": ["query"],
}
}
}],
)
return response.choices[0].message

def ask_database(query):
cursor.execute(query)
records = cursor.fetchall()
return records


prompt = "查询二班的学生英语成绩是多少?"
messages = [
{"role": "system", "content": "基于表回答用户问题"},
{"role": "user", "content": prompt}
]

response = get_sql_completion(messages)
messages.append(response)


if response.tool_calls is not None:
tool_call = response.tool_calls[0]
if tool_call.function.name == "ask_database":
arguments = tool_call.function.arguments
args = json.loads(arguments)
print("====SQL====")
print(args["query"])
result = ask_database(args["query"])
print("====MySQL数据库查询结果====")
print(result)

messages.append({
"tool_call_id": tool_call.id,
"role": "tool",
"name": "ask_database",
"content": str(result)
})
response = get_sql_completion(messages)
print("====最终回复====")
print(response.content)
  • 12306实时票务查询
# 这个项目通过爬取12306网站的车次信息,并调用查票函数,利用大模型的能力查询某日某趟车次的余票情况
# 1.首先定义爬虫函数
# 2.定义大模型调用函数
# 3.调用大模型给出回复

from openai import OpenAI
import os
import json
import requests
from dotenv import load_dotenv

load_dotenv()
import pandas as pd
from datetime import datetime

client = OpenAI()


def check_tick(date, start, end):
url = 'https://kyfw.12306.cn/otn/leftTicket/queryG?leftTicketDTO.train_date={}&leftTicketDTO.from_station={}&leftTicketDTO.to_station={}&purpose_codes=ADULT'.format(
date, start, end)
headers = {
"Accept": "*/*",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"If-Modified-Since": "0",
"Pragma": "no-cache",
"Referer": "https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
"X-Requested-With": "XMLHttpRequest",
"sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Google Chrome\";v=\"128\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"macOS\""
}
cookies = {
"_uab_collina": "",
"JSESSIONID": "",
"BIGipServerotn": "",
"BIGipServerpassport": "",
"guidesStatus": "",
"highContrastMode": "",
"cursorStatus": "",
"route": "",
"_jc_save_fromStation": "",
"_jc_save_toStation": "",
"_jc_save_fromDate": "",
"_jc_save_toDate": "",
"_jc_save_wfdc_flag": ""
}
session = requests.session()
res = session.get(url, headers=headers, cookies=cookies)

data = res.json()

# 这是一个列表
result = data["data"]["result"]
print('result:', result)
lis = []
for index in result:
index_list = index.replace('有', 'Yes').replace('无', 'No').split('|')
print('index_list:',index_list)
train_number = index_list[3] # 车次
if 'G' in train_number:
time_1 = index_list[8] # 出发时间
time_2 = index_list[9] # 到达时间
prince_seat = index_list[25] # 特等座
first_class_seat = index_list[31] # 一等座
second_class = index_list[30] # 二等座
dit = {
'车次': train_number,
'出发时间': time_1,
'到站时间': time_2,
"是否可以预定": index_list[11],
}
lis.append(dit)
else:
# print(index_list)
time_1 = index_list[8] # 出发时间
time_2 = index_list[9] # 到达时间
dit = {
'车次': train_number,
'出发时间': time_1,
'到站时间': time_2,
"是否可以预定": index_list[11],

}
lis.append(dit)
# print(lis)
content = pd.DataFrame(lis)
# print(content)
return content


def check_date():
today = datetime.now().date()
return today


def get_completion(messages, model="gpt-3.5-turbo"):
response = client.chat.completions.create(
model=model,
messages=messages,
temperature=0,
max_tokens=1024,
tools=[
{
"type": "function",
"function": {
"name": "check_tick",
"description": "给定日期查询有没有票",
"parameters": {
"type": "object",
"properties": {
"date": {
"type": "string",
"description": "日期",
},
"start": {
"type": "string",
"description": "出发站 北京是BJP",
},
"end": {
"type": "string",
"description": "终点站 天津是TJP",
}
},
}
}
},
{
"type": "function",
"function": {
"name": "check_date",
"description": "返回当前的日期",
"parameters": {
"type": "object",
"properties": {
# "str": {
# "type": "string",
# "description": "返回今天的日期",
# }
}
}
}
}
]
)
return response.choices[0].message


prompt = "查询明天北京到上海的票"

messages = [
{"role": "system", "content": "你是一个地图通,你可以找到任何地址"},
{"role": "user", "content": prompt}
]
response = get_completion(messages)

if (response.content is None): # 解决 OpenAI 的一个 400 bug
response.content = ""
messages.append(response) # 把大模型的回复加入到对话中
print("=====GPT回复=====")
print(response)

# 如果返回的是函数调用结果,则打印出来
while (response.tool_calls is not None):
# 1106 版新模型支持一次返回多个函数调用请求
for tool_call in response.tool_calls:
args = json.loads(tool_call.function.arguments)
print("参数:", args)

if (tool_call.function.name == "check_tick"):
print("Call: check_tick")
result = check_tick(**args)
elif (tool_call.function.name == "check_date"):
print("Call: check_date")
result = check_date()

print("=====函数返回=====")
print(result)

messages.append({
"tool_call_id": tool_call.id, # 用于标识函数调用的 ID
"role": "tool",
"name": tool_call.function.name,
"content": str(result) # 数值result 必须转成字符串
})

response = get_completion(messages)
if (response.content is None): # 解决 OpenAI 的一个 400 bug
response.content = ""
print("=====GPT回复2=====")
print(response)
messages.append(response) # 把大模型的回复加入到对话中

print("=====最终回复=====")
print(response.content)