JS大模型基础实践(含Structured Outputs)
1. 引言
昨天在做Beyond Books 不止于纸上的故事的多Agent、流式返回、Structured Outputs(结构化输出)的时候(现在还在分支开发,没提主干),在网上找了好久。
所以在此留下一些很标准的JS的GPT调用代码。
传统的、流式、json、结构化输出都有。
2. 环境准备
- 纯html,有个浏览器就行。新建一个文本文件,把后缀改成 .html,往里面塞示例代码,填入API Key和地址之后双击这个html文件就能用。
- 只要扔给AI,就可以方便地写进其他项目里,比如我的Beyond Books 不止于纸上的故事
3. 如果你还没有大模型API
这篇给你准备了超新手的教程
- 选择合适的LLM服务提供商
- 拿到API keys就可以玩了
- 下面的示例中,把YOUR_API_KEY替换为你的API_KEY,把YOUR_API_URL替换为你的API_URL
有啥坑:比如这家 gptapi CORS没配置好,本地以及一些自部署的环境API连接不到它
而且它还不支持流式
注:官方API文档 https://platform.openai.com/docs/api-reference/chat/create
4. 介绍与示例
传统
就是你来我往的整块回复。
用习惯网页端的朋友们注意了,API基本都是无状态的,不会维护会话历史,这历史从来都是自己存的。
网页端只是帮忙存而已。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
| <!DOCTYPE html> <html lang="zh"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>OpenAI 聊天</title> <style> body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; } #chat { height: 400px; border: 1px solid #ccc; overflow-y: scroll; padding: 10px; margin-bottom: 10px; } #userInput { width: 100%; padding: 5px; } #sendButton { margin-top: 10px; } </style> </head> <body> <h1>OpenAI 聊天</h1> <div id="chat"></div> <input type="text" id="userInput" placeholder="输入您的消息..."> <button id="sendButton">发送</button>
<script> /* Cache the three page elements the script works with: the chat log, the text input and the send button. */ const chatDiv = document.getElementById('chat'); const userInput = document.getElementById('userInput'); const sendButton = document.getElementById('sendButton');
/* YOUR_API_KEY and YOUR_API_URL are placeholders that this snippet does not define; replace them with your own values before running. The chat-completions path is appended to the base URL. */ const API_KEY = YOUR_API_KEY; const API_URL = YOUR_API_URL + "/v1/chat/completions"; const MODEL = 'gpt-4o-mini';
/* Send on a button click, or when Enter is pressed inside the input. */ sendButton.addEventListener('click', sendMessage); userInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
/**
 * Thin wrapper around a chat-completions endpoint.
 *
 * Reads the surrounding script's API_URL, API_KEY and MODEL constants.
 */
class AIService {
  /**
   * Sends `prompt` as a single user message and returns the reply text.
   *
   * The call is stateless: no earlier conversation turns are included.
   *
   * @param {string} prompt - Text of the user message.
   * @returns {Promise<string>} Content of the first choice in the response.
   * @throws {Error} When the HTTP status is not 2xx or the response carries
   *   no message content. Network and parsing errors are logged and rethrown.
   */
  static async callLargeLanguageModel(prompt) {
    console.log(`[DEBUG] 开始调用大语言模型,提示词: ${prompt}`);
    console.log(`[DEBUG] API_URL: ${API_URL}`);
    console.log(`[DEBUG] MODEL: ${MODEL}`);

    const requestBody = JSON.stringify({
      model: MODEL,
      messages: [{ role: "user", content: prompt }],
      max_tokens: 1000
    });
    console.log(`[DEBUG] 请求体: ${requestBody}`);

    try {
      console.log(`[DEBUG] 发送API请求...`);
      // No `credentials: 'include'`: authentication travels in the
      // Authorization header, and a credentialed cross-origin request is
      // rejected by the browser when the server answers with a wildcard
      // Access-Control-Allow-Origin.
      const response = await fetch(API_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${API_KEY}`,
          'Accept': 'application/json'
        },
        body: requestBody
      });

      console.log(`[DEBUG] 收到API响应,状态码: ${response.status}`);

      if (!response.ok) {
        console.error(`[ERROR] API请求失败: ${response.status}`);
        throw new Error(`API 请求失败: ${response.status}`);
      }

      const data = await response.json();
      console.log(`[DEBUG] API响应数据: ${JSON.stringify(data)}`);

      // Fail with a readable message instead of a TypeError when the
      // response has no choices or no text content.
      const firstChoice = Array.isArray(data.choices) ? data.choices[0] : undefined;
      const content = firstChoice && firstChoice.message ? firstChoice.message.content : undefined;
      if (typeof content !== 'string') {
        throw new Error('API 响应中没有可用的回复内容');
      }

      console.log(`[DEBUG] 模型返回内容长度: ${content.length}`);
      return content;
    } catch (error) {
      console.error(`[ERROR] 调用大语言模型时发生错误: ${error.message}`);
      throw error;
    }
  }
}
/**
 * Sends the trimmed content of the input box to the model and shows both
 * sides of the exchange in the chat log.
 *
 * Empty input is ignored. A failed call is logged to the console and a
 * generic apology line is shown in place of the reply.
 *
 * @returns {Promise<void>}
 */
async function sendMessage() {
  const text = userInput.value.trim();
  if (!text) {
    return;
  }

  appendMessage(`用户: ${text}`);
  userInput.value = '';

  try {
    const reply = await AIService.callLargeLanguageModel(text);
    appendMessage(`助手: ${reply}`);
  } catch (err) {
    console.error('错误:', err);
    appendMessage('助手: 抱歉,发生了错误。');
  }
}
/* Adds `message` to the chat log as a new <div> (assigned through textContent, so it is shown as plain text) and scrolls the log to the bottom. */ function appendMessage(message) { const messageDiv = document.createElement('div'); messageDiv.textContent = message; chatDiv.appendChild(messageDiv); chatDiv.scrollTop = chatDiv.scrollHeight; } </script> </body> </html>
|
流式
比较丝滑,可以在响应的一开始就开始显示。
问题就是不太能json格式化解析,适合做思维链的最后一个显示端。
就是到最后一步的时候不用json格式解析了,就让GPT流式返回。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
| <!DOCTYPE html> <html lang="zh"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>OpenAI 流式回复聊天</title> <style> body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; } #chat { height: 400px; border: 1px solid #ccc; overflow-y: scroll; padding: 10px; margin-bottom: 10px; } #userInput { width: 100%; padding: 5px; } #sendButton { margin-top: 10px; } </style> </head> <body> <h1>OpenAI 流式回复聊天</h1> <div id="chat"></div> <input type="text" id="userInput" placeholder="输入您的消息..."> <button id="sendButton">发送</button>
<script> /* Cache the three page elements the script works with: the chat log, the text input and the send button. */ const chatDiv = document.getElementById('chat'); const userInput = document.getElementById('userInput'); const sendButton = document.getElementById('sendButton');
/* YOUR_API_KEY is a placeholder that this snippet does not define; replace it with your own key before running. */ const OPENAI_API_KEY = YOUR_API_KEY;
/* Send on a button click, or when Enter is pressed inside the input. */ sendButton.addEventListener('click', sendMessage); userInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
/**
 * Sends the trimmed content of the input box as a single user message with
 * `stream: true` and renders the reply incrementally as "data: " lines
 * arrive on the response body.
 *
 * Empty input is ignored. Any failure (network error, non-2xx status, broken
 * stream) is logged to the console and shown as a generic apology line.
 *
 * YOUR_API_URL is a placeholder defined outside this function; it must be
 * replaced with the API base URL before running.
 *
 * @returns {Promise<void>}
 */
async function sendMessage() {
  const message = userInput.value.trim();
  if (!message) {
    return;
  }

  appendMessage('用户: ' + message);
  userInput.value = '';

  try {
    const response = await fetch(YOUR_API_URL + "/v1/chat/completions", {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${OPENAI_API_KEY}`
      },
      body: JSON.stringify({
        model: 'gpt-3.5-turbo',
        messages: [{ role: 'user', content: message }],
        stream: true
      })
    });

    // An error response carries a JSON error body rather than an event
    // stream; without this check the loop below would render nothing and
    // the user would get no feedback at all.
    if (!response.ok) {
      throw new Error(`API 请求失败: ${response.status}`);
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder('utf-8');
    let buffer = '';
    let assistantResponse = '';
    let finished = false;

    // Processes one complete line of the stream. Returns true when the line
    // is the "[DONE]" sentinel, false otherwise.
    const handleLine = (rawLine) => {
      // Tolerate CRLF line endings.
      const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
      if (!line.startsWith('data: ')) {
        return false;
      }
      const jsonStr = line.slice(6);
      if (jsonStr === '[DONE]') {
        return true;
      }
      try {
        const data = JSON.parse(jsonStr);
        // Some chunks carry an empty `choices` array or a delta without
        // content; skip them instead of throwing.
        const choice = Array.isArray(data.choices) ? data.choices[0] : undefined;
        const piece = choice && choice.delta ? choice.delta.content : undefined;
        if (piece) {
          assistantResponse += piece;
          updateAssistantMessage(assistantResponse);
        }
      } catch (e) {
        console.error('JSON解析错误:', e);
      }
      return false;
    };

    while (!finished) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last element is an incomplete line (or ''); keep it for the
      // next chunk.
      buffer = lines.pop();

      for (const line of lines) {
        if (handleLine(line)) {
          // Stop reading entirely, not just this batch of lines.
          finished = true;
          break;
        }
      }
    }

    // The stream ended without "[DONE]": flush the decoder and process a
    // final line that was not newline-terminated.
    if (!finished) {
      buffer += decoder.decode();
      if (buffer) {
        handleLine(buffer);
      }
    }
  } catch (error) {
    console.error('错误:', error);
    appendMessage('助手: 抱歉,发生了错误。');
  }
}
/**
 * Adds one line to the chat log and keeps the newest line in view.
 *
 * The text is assigned through `textContent`, so it is displayed as plain
 * text rather than parsed as markup.
 *
 * @param {string} message - Text of the line to show.
 * @returns {void}
 */
function appendMessage(message) {
  const row = Object.assign(document.createElement('div'), {
    textContent: message,
  });
  chatDiv.appendChild(row);

  // Scroll to the bottom after the log has grown.
  const bottom = chatDiv.scrollHeight;
  chatDiv.scrollTop = bottom;
}
/* Shows the reply accumulated so far: if the last element in the chat log already starts with the assistant prefix its text is overwritten, otherwise a new assistant line is appended; the log is then scrolled to the bottom. */ function updateAssistantMessage(message) { const lastMessage = chatDiv.lastElementChild; if (lastMessage && lastMessage.textContent.startsWith('助手: ')) { lastMessage.textContent = '助手: ' + message; } else { appendMessage('助手: ' + message); } chatDiv.scrollTop = chatDiv.scrollHeight; } </script> </body> </html>
|
json
古老的json格式化回复,问题是有时候不太遵循,可能会漏字段,或者字段顺序乱了。
特别是多轮对话的时候,GPT很容易被对话历史带跑,对于压缩的对话历史就更是如此了。
现在有了结构化输出,一般都不用这个json模式了。
更古老的时候没有 response_format: { type: "json_object" },它甚至连json格式都不太能正确了,还得修复(比如冷不防起手三个反引号)。
现在好像sonnet那些模型支持json很麻烦。如果你在使用一个没有json模式的模型,可以在思维链后面接上4o-mini的结构化输出。
这示例里是解析出了display,所以看着还跟传统的一样,你可以问ai怎么加字段,比如加个好感度变化什么的就能做虚拟恋人了。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
| <!DOCTYPE html> <html lang="zh"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>OpenAI JSON格式回复聊天</title> <style> body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; } #chat { height: 400px; border: 1px solid #ccc; overflow-y: scroll; padding: 10px; margin-bottom: 10px; } #userInput { width: 100%; padding: 5px; } #sendButton { margin-top: 10px; } </style> </head> <body> <h1>OpenAI JSON格式回复聊天</h1> <div id="chat"></div> <input type="text" id="userInput" placeholder="输入您的消息..."> <button id="sendButton">发送</button>
<script> /* Cache the three page elements the script works with: the chat log, the text input and the send button. */ const chatDiv = document.getElementById('chat'); const userInput = document.getElementById('userInput'); const sendButton = document.getElementById('sendButton');
/* YOUR_API_KEY and YOUR_API_URL are placeholders that this snippet does not define; replace them with your own values before running. SYSTEM_PROMPT asks the model to reply in JSON with a 'display' field, which sendMessage reads. */ const API_KEY = YOUR_API_KEY; const API_URL = YOUR_API_URL + "/v1/chat/completions"; const MODEL = 'gpt-4o-mini'; const SYSTEM_PROMPT = "你是一个友好的AI助手。请以JSON格式回复,包含'display'字段用于显示给用户的内容。";
/* Send on a button click, or when Enter is pressed inside the input. */ sendButton.addEventListener('click', sendMessage); userInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
/**
 * Thin wrapper around a chat-completions endpoint using JSON mode.
 *
 * Reads the surrounding script's API_URL, API_KEY, MODEL and SYSTEM_PROMPT
 * constants.
 */
class AIService {
  /**
   * Sends the system prompt plus `userMessage` with
   * `response_format: { type: "json_object" }` and returns the parsed reply.
   *
   * The call is stateless: no earlier conversation turns are included.
   *
   * @param {string} userMessage - Text of the user message.
   * @returns {Promise<any>} Result of `JSON.parse` on the first choice's
   *   content.
   * @throws {Error} When the HTTP status is not 2xx, the response carries no
   *   message content, or the content is not valid JSON. Errors are logged
   *   and rethrown.
   */
  static async callLargeLanguageModel(userMessage) {
    console.log(`[DEBUG] 开始调用大语言模型,用户消息: ${userMessage}`);
    console.log(`[DEBUG] API_URL: ${API_URL}`);
    console.log(`[DEBUG] MODEL: ${MODEL}`);

    const requestBody = JSON.stringify({
      model: MODEL,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: userMessage }
      ],
      max_tokens: 1000,
      response_format: { type: "json_object" }
    });
    console.log(`[DEBUG] 请求体: ${requestBody}`);

    try {
      console.log(`[DEBUG] 发送API请求...`);
      // No `credentials: 'include'`: authentication travels in the
      // Authorization header, and a credentialed cross-origin request is
      // rejected by the browser when the server answers with a wildcard
      // Access-Control-Allow-Origin.
      const response = await fetch(API_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${API_KEY}`,
          'Accept': 'application/json'
        },
        body: requestBody
      });

      console.log(`[DEBUG] 收到API响应,状态码: ${response.status}`);

      if (!response.ok) {
        console.error(`[ERROR] API请求失败: ${response.status}`);
        throw new Error(`API 请求失败: ${response.status}`);
      }

      const data = await response.json();
      console.log(`[DEBUG] API响应数据: ${JSON.stringify(data)}`);

      // Fail with a readable message instead of a TypeError when the
      // response has no choices or no text content.
      const firstChoice = Array.isArray(data.choices) ? data.choices[0] : undefined;
      const content = firstChoice && firstChoice.message ? firstChoice.message.content : undefined;
      if (typeof content !== 'string') {
        throw new Error('API 响应中没有可用的回复内容');
      }

      console.log(`[DEBUG] 模型返回内容长度: ${content.length}`);
      return JSON.parse(content);
    } catch (error) {
      console.error(`[ERROR] 调用大语言模型时发生错误: ${error.message}`);
      throw error;
    }
  }
}
/**
 * Sends the trimmed content of the input box to the model and shows the
 * `display` field of the parsed JSON reply in the chat log.
 *
 * Empty input is ignored. JSON mode does not guarantee that the model
 * includes every requested field, so when `display` is missing or is not a
 * string the whole reply is shown as JSON text instead of "undefined".
 * A failed call is logged to the console and a generic apology line is shown.
 *
 * @returns {Promise<void>}
 */
async function sendMessage() {
  const message = userInput.value.trim();
  if (!message) {
    return;
  }

  appendMessage('用户: ' + message);
  userInput.value = '';

  try {
    const response = await AIService.callLargeLanguageModel(message);
    const hasDisplay =
      response !== null &&
      typeof response === 'object' &&
      typeof response.display === 'string';
    const text = hasDisplay ? response.display : JSON.stringify(response);
    appendMessage('助手: ' + text);
  } catch (error) {
    console.error('错误:', error);
    appendMessage('助手: 抱歉,发生了错误。');
  }
}
/* Adds `message` to the chat log as a new <div> (assigned through textContent, so it is shown as plain text) and scrolls the log to the bottom. */ function appendMessage(message) { const messageDiv = document.createElement('div'); messageDiv.textContent = message; chatDiv.appendChild(messageDiv); chatDiv.scrollTop = chatDiv.scrollHeight; } </script> </body> </html>
|
结构化输出
结构化输出能保证输出是json格式,完全符合你定义的schema的json格式。
输出的顺序也是确定的按照你key给的顺序。
OpenAI的伟大发明。文档链接:https://platform.openai.com/docs/guides/structured-outputs
这个只有4o和4o-mini能用。4-turbo那些还是只能用json模式。
o1不支持,o1它是个Agent,连json模式都不支持。
有个坑,description不会影响输出,纯纯是给你看的注释
已经试过了,你可以试试
1 2 3 4 5 6 7 8 9 10 11
| /* Minimal schema for trying out whether a property's `description` influences the output: an object with a single required string field `test` and no additional properties. */ const TEST_SCHEMA = { type: "object", properties: { test: { type: "string", description: "在这个字段,你必须输出“在测试哦”。" } }, required: ["test"], additionalProperties: false }
|
然后看它瞎回复。
下面其实是官方示例的JavaScript版。可以问AI改成别的,或者拿着这个去改造古老的json模式。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
| <!DOCTYPE html> <html lang="zh"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>OpenAI 结构化输出聊天</title> <style> body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; } #chat { height: 400px; border: 1px solid #ccc; overflow-y: scroll; padding: 10px; margin-bottom: 10px; } #userInput { width: 100%; padding: 5px; } #sendButton { margin-top: 10px; } </style> </head> <body> <h1>OpenAI 结构化输出聊天</h1> <div id="chat"></div> <input type="text" id="userInput" placeholder="输入您的消息..."> <button id="sendButton">发送</button>
<script> /* Cache the three page elements the script works with: the chat log, the text input and the send button. */ const chatDiv = document.getElementById('chat'); const userInput = document.getElementById('userInput'); const sendButton = document.getElementById('sendButton');
/* YOUR_API_KEY and YOUR_API_URL are placeholders that this snippet does not define; replace them with your own values before running. */ const API_KEY = YOUR_API_KEY; const API_URL = YOUR_API_URL + "/v1/chat/completions"; const MODEL = 'gpt-4o-mini'; const SYSTEM_PROMPT = "你是一个友好的数学辅导员。请逐步引导用户解决问题。";
/* JSON Schema for the reply: a required `steps` array of { explanation, output } string pairs plus a required `final_answer` string; every object level sets additionalProperties: false. */ const OUTPUT_SCHEMA = { type: "object", properties: { steps: { type: "array", items: { type: "object", properties: { explanation: { type: "string" }, output: { type: "string" } }, required: ["explanation", "output"], additionalProperties: false } }, final_answer: { type: "string" } }, required: ["steps", "final_answer"], additionalProperties: false };
/* Send on a button click, or when Enter is pressed inside the input. */ sendButton.addEventListener('click', sendMessage); userInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
/**
 * Thin wrapper around a chat-completions endpoint using Structured Outputs.
 *
 * Reads the surrounding script's API_URL, API_KEY, MODEL, SYSTEM_PROMPT and
 * OUTPUT_SCHEMA constants.
 */
class AIService {
  /**
   * Sends the system prompt plus `userMessage` with a strict `json_schema`
   * response format and returns the parsed reply.
   *
   * The call is stateless: no earlier conversation turns are included.
   *
   * @param {string} userMessage - Text of the user message.
   * @returns {Promise<any>} Result of `JSON.parse` on the first choice's
   *   content.
   * @throws {Error} When the HTTP status is not 2xx, the model refuses to
   *   answer, the response carries no message content, or the content is not
   *   valid JSON. Errors are logged and rethrown.
   */
  static async callLargeLanguageModel(userMessage) {
    console.log(`[DEBUG] 开始调用大语言模型,用户消息: ${userMessage}`);
    console.log(`[DEBUG] API_URL: ${API_URL}`);
    console.log(`[DEBUG] MODEL: ${MODEL}`);

    const requestBody = JSON.stringify({
      model: MODEL,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: userMessage }
      ],
      response_format: {
        type: "json_schema",
        json_schema: {
          name: "math_response",
          schema: OUTPUT_SCHEMA,
          strict: true
        }
      }
    });
    console.log(`[DEBUG] 请求体: ${requestBody}`);

    try {
      console.log(`[DEBUG] 发送API请求...`);
      const response = await fetch(API_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${API_KEY}`,
          'Accept': 'application/json'
        },
        body: requestBody
      });

      console.log(`[DEBUG] 收到API响应,状态码: ${response.status}`);

      if (!response.ok) {
        console.error(`[ERROR] API请求失败: ${response.status}`);
        throw new Error(`API 请求失败: ${response.status}`);
      }

      const data = await response.json();
      console.log(`[DEBUG] API响应数据: ${JSON.stringify(data)}`);

      const firstChoice = Array.isArray(data.choices) ? data.choices[0] : undefined;
      const message = firstChoice ? firstChoice.message : undefined;

      // With Structured Outputs a refusal arrives in `message.refusal` while
      // `content` is null; report it instead of failing on `content.length`.
      if (message && message.refusal) {
        throw new Error(`模型拒绝回答: ${message.refusal}`);
      }

      const content = message ? message.content : undefined;
      if (typeof content !== 'string') {
        throw new Error('API 响应中没有可用的回复内容');
      }

      console.log(`[DEBUG] 模型返回内容长度: ${content.length}`);
      return JSON.parse(content);
    } catch (error) {
      console.error(`[ERROR] 调用大语言模型时发生错误: ${error.message}`);
      throw error;
    }
  }
}
/**
 * Sends the trimmed content of the input box to the model and prints the
 * structured reply: three lines per step (number, explanation, result)
 * followed by the final answer.
 *
 * Empty input is ignored. A failed call is logged to the console and a
 * generic apology line is shown.
 *
 * @returns {Promise<void>}
 */
async function sendMessage() {
  const text = userInput.value.trim();
  if (!text) {
    return;
  }

  appendMessage(`用户: ${text}`);
  userInput.value = '';

  try {
    const reply = await AIService.callLargeLanguageModel(text);

    for (const [position, { explanation, output }] of reply.steps.entries()) {
      const stepNumber = position + 1;
      appendMessage(`步骤 ${stepNumber}:`);
      appendMessage(`解释: ${explanation}`);
      appendMessage(`结果: ${output}`);
    }

    appendMessage(`最终答案: ${reply.final_answer}`);
  } catch (err) {
    console.error('错误:', err);
    appendMessage('助手: 抱歉,发生了错误。');
  }
}
/* Adds `message` to the chat log as a new <div> (assigned through textContent, so it is shown as plain text) and scrolls the log to the bottom. */ function appendMessage(message) { const messageDiv = document.createElement('div'); messageDiv.textContent = message; chatDiv.appendChild(messageDiv); chatDiv.scrollTop = chatDiv.scrollHeight; } </script> </body> </html>
|
5. 总结
functionCall请见大模型基础技术学习
主要是我不喜欢functionCall,因为结构化输出可以和数据交互,够用了,而且functionCall双倍tokens。