Skip to content

Commit 2a0a620

Browse files
authored
Merge pull request #44 from solar2ain/feat/reasoning-error-detection-arena
feat: 多适配器 reasoning 提取、智能错误检测、全尺寸原图与 Arena 域名更新
2 parents bf55300 + ae81ec8 commit 2a0a620

13 files changed

Lines changed: 428 additions & 59 deletions

File tree

‎src/backend/adapter/chatgpt.js‎

Lines changed: 95 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,79 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
110110
return { error: `API 返回错误: HTTP ${conversationResponse.status()}` };
111111
}
112112

113+
// 5.5 解析 conversation 响应,检查是否是纯文本回复(拒绝/限流场景)
114+
let conversationText = '';
115+
let isImageGenerationStarted = false;
116+
let conversationBody = '';
117+
try {
118+
conversationBody = await conversationResponse.text();
119+
120+
// 检查是否有图片生成相关的内容 (dalle 工具调用或 file_ 文件引用)
121+
// 注意:不使用 'image' 关键词,因为拒绝消息也会包含这个词
122+
isImageGenerationStarted = conversationBody.includes('dalle') || conversationBody.includes('file_');
123+
logger.debug('适配器', `isImageGenerationStarted: ${isImageGenerationStarted}`, meta);
124+
125+
// 提取文本内容
126+
const lines = conversationBody.split('\n');
127+
for (const line of lines) {
128+
if (!line.startsWith('data: ')) continue;
129+
const dataStr = line.slice(6).trim();
130+
if (dataStr === '[DONE]') continue;
131+
try {
132+
const data = JSON.parse(dataStr);
133+
// 提取初始文本 (channel=final 的 assistant 消息)
134+
if (data.v?.message?.channel === 'final' &&
135+
data.v?.message?.author?.role === 'assistant' &&
136+
data.v?.message?.content?.parts?.length > 0) {
137+
const part = data.v.message.content.parts[0];
138+
if (typeof part === 'string') {
139+
conversationText = part;
140+
}
141+
}
142+
// patch 格式累加 (data.v 是 patch 操作数组)
143+
if (Array.isArray(data.v)) {
144+
for (const patch of data.v) {
145+
if (patch.o === 'append' && patch.p === '/message/content/parts/0' && patch.v) {
146+
conversationText += patch.v;
147+
}
148+
}
149+
}
150+
} catch { }
151+
}
152+
logger.debug('适配器', `提取到文本 (${conversationText.length} 字符): ${conversationText.substring(0, 200)}...`, meta);
153+
} catch (e) {
154+
logger.warn('适配器', `解析 conversation 响应失败: ${e.message}`, meta);
155+
}
156+
157+
// 早期检测:如果文本表明是拒绝/限流,立即返回,不等待图片超时
158+
if (conversationText) {
159+
// 检查是否是速率限制错误 (不重试,同账号重试也没用)
160+
const isRateLimit = conversationBody.includes('RateLimitException') ||
161+
conversationBody.includes('rate limit') ||
162+
/limit.*reset/i.test(conversationText);
163+
164+
if (isRateLimit) {
165+
logger.warn('适配器', `早期检测到速率限制: ${conversationText.substring(0, 200)}...`, meta);
166+
return { error: `触发速率限制: ${conversationText.substring(0, 200)}`, retryable: false };
167+
}
168+
169+
// 如果没有图片生成迹象，检查是否是内容被拒绝
170+
if (!isImageGenerationStarted) {
171+
const isContentRejection = /cannot|can't|unable|sorry|policy|violat/i.test(conversationText);
172+
if (isContentRejection) {
173+
logger.warn('适配器', `早期检测到内容拒绝: ${conversationText.substring(0, 200)}...`, meta);
174+
return { error: `内容被拒绝: ${conversationText.substring(0, 200)}`, retryable: false };
175+
}
176+
}
177+
}
178+
113179
logger.info('适配器', '生成中,等待图片就绪...', meta);
114180

115181
// 6. 监听文件状态接口,等待图片生成完成
116-
// 通过 file_name 是否包含 .part 判断是否生成完成
182+
// 如果 conversation 响应中没有图片生成迹象,使用较短超时
117183
let downloadUrl = null;
118184
let fileName = null;
185+
const imageTimeout = isImageGenerationStarted ? 120000 : 30000;
119186

120187
try {
121188
await page.waitForResponse(async (response) => {
@@ -128,11 +195,6 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
128195
const fn = json.file_name;
129196
const dl = json.download_url;
130197

131-
// 检查是否生成完成:
132-
// 1. 必须有 file_name
133-
// 2. file_name 开头必须是 user- (生成的图片)
134-
// 3. file_name 不能包含 .part(表示中间状态)
135-
// 4. 必须有 download_url
136198
if (fn && fn.startsWith('user-') && !fn.includes('.part') && dl) {
137199
fileName = fn;
138200
downloadUrl = dl;
@@ -145,8 +207,34 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
145207
} catch {
146208
return false;
147209
}
148-
}, { timeout: waitTimeout });
210+
}, { timeout: imageTimeout });
149211
} catch (e) {
212+
logger.debug('适配器', `等待图片超时, conversationText长度: ${conversationText.length}, downloadUrl: ${downloadUrl}`, meta);
213+
214+
// 超时时检查是否有 conversation 中的文本内容
215+
if (conversationText && !downloadUrl) {
216+
const isRateLimit = conversationBody.includes('RateLimitException') ||
217+
conversationBody.includes('rate limit') ||
218+
/limit.*reset/i.test(conversationText);
219+
220+
if (isRateLimit) {
221+
logger.warn('适配器', `触发速率限制: ${conversationText.substring(0, 200)}...`, meta);
222+
return { error: `触发速率限制: ${conversationText.substring(0, 200)}`, retryable: false };
223+
}
224+
225+
logger.warn('适配器', `模型返回文本而非图片: ${conversationText.substring(0, 200)}...`, meta);
226+
return { error: `模型返回文本而非图片: ${conversationText.substring(0, 200)}`, retryable: false };
227+
}
228+
229+
// 如果没有提取到文本,但有原始响应体,尝试用简单方式提取
230+
if (!conversationText && conversationBody) {
231+
const partsMatch = conversationBody.match(/"parts":\s*\["([^"]+)"\]/);
232+
if (partsMatch && partsMatch[1]) {
233+
logger.warn('适配器', `通过正则提取到文本: ${partsMatch[1].substring(0, 200)}...`, meta);
234+
return { error: `模型返回文本而非图片: ${partsMatch[1].substring(0, 200)}`, retryable: false };
235+
}
236+
}
237+
150238
const pageError = normalizePageError(e, meta);
151239
if (pageError) return pageError;
152240
throw e;

‎src/backend/adapter/deepseek_text.js‎

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ async function configureModel(page, modelConfig, meta = {}) {
7979
* @param {string[]} imgPaths - 图片路径数组 (此适配器不支持)
8080
* @param {string} [modelId] - 模型 ID
8181
* @param {object} [meta={}] - 日志元数据
82-
* @returns {Promise<{text?: string, error?: string}>}
82+
* @returns {Promise<{text?: string, reasoning?: string, error?: string}>}
8383
*/
8484
async function generate(context, prompt, imgPaths, modelId, meta = {}) {
8585
const { page, config } = context;
@@ -108,8 +108,10 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
108108
logger.debug('适配器', '启动 API 监听...', meta);
109109

110110
let textContent = '';
111+
let thinkingContent = ''; // thinking 内容
111112
let isComplete = false;
112113
let isCollecting = false; // 当前最后一个 fragment 是否为 RESPONSE 类型
114+
let isCollectingThinking = false; // 是否正在收集 thinking
113115

114116
const responsePromise = page.waitForResponse(async (response) => {
115117
const url = response.url();
@@ -133,14 +135,21 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
133135

134136
// --- 处理 fragment 列表变更,更新 isCollecting 状态 ---
135137

136-
// 初始响应中可能已有 fragments (如 SEARCH / RESPONSE)
138+
// 初始响应中可能已有 fragments (如 THINK / SEARCH / RESPONSE)
137139
if (data.v?.response?.fragments && Array.isArray(data.v.response.fragments)) {
138140
for (const fragment of data.v.response.fragments) {
139141
if (fragment.type === 'RESPONSE') {
140142
isCollecting = true;
143+
isCollectingThinking = false;
141144
if (fragment.content) textContent += fragment.content;
145+
} else if (fragment.type === 'THINK') {
146+
// DeepSeek 使用 THINK (不是 THINKING)
147+
isCollectingThinking = true;
148+
isCollecting = false;
149+
if (fragment.content) thinkingContent += fragment.content;
142150
} else {
143151
isCollecting = false;
152+
isCollectingThinking = false;
144153
}
145154
}
146155
}
@@ -150,9 +159,15 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
150159
for (const fragment of data.v) {
151160
if (fragment.type === 'RESPONSE') {
152161
isCollecting = true;
162+
isCollectingThinking = false;
153163
if (fragment.content) textContent += fragment.content;
164+
} else if (fragment.type === 'THINK') {
165+
isCollectingThinking = true;
166+
isCollecting = false;
167+
if (fragment.content) thinkingContent += fragment.content;
154168
} else {
155169
isCollecting = false;
170+
isCollectingThinking = false;
156171
}
157172
}
158173
}
@@ -164,9 +179,15 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
164179
for (const fragment of item.v) {
165180
if (fragment.type === 'RESPONSE') {
166181
isCollecting = true;
182+
isCollectingThinking = false;
167183
if (fragment.content) textContent += fragment.content;
184+
} else if (fragment.type === 'THINK') {
185+
isCollectingThinking = true;
186+
isCollecting = false;
187+
if (fragment.content) thinkingContent += fragment.content;
168188
} else {
169189
isCollecting = false;
190+
isCollectingThinking = false;
170191
}
171192
}
172193
}
@@ -182,15 +203,21 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
182203
// 带路径的 content 操作 (如 response/fragments/-1/content)
183204
if (data.p && typeof data.v === 'string') {
184205
const match = data.p.match(/response\/fragments\/(-?\d+)\/content/);
185-
if (match && isCollecting) {
186-
textContent += data.v;
206+
if (match) {
207+
if (isCollecting) {
208+
textContent += data.v;
209+
} else if (isCollectingThinking) {
210+
thinkingContent += data.v;
211+
}
187212
}
188213
}
189214

190215
// 纯文本追加 (只有 v 字符串,没有 p 和 o)
191216
if (data.v && typeof data.v === 'string' && !data.p && !data.o) {
192217
if (isCollecting) {
193218
textContent += data.v;
219+
} else if (isCollectingThinking) {
220+
thinkingContent += data.v;
194221
}
195222
}
196223

@@ -233,7 +260,16 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
233260

234261
logger.info('适配器', `已获取文本内容 (${textContent.length} 字符)`, meta);
235262
logger.info('适配器', '文本生成完成,任务完成', meta);
236-
return { text: textContent.trim() };
263+
264+
const trimmedThinking = thinkingContent.trim();
265+
const result = { text: textContent.trim() };
266+
267+
// 返回结果(如果有 thinking 则包含 reasoning)
268+
if (trimmedThinking) {
269+
logger.info('适配器', `已获取思考过程 (${trimmedThinking.length} 字符)`, meta);
270+
result.reasoning = trimmedThinking;
271+
}
272+
return result;
237273

238274
} catch (err) {
239275
// 顶层错误处理

‎src/backend/adapter/gemini.js‎

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,16 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
175175
return { error: errorMsg };
176176
}
177177

178-
// 取第一张图片,追加 =s1024-rj 获取高分辨率
179-
const imageUrl = imageUrls[0] + '=s1024-rj';
178+
// 取第一张图片,追加 =d-I 获取全尺寸原图(而非 =s1024-rj 的缩略图)
179+
const imageUrl = imageUrls[0] + '=d-I';
180180
logger.info('适配器', `找到 ${imageUrls.length} 张图片,开始下载...`, meta);
181181

182+
// 提取图片生成的详细描述(thinking)
183+
const thinking = extractImageThinking(bodyBuffer);
184+
if (thinking) {
185+
logger.info('适配器', `提取到详细描述,长度: ${thinking.length}`, meta);
186+
}
187+
182188
// 使用封装的下载函数
183189
const imgDlCfg = config?.backend?.pool?.failover || {};
184190
const result = await useContextDownload(imageUrl, page, {
@@ -190,7 +196,8 @@ async function generate(context, prompt, imgPaths, modelId, meta = {}) {
190196
}
191197

192198
logger.info('适配器', '已获取图片,任务完成', meta);
193-
return result;
199+
// 返回图片和 thinking(如果有)
200+
return thinking ? { ...result, reasoning: thinking } : result;
194201
}
195202

196203
} catch (err) {
@@ -428,4 +435,57 @@ function extractAiTextFromResponse(bodyBuffer) {
428435
}
429436
}
430437
return best;
438+
}
439+
440+
/**
441+
* Walks `root` depth-first (iteratively, with an explicit stack) and returns the
* longest string that looks like a prose description, i.e. the image-generation
* "thinking" / detailed description.
442+
* A string is kept as a candidate only when it is longer than 200 characters,
* does not start with 'http' or 'data:', does not contain 'googleapis.com' or
* 'googleusercontent.com', and is not made up solely of 100+ base64-alphabet
* characters. This filters out URLs, data URIs and base64 blobs.
* NOTE(review): the original note also lists "classifier names" as excluded;
* presumably those are caught by the 'googleapis.com' check - TODO confirm.
443+
* @param {any} root - Value to traverse. Array elements and object values are
*   descended into; falsy values are skipped.
444+
* @returns {string} The longest candidate (the earlier-collected one wins a
*   length tie), or an empty string when no candidate was found.
445+
*/
446+
function findLongDescriptionDeep(root) {
447+
const candidates = [];
448+
const stack = [root];
449+
450+
while (stack.length) {
451+
const cur = stack.pop();
452+
// Skip null/undefined and every other falsy value (including empty strings).
if (!cur) continue;
453+
454+
if (typeof cur === 'string') {
455+
// Keep only long strings that are not URLs, data URIs or base64-looking blobs.
if (cur.length > 200 &&
456+
!cur.startsWith('http') &&
457+
!cur.startsWith('data:') &&
458+
!cur.includes('googleapis.com') &&
459+
!cur.includes('googleusercontent.com') &&
460+
!/^[A-Za-z0-9+/=]{100,}$/.test(cur)) {
461+
candidates.push(cur);
462+
}
463+
} else if (Array.isArray(cur)) {
464+
for (const v of cur) stack.push(v);
465+
} else if (typeof cur === 'object') {
466+
for (const v of Object.values(cur)) stack.push(v);
467+
}
468+
}
469+
470+
if (candidates.length === 0) return '';
471+
// `>=` keeps the accumulated (earlier) candidate when two lengths are equal.
return candidates.reduce((a, b) => a.length >= b.length ? a : b, '');
472+
}
473+
474+
/**
475+
* Extracts the image-generation detailed description ("thinking") from a
* response body: the body is passed through parseLenFramedResponse and
* extractPayloads, each resulting payload is searched with
* findLongDescriptionDeep, and the longest text across all payloads is returned.
476+
* @param {Buffer} bodyBuffer - Response body buffer.
477+
* @returns {string} The longest description found (the first payload wins a
*   length tie), or an empty string when none was found.
478+
*/
479+
function extractImageThinking(bodyBuffer) {
480+
const frames = parseLenFramedResponse(bodyBuffer);
481+
const payloads = extractPayloads(frames);
482+
483+
let best = '';
484+
for (const payload of payloads) {
485+
const text = findLongDescriptionDeep(payload);
486+
// Strict `>` means a later payload replaces `best` only when it is strictly longer.
if (text.length > best.length) {
487+
best = text;
488+
}
489+
}
490+
return best;
431491
}

0 commit comments

Comments
 (0)