// Patch summary (two files touched).
//
// src/server/job-manifest.test.ts
//   Adds the test "filter script truncates without corrupting multi-byte UTF-8".
//   It calls buildRtkSetupCommands(5), base64-decodes the first embedded
//   Buffer.from('...','base64') payload, pulls `function trunc(s){...}` out of the
//   decoded script with a regex (anchored on the following `const tr=`), evaluates
//   it with MAX=5, and asserts that trunc("中中"):
//     - contains no U+FFFD replacement character,
//     - still contains "中",
//     - carries the "truncated by paperclip-rtk" marker,
//     - reports "3 bytes truncated" (6 input bytes minus the 3 bytes kept).
//
// src/server/job-manifest.ts
//   Inside the generated trunc(s) emitted by buildRtkSetupCommands, the slice end
//   is no longer MAX itself. A cut offset `e` starts at MAX; when e>0 the code
//   walks back from byte e-1 over UTF-8 continuation bytes ((byte&0xC0)===0x80)
//   to the preceding lead byte, derives the sequence length n (1-4) from the lead
//   byte's high bits, and if p+n>e (the sequence would be cut) moves e back to p.
//   The reported count changes from b.length-MAX to b.length-e to match.
//
// NOTE(review): the line breaks of this patch appear to have been collapsed, so
// each hunk body sits on one physical line - confirm/restore before applying.
diff --git a/src/server/job-manifest.test.ts b/src/server/job-manifest.test.ts index eccf02e..36bb852 100644 --- a/src/server/job-manifest.test.ts +++ b/src/server/job-manifest.test.ts @@ -802,6 +802,28 @@ describe("buildJobManifest", () => { expect(filterScript).toContain("tool_result"); }); + it("filter script truncates without corrupting multi-byte UTF-8", () => { + // "中" is U+4E2D, 3 bytes in UTF-8: E4 B8 AD + // With MAX=5, two "中" (6 bytes) should truncate to one (3 bytes), not + // produce a replacement character from slicing mid-codepoint. + const setup = buildRtkSetupCommands(5); + const b64Matches = [...setup.matchAll(/Buffer\.from\('([A-Za-z0-9+/=]+)','base64'\)/g)]; + const filterScript = Buffer.from(b64Matches[0]![1], "base64").toString("utf-8"); + + // Extract the trunc function from the filter script and evaluate it + const fnMatch = filterScript.match(/(function trunc\(s\)\{.*\})(?=const tr=)/); + expect(fnMatch).toBeTruthy(); + // eslint-disable-next-line no-eval + const trunc = eval(`(()=>{const MAX=5;${fnMatch![1]};return trunc;})()`); + + const result = trunc("中中"); + expect(result).not.toContain("�"); + expect(result).toContain("中"); + expect(result).toContain("truncated by paperclip-rtk"); + // Should report bytes from the actual truncation point, not MAX + expect(result).toContain("3 bytes truncated"); + }); + it("filter script handles array content (block format)", () => { const setup = buildRtkSetupCommands(50000); const b64Matches = [...setup.matchAll(/Buffer\.from\('([A-Za-z0-9+/=]+)','base64'\)/g)]; diff --git a/src/server/job-manifest.ts b/src/server/job-manifest.ts index 438953d..cc59383 100644 --- a/src/server/job-manifest.ts +++ b/src/server/job-manifest.ts @@ -47,7 +47,8 @@ export function buildRtkSetupCommands(maxOutputBytes: number): string { `if(typeof s!=='string')return s;`, `const b=Buffer.from(s,'utf-8');`, `if(b.length<=MAX)return s;`, - `return b.slice(0,MAX).toString('utf-8')+'\\n[...'+(b.length-MAX)+' bytes truncated by 
paperclip-rtk]';`, + `let e=MAX;if(e>0){let p=e-1;while(p>0&&(b[p]&0xC0)===0x80)p--;const l=b[p];let n=1;if((l&0xE0)===0xC0)n=2;else if((l&0xF0)===0xE0)n=3;else if((l&0xF8)===0xF0)n=4;if(p+n>e)e=p;}`, + `return b.slice(0,e).toString('utf-8')+'\\n[...'+(b.length-e)+' bytes truncated by paperclip-rtk]';`, `}`, `const tr=o&&(o.tool_response||o.tool_result);`, `if(tr){`,