Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 43 additions & 2 deletions lib/api/readable.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const kContentType = Symbol('kContentType')
const kContentLength = Symbol('kContentLength')
const kUsed = Symbol('kUsed')
const kBytesRead = Symbol('kBytesRead')
// Key under which setEncoding() stores the raw Buffer chunks that were still
// queued in the readable state when an encoding was applied. consumeStart()
// feeds those chunks to the consume path and then resets the slot to null.
const kPreservedBuffer = Symbol('kPreservedBuffer')

// Shared do-nothing function.
const noop = () => {}

Expand Down Expand Up @@ -324,7 +325,37 @@ class BodyReadable extends Readable {
*/
setEncoding (encoding) {
if (Buffer.isEncoding(encoding)) {
this._readableState.encoding = encoding
// Preserve raw Buffer chunks for the consume path (body.text(),
// body.json(), etc.) before super.setEncoding() replaces them
// with decoded strings. Without this, the consume path would
// lose access to the original bytes — some of which may be held
// by the decoder for incomplete multi-byte sequences, and the
// rest converted to strings that can't be safely concatenated
// byte-wise.
const state = this._readableState
const buffer = state.buffer
if (buffer && state.length > 0) {
const bufferIndex = state.bufferIndex ?? 0
const preserved = []
const source = typeof buffer.slice === 'function'
? buffer.slice(bufferIndex)
: buffer
for (const data of source) {
if (Buffer.isBuffer(data)) {
preserved.push(data)
}
}
if (preserved.length > 0) {
this[kPreservedBuffer] = (this[kPreservedBuffer] || []).concat(preserved)
}
}

// Delegate to Node.js Readable.setEncoding() which initializes a
// StringDecoder and re-encodes already-buffered chunks. This properly
// handles multi-byte sequences split at chunk boundaries for the
// for-await / on('data') paths. Without this, Node.js uses
// buf.toString(encoding) on each chunk, producing U+FFFD for split chars.
super.setEncoding(encoding)
}
return this
}
Expand Down Expand Up @@ -432,7 +463,17 @@ function consumeStart (consume) {

const { _readableState: state } = consume.stream

if (state.bufferIndex) {
// If setEncoding() was called, state.buffer may contain decoded strings
// (which would break Buffer.concat in chunksDecode). Use the preserved
// raw Buffers (saved before super.setEncoding() in setEncoding()) for
// byte-level accurate consumption. Otherwise read from state.buffer.
const preserved = consume.stream[kPreservedBuffer]
if (preserved && preserved.length > 0) {
for (const chunk of preserved) {
consumePush(consume, chunk)
}
consume.stream[kPreservedBuffer] = null
} else if (state.bufferIndex) {
const start = state.bufferIndex
const end = state.buffer.length
for (let n = start; n < end; n++) {
Expand Down
52 changes: 52 additions & 0 deletions test/client-request.js
Original file line number Diff line number Diff line change
Expand Up @@ -1525,6 +1525,58 @@ test('request multibyte text with setEncoding', async (t) => {
await t.completed
})

test('setEncoding(\'utf8\') handles 3-byte UTF-8 characters split across chunks', async (t) => {
  const plan = tspl(t, { plan: 2 })

  // Payload layout: 10 ASCII bytes, then '傳' (U+50B3, encoded as e5 82 b3),
  // then 'end' — 16 bytes in total.
  const multiByteChar = '傳'
  const asciiPad = 'a'.repeat(10)
  const text = asciiPad + multiByteChar + 'end'
  const payload = Buffer.from(text)

  // Cutting at byte 11 leaves the lead byte 0xe5 at the end of the first
  // piece and the continuation bytes 0x82 0xb3 at the start of the second.
  const splitAt = 11
  const head = payload.subarray(0, splitAt)
  const tail = payload.subarray(splitAt)

  const server = createServer({ joinDuplicateHeaders: true }, (req, res) => {
    // Raw buffers are written so the split lands exactly at `splitAt`.
    res.writeHead(200, { 'content-type': 'text/plain; charset=utf-8' })
    res.write(head)
    // The tail is delayed so both pieces are delivered as separate chunks.
    setTimeout(() => res.end(tail), 50)
  })
  after(() => {
    server.closeAllConnections?.()
    server.close()
  })

  server.listen(0, async () => {
    const client = new Client(`http://localhost:${server.address().port}`)
    after(client.destroy.bind(client))

    const response = await client.request({
      path: '/',
      method: 'GET'
    })
    const body = response.body
    body.setEncoding('utf8')

    let decoded = ''
    for await (const piece of body) {
      decoded += piece
    }

    // A decoder that mishandles the split would emit U+FFFD here.
    plan.strictEqual(decoded.includes('\ufffd'), false, 'should not contain U+FFFD replacement characters')
    plan.strictEqual(decoded, text, 'decoded text should match original')
  })

  await plan.completed
})

test('#3736 - Aborted Response (without consuming body)', async (t) => {
const plan = tspl(t, { plan: 1 })

Expand Down
Loading