13 changes: 11 additions & 2 deletions lib/subscribers/openai/utils.js
@@ -252,6 +252,7 @@ function instrumentStream({ agent, headers, logger, request, response, segment,
response.iterator = async function * wrappedIterator() {
let content = ''
let role = ''
let finishReason = ''
let chunk
try {
const iterator = orig.apply(this, arguments)
@@ -261,15 +262,23 @@ function instrumentStream({ agent, headers, logger, request, response, segment,
role = chunk.choices[0].delta.role
}

if (chunk.choices?.[0]?.finish_reason) {
finishReason = chunk.choices[0].finish_reason
}

content += chunk.choices?.[0]?.delta?.content ?? ''
yield chunk
}
} catch (streamErr) {
err = streamErr
throw err
} finally {
if (chunk?.choices && chunk?.choices?.length !== 0) {
chunk.choices[0].message = { role, content }
        // When `chunk.choices` is an array, the completions API is being used.
        // We must re-assign the finish reason and construct a message object with role and content,
        // because when `include_usage` is enabled the last chunk only contains usage info and no message deltas.
if (Array.isArray(chunk?.choices)) {
chunk.choices = [{ finish_reason: finishReason, message: { role, content } }]
// This means it is the responses API and the entire message is in the response object
} else if (chunk?.response) {
chunk = chunk.response
}
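For context, here is a rough sketch of the stream-end handling this hunk implements. The chunk shapes are assumed from OpenAI's streaming format (with `stream_options: { include_usage: true }`, the final chunk carries only usage data and an empty `choices` array) and are illustrative rather than copied from the PR:

// Illustrative only: assumed shape of the last chunks from a chat.completions
// stream when `stream_options: { include_usage: true }` is requested.
const finalDeltaChunk = {
  id: 'chatcmpl-123',
  choices: [{ index: 0, delta: {}, finish_reason: 'stop' }],
  usage: null
}
const usageOnlyChunk = {
  id: 'chatcmpl-123',
  choices: [], // no message deltas here, only usage
  usage: { prompt_tokens: 53, completion_tokens: 11, total_tokens: 64 }
}

// The wrapped iterator accumulates `role`, `content`, and `finishReason` as chunks
// stream by; in the `finally` block it rebuilds the last chunk so the event builders
// still see a finish reason and a full message:
usageOnlyChunk.choices = [
  { finish_reason: 'stop', message: { role: 'assistant', content: 'Test stream' } }
]
// For the responses API there is no `choices` array; the full message lives on
// `chunk.response`, so the wrapper swaps the chunk for that object instead.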
38 changes: 36 additions & 2 deletions test/unit/llm-events/openai/embedding.test.js
@@ -101,13 +101,19 @@ test('should set error to true', (t, end) => {
})
})

test('respects record_content', (t, end) => {
test('respects record_content by not recording content when set to false', (t, end) => {
const { agent } = t.nr
const req = {
input: 'This is my test input',
model: 'gpt-3.5-turbo-0613'
}
agent.config.ai_monitoring.record_content.enabled = false
function cb(model, content) {
return 65
}

const api = helper.getAgentApi()
api.setLlmTokenCountCallback(cb)

helper.runInTransaction(agent, () => {
const segment = agent.tracer.getSegment()
@@ -118,11 +124,12 @@ test('respects record_content', (t, end) => {
response: res
})
assert.equal(embeddingEvent.input, undefined)
assert.equal(embeddingEvent['response.usage.total_tokens'], 65)
end()
})
})

test('respects record_content', (t, end) => {
test('respects record_content by recording content when true', (t, end) => {
const { agent } = t.nr
const req = {
input: 'This is my test input',
@@ -144,6 +151,33 @@ test('respects record_content', (t, end) => {
response: res
})
assert.equal(embeddingEvent['response.usage.total_tokens'], 65)
assert.equal(embeddingEvent.input, req.input)
end()
})
})

test('does not calculate tokens when no content exists', (t, end) => {
const { agent } = t.nr
const req = {
model: 'gpt-3.5-turbo-0613'
Contributor: A nit, but I feel like we should store these model ids off as constants at the top of the file.

Member Author: I can, but I'd rather do this as a follow-up. These three PRs around tokens fix bugs.

Contributor: Sounds good, just wanted to point it out.

}

function cb(model, content) {
return 65
}

const api = helper.getAgentApi()
api.setLlmTokenCountCallback(cb)
helper.runInTransaction(agent, () => {
const segment = agent.tracer.getSegment()
const embeddingEvent = new LlmEmbedding({
agent,
segment,
request: req,
response: res
})
assert.equal(embeddingEvent['response.usage.total_tokens'], undefined)
assert.equal(embeddingEvent.input, undefined)
end()
})
})
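The tests above drive the token count callback through the test helper; as a minimal sketch (assuming the agent's public `setLlmTokenCountCallback` API, which the tests reach via `helper.getAgentApi()`), an application would register one roughly like this:

// Minimal sketch, assuming the New Relic Node.js agent is required as `newrelic`.
const newrelic = require('newrelic')

newrelic.setLlmTokenCountCallback(function countTokens(model, content) {
  // Return a token count for the given model/content pair. When there is no
  // content (as in the "does not calculate tokens" test above), returning
  // nothing leaves `response.usage.total_tokens` undefined on the event.
  if (!content) return
  return Math.ceil(content.length / 4) // crude length heuristic, not a real tokenizer
})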
30 changes: 18 additions & 12 deletions test/versioned/openai/chat-completions-res-api.test.js
@@ -143,7 +143,7 @@ test('responses.create', async (t) => {
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, tokenUsage: true })
assertChatCompletionSummary({ tx, model, chatSummary })

tx.end()
end()
@@ -174,7 +174,7 @@ test('responses.create', async (t) => {
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, tokenUsage: true, singleInput: true })
assertChatCompletionSummary({ tx, model, chatSummary, singleInput: true })

tx.end()
end()
@@ -318,8 +318,7 @@ test('responses.create', async (t) => {
const stream = await client.responses.create({
stream: true,
input: content,
model: 'gpt-4',
stream_options: { include_usage: true }
model: 'gpt-4'
Contributor: Following up on the previous comment, are these tests dependent on the GPT version (if I remember correctly, I think not)? If not, the mocks and tests should just support one model id for simplicity.

Member Author: The model is not taken into account by the mock server. The places where the model is stored as a variable are for assertions in the LlmCompletion* events.

Contributor (@amychisholm03, Nov 11, 2025): Okay, then we could just standardize them, e.g. replacing 3.5 with 4 for consistency. It might make it easier to reason about when we look at it in the future, so we don't think it's model-specific. Like you said, this can be done in a separate PR.

})

let chunk = {}
@@ -347,11 +346,11 @@ test('responses.create', async (t) => {
const { client, agent } = t.nr
helper.runInTransaction(agent, async (tx) => {
const content = 'Streamed response'
const model = 'gpt-4-0613'
const stream = await client.responses.create({
stream: true,
input: [{ role: 'user', content }, { role: 'user', content: 'What does 1 plus 1 equal?' }],
model: 'gpt-4',
stream_options: { include_usage: true }
model,
})

let chunk = {}
@@ -366,10 +365,12 @@
tx,
chatMsgs,
id: 'resp_684886977be881928c9db234e14ae7d80f8976796514dff9',
model: 'gpt-4-0613',
model,
resContent: res,
reqContent: content
})
const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, streaming: true })

tx.end()
end()
@@ -378,23 +379,27 @@

await t.test('should call the tokenCountCallback in streaming', (t, end) => {
const { client, agent } = t.nr
const model = 'gpt-4-0613'
const promptContent = 'Streamed response'
const promptContent2 = 'What does 1 plus 1 equal?'
const promptTokens = 53
const completionTokens = 11
const res = 'Test stream'
const api = helper.getAgentApi()
// swap the token counts
function cb(model, content) {
// could be gpt-4 or gpt-4-0613
assert.ok(model === 'gpt-4' || model === 'gpt-4-0613', 'should be gpt-4 or gpt-4-0613')
if (content === promptContent + ' ' + promptContent2) {
return 53
return promptTokens
} else if (content === res) {
return 11
return completionTokens
}
}
api.setLlmTokenCountCallback(cb)
helper.runInTransaction(agent, async (tx) => {
const stream = await client.responses.create({
model: 'gpt-4',
model,
input: [
{ role: 'user', content: promptContent },
{ role: 'user', content: promptContent2 }
@@ -410,14 +415,15 @@
const events = agent.customEventAggregator.events.toArray()
const chatMsgs = events.filter(([{ type }]) => type === 'LlmChatCompletionMessage')
assertChatCompletionMessages({
tokenUsage: true,
tx,
chatMsgs,
id: 'resp_684886977be881928c9db234e14ae7d80f8976796514dff9',
model: 'gpt-4-0613',
model,
resContent: res,
reqContent: promptContent
})
const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, streaming: true, promptTokens, completionTokens })

tx.end()
end()
69 changes: 60 additions & 9 deletions test/versioned/openai/chat-completions.test.js
@@ -148,22 +148,21 @@ test('chat.completions.create', async (t) => {
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, tokenUsage: true })
assertChatCompletionSummary({ tx, model, chatSummary })

tx.end()
end()
})
})

if (semver.gte(pkgVersion, '4.12.2')) {
await t.test('should create span on successful chat completion stream create', { skip: semver.lt(pkgVersion, '4.12.2') }, (t, end) => {
await t.test('should create span on successful chat completion stream create', (t, end) => {
const { client, agent, host, port } = t.nr
helper.runInTransaction(agent, async (tx) => {
const content = 'Streamed response'
const stream = await client.chat.completions.create({
stream: true,
messages: [{ role: 'user', content }],
stream_options: { include_usage: true },
messages: [{ role: 'user', content }]
})

let chunk = {}
@@ -202,8 +201,7 @@ test('chat.completions.create', async (t) => {
{ role: 'user', content },
{ role: 'user', content: 'What does 1 plus 1 equal?' }
],
stream: true,
stream_options: { include_usage: true },
stream: true
})

let res = ''
@@ -220,6 +218,55 @@
i++
}

const events = agent.customEventAggregator.events.toArray()
assert.equal(events.length, 4, 'should create a chat completion message and summary event')
const chatMsgs = events.filter(([{ type }]) => type === 'LlmChatCompletionMessage')
assertChatCompletionMessages({
tx,
chatMsgs,
id: 'chatcmpl-8MzOfSMbLxEy70lYAolSwdCzfguQZ',
model,
resContent: res,
reqContent: content,
noTokenUsage: true
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, noUsageTokens: true })

tx.end()
end()
})
})

await t.test('should assign usage information when `include_usage` exists in stream', (t, end) => {
const { client, agent } = t.nr
helper.runInTransaction(agent, async (tx) => {
const content = 'Streamed response usage'
const model = 'gpt-4'
const stream = await client.chat.completions.create({
stream: true,
model,
messages: [
{ role: 'user', content },
{ role: 'user', content: 'What does 1 plus 1 equal?' }
],
        stream_options: { include_usage: true }
})

let chunk = {}
let res = ''
for await (chunk of stream) {
if (!chunk.usage) {
res += chunk.choices[0]?.delta?.content
}
}
assert.equal(chunk.headers, undefined, 'should remove response headers from user result')
assert.equal(chunk.choices[0].message.role, 'assistant')
const expectedRes = responses.get(content)
assert.equal(chunk.choices[0].message.content, expectedRes.streamData)
assert.equal(chunk.choices[0].message.content, res)
assert.deepEqual(chunk.usage, { prompt_tokens: 53, completion_tokens: 11, total_tokens: 64 })
const events = agent.customEventAggregator.events.toArray()
assert.equal(events.length, 4, 'should create a chat completion message and summary event')
const chatMsgs = events.filter(([{ type }]) => type === 'LlmChatCompletionMessage')
@@ -244,15 +291,17 @@
const { client, agent } = t.nr
const promptContent = 'Streamed response'
const promptContent2 = 'What does 1 plus 1 equal?'
const promptTokens = 11
const completionTokens = 53
let res = ''
const expectedModel = 'gpt-4'
const api = helper.getAgentApi()
function cb(model, content) {
assert.equal(model, expectedModel)
if (content === promptContent + ' ' + promptContent2) {
return 53
return promptTokens
} else if (content === res) {
return 11
return completionTokens
}
}
api.setLlmTokenCountCallback(cb)
@@ -276,7 +325,6 @@
const events = agent.customEventAggregator.events.toArray()
const chatMsgs = events.filter(([{ type }]) => type === 'LlmChatCompletionMessage')
assertChatCompletionMessages({
tokenUsage: true,
tx,
chatMsgs,
id: 'chatcmpl-8MzOfSMbLxEy70lYAolSwdCzfguQZ',
@@ -285,6 +333,9 @@
reqContent: promptContent
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model: expectedModel, chatSummary, promptTokens, completionTokens })

tx.end()
end()
})
16 changes: 11 additions & 5 deletions test/versioned/openai/common-chat-api.js
@@ -13,7 +13,7 @@ module.exports = {
const { match } = require('../../lib/custom-assertions')

function assertChatCompletionMessages(
{ tx, chatMsgs, id, model, reqContent, resContent, tokenUsage },
{ tx, chatMsgs, id, model, reqContent, resContent, noTokenUsage },
{ assert = require('node:assert') } = {}
) {
const [segment] = tx.trace.getChildren(tx.trace.root.id)
@@ -36,14 +36,14 @@ function assertChatCompletionMessages(
expectedChatMsg.sequence = 0
expectedChatMsg.id = `${id}-0`
expectedChatMsg.content = reqContent
if (tokenUsage) {
if (!noTokenUsage) {
expectedChatMsg.token_count = 0
}
} else if (msg[1].sequence === 1) {
expectedChatMsg.sequence = 1
expectedChatMsg.id = `${id}-1`
expectedChatMsg.content = 'What does 1 plus 1 equal?'
if (tokenUsage) {
if (!noTokenUsage) {
expectedChatMsg.token_count = 0
}
} else {
@@ -52,7 +52,7 @@ function assertChatCompletionMessages(
expectedChatMsg.id = `${id}-2`
expectedChatMsg.content = resContent
expectedChatMsg.is_response = true
if (tokenUsage) {
if (!noTokenUsage) {
expectedChatMsg.token_count = 0
}
}
@@ -63,7 +63,7 @@ function assertChatCompletionMessages(
}

function assertChatCompletionSummary(
{ tx, model, chatSummary, error = false },
{ tx, model, chatSummary, error = false, promptTokens = 53, completionTokens = 11, totalTokens = 64, noUsageTokens = false },
{ assert = require('node:assert') } = {}
) {
const [segment] = tx.trace.getChildren(tx.trace.root.id)
@@ -90,6 +90,12 @@ function assertChatCompletionSummary(
error
}

if (!(error || noUsageTokens)) {
expectedChatSummary['response.usage.prompt_tokens'] = promptTokens
expectedChatSummary['response.usage.completion_tokens'] = completionTokens
expectedChatSummary['response.usage.total_tokens'] = totalTokens
}

assert.equal(chatSummary[0].type, 'LlmChatCompletionSummary')
match(chatSummary[1], expectedChatSummary, { assert })
}