13 changes: 11 additions & 2 deletions lib/subscribers/openai/utils.js
@@ -252,6 +252,7 @@ function instrumentStream({ agent, headers, logger, request, response, segment,
response.iterator = async function * wrappedIterator() {
let content = ''
let role = ''
let finishReason = ''
let chunk
try {
const iterator = orig.apply(this, arguments)
@@ -261,15 +262,23 @@ function instrumentStream({ agent, headers, logger, request, response, segment,
role = chunk.choices[0].delta.role
}

if (chunk.choices?.[0]?.finish_reason) {
finishReason = chunk.choices[0].finish_reason
}

content += chunk.choices?.[0]?.delta?.content ?? ''
yield chunk
}
} catch (streamErr) {
err = streamErr
throw err
} finally {
if (chunk?.choices && chunk?.choices?.length !== 0) {
chunk.choices[0].message = { role, content }
        // When `chunk.choices` is an array, the completions API is being used.
        // We must re-assign the finish reason and construct a message object with role and content,
        // because when `include_usage` is enabled the last chunk only contains usage info and no message deltas.
if (Array.isArray(chunk?.choices)) {
chunk.choices = [{ finish_reason: finishReason, message: { role, content } }]
// This means it is the responses API and the entire message is in the response object
} else if (chunk?.response) {
chunk = chunk.response
}
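For context, here is a rough sketch of the stream-end handling this hunk implements. The chunk shapes are assumed from OpenAI's streaming format (with `stream_options: { include_usage: true }`, the final chunk carries only usage data and an empty `choices` array) and are illustrative rather than copied from the PR:

// Illustrative only: assumed shape of the last chunks from a chat.completions
// stream when `stream_options: { include_usage: true }` is requested.
const finalDeltaChunk = {
  id: 'chatcmpl-123',
  choices: [{ index: 0, delta: {}, finish_reason: 'stop' }],
  usage: null
}
const usageOnlyChunk = {
  id: 'chatcmpl-123',
  choices: [], // no message deltas here, only usage
  usage: { prompt_tokens: 53, completion_tokens: 11, total_tokens: 64 }
}

// The wrapped iterator accumulates `role`, `content`, and `finishReason` as chunks
// stream by; in the `finally` block it rebuilds the last chunk so the event builders
// still see a finish reason and a full message:
usageOnlyChunk.choices = [
  { finish_reason: 'stop', message: { role: 'assistant', content: 'Test stream' } }
]
// For the responses API there is no `choices` array; the full message lives on
// `chunk.response`, so the wrapper swaps the chunk for that object instead.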
38 changes: 36 additions & 2 deletions test/unit/llm-events/openai/embedding.test.js
@@ -101,13 +101,19 @@ test('should set error to true', (t, end) => {
})
})

test('respects record_content', (t, end) => {
test('respects record_content by not recording content when set to false', (t, end) => {
const { agent } = t.nr
const req = {
input: 'This is my test input',
model: 'gpt-3.5-turbo-0613'
}
agent.config.ai_monitoring.record_content.enabled = false
function cb(model, content) {
return 65
}

const api = helper.getAgentApi()
api.setLlmTokenCountCallback(cb)

helper.runInTransaction(agent, () => {
const segment = agent.tracer.getSegment()
@@ -118,11 +124,12 @@ test('respects record_content', (t, end) => {
response: res
})
assert.equal(embeddingEvent.input, undefined)
assert.equal(embeddingEvent['response.usage.total_tokens'], 65)
end()
})
})

test('respects record_content', (t, end) => {
test('respects record_content by recording content when true', (t, end) => {
const { agent } = t.nr
const req = {
input: 'This is my test input',
@@ -144,6 +151,33 @@ test('respects record_content', (t, end) => {
response: res
})
assert.equal(embeddingEvent['response.usage.total_tokens'], 65)
assert.equal(embeddingEvent.input, req.input)
end()
})
})

test('does not calculate tokens when no content exists', (t, end) => {
const { agent } = t.nr
const req = {
model: 'gpt-3.5-turbo-0613'
Contributor: A nit, but I feel like we should store these model ids off as constants at the top of the file.

Member Author: I can, but I'd rather do this as a follow-up. These three PRs around tokens fix bugs.

Contributor: Sounds good, just wanted to point it out.

}

function cb(model, content) {
return 65
}

const api = helper.getAgentApi()
api.setLlmTokenCountCallback(cb)
helper.runInTransaction(agent, () => {
const segment = agent.tracer.getSegment()
const embeddingEvent = new LlmEmbedding({
agent,
segment,
request: req,
response: res
})
assert.equal(embeddingEvent['response.usage.total_tokens'], undefined)
assert.equal(embeddingEvent.input, undefined)
end()
})
})
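The tests above drive the token count callback through the test helper; as a minimal sketch (assuming the agent's public `setLlmTokenCountCallback` API, which the tests reach via `helper.getAgentApi()`), an application would register one roughly like this:

// Minimal sketch, assuming the New Relic Node.js agent is required as `newrelic`.
const newrelic = require('newrelic')

newrelic.setLlmTokenCountCallback(function countTokens(model, content) {
  // Return a token count for the given model/content pair. When there is no
  // content (as in the "does not calculate tokens" test above), returning
  // nothing leaves `response.usage.total_tokens` undefined on the event.
  if (!content) return
  return Math.ceil(content.length / 4) // crude length heuristic, not a real tokenizer
})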
30 changes: 18 additions & 12 deletions test/versioned/openai/chat-completions-res-api.test.js
@@ -143,7 +143,7 @@ test('responses.create', async (t) => {
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, tokenUsage: true })
assertChatCompletionSummary({ tx, model, chatSummary })

tx.end()
end()
@@ -174,7 +174,7 @@ test('responses.create', async (t) => {
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, tokenUsage: true, singleInput: true })
assertChatCompletionSummary({ tx, model, chatSummary, singleInput: true })

tx.end()
end()
@@ -318,8 +318,7 @@ test('responses.create', async (t) => {
const stream = await client.responses.create({
stream: true,
input: content,
model: 'gpt-4',
stream_options: { include_usage: true }
model: 'gpt-4'
Contributor: Following up on the previous comment, are these tests dependent on the GPT version (if I remember correctly, I think not)? If not, the mocks and tests should just support one model id for simplicity.

Member Author: The model is not taken into account by the mock server. The places where the model is stored as a variable are for assertions in the LlmCompletion* events.

Contributor (@amychisholm03, Nov 11, 2025): Okay, then we could just standardize them, e.g. replacing 3.5 with 4 for consistency. It might make it easier to reason about when we look at it in the future, so we don't think it's model-specific. Like you said, this can be done in a separate PR.

})

let chunk = {}
@@ -347,11 +346,11 @@ test('responses.create', async (t) => {
const { client, agent } = t.nr
helper.runInTransaction(agent, async (tx) => {
const content = 'Streamed response'
const model = 'gpt-4-0613'
const stream = await client.responses.create({
stream: true,
input: [{ role: 'user', content }, { role: 'user', content: 'What does 1 plus 1 equal?' }],
model: 'gpt-4',
stream_options: { include_usage: true }
model,
})

let chunk = {}
@@ -366,10 +365,12 @@
tx,
chatMsgs,
id: 'resp_684886977be881928c9db234e14ae7d80f8976796514dff9',
model: 'gpt-4-0613',
model,
resContent: res,
reqContent: content
})
const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, streaming: true })

tx.end()
end()
@@ -378,23 +379,27 @@

await t.test('should call the tokenCountCallback in streaming', (t, end) => {
const { client, agent } = t.nr
const model = 'gpt-4-0613'
const promptContent = 'Streamed response'
const promptContent2 = 'What does 1 plus 1 equal?'
const promptTokens = 53
const completionTokens = 11
const res = 'Test stream'
const api = helper.getAgentApi()
// swap the token counts
function cb(model, content) {
// could be gpt-4 or gpt-4-0613
assert.ok(model === 'gpt-4' || model === 'gpt-4-0613', 'should be gpt-4 or gpt-4-0613')
if (content === promptContent + ' ' + promptContent2) {
return 53
return promptTokens
} else if (content === res) {
return 11
return completionTokens
}
}
api.setLlmTokenCountCallback(cb)
helper.runInTransaction(agent, async (tx) => {
const stream = await client.responses.create({
model: 'gpt-4',
model,
input: [
{ role: 'user', content: promptContent },
{ role: 'user', content: promptContent2 }
@@ -410,14 +415,15 @@
const events = agent.customEventAggregator.events.toArray()
const chatMsgs = events.filter(([{ type }]) => type === 'LlmChatCompletionMessage')
assertChatCompletionMessages({
tokenUsage: true,
tx,
chatMsgs,
id: 'resp_684886977be881928c9db234e14ae7d80f8976796514dff9',
model: 'gpt-4-0613',
model,
resContent: res,
reqContent: promptContent
})
const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, streaming: true, promptTokens, completionTokens })

tx.end()
end()
69 changes: 60 additions & 9 deletions test/versioned/openai/chat-completions.test.js
@@ -148,22 +148,21 @@ test('chat.completions.create', async (t) => {
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, tokenUsage: true })
assertChatCompletionSummary({ tx, model, chatSummary })

tx.end()
end()
})
})

if (semver.gte(pkgVersion, '4.12.2')) {
await t.test('should create span on successful chat completion stream create', { skip: semver.lt(pkgVersion, '4.12.2') }, (t, end) => {
await t.test('should create span on successful chat completion stream create', (t, end) => {
const { client, agent, host, port } = t.nr
helper.runInTransaction(agent, async (tx) => {
const content = 'Streamed response'
const stream = await client.chat.completions.create({
stream: true,
messages: [{ role: 'user', content }],
stream_options: { include_usage: true },
messages: [{ role: 'user', content }]
})

let chunk = {}
@@ -202,8 +201,7 @@ test('chat.completions.create', async (t) => {
{ role: 'user', content },
{ role: 'user', content: 'What does 1 plus 1 equal?' }
],
stream: true,
stream_options: { include_usage: true },
stream: true
})

let res = ''
@@ -220,6 +218,55 @@
i++
}

const events = agent.customEventAggregator.events.toArray()
assert.equal(events.length, 4, 'should create a chat completion message and summary event')
const chatMsgs = events.filter(([{ type }]) => type === 'LlmChatCompletionMessage')
assertChatCompletionMessages({
tx,
chatMsgs,
id: 'chatcmpl-8MzOfSMbLxEy70lYAolSwdCzfguQZ',
model,
resContent: res,
reqContent: content,
noTokenUsage: true
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model, chatSummary, noUsageTokens: true })

tx.end()
end()
})
})

await t.test('should assign usage information when `include_usage` exists in stream', (t, end) => {
const { client, agent } = t.nr
helper.runInTransaction(agent, async (tx) => {
const content = 'Streamed response usage'
const model = 'gpt-4'
const stream = await client.chat.completions.create({
stream: true,
model,
messages: [
{ role: 'user', content },
{ role: 'user', content: 'What does 1 plus 1 equal?' }
],
        stream_options: { include_usage: true }
})

let chunk = {}
let res = ''
for await (chunk of stream) {
if (!chunk.usage) {
res += chunk.choices[0]?.delta?.content
}
}
assert.equal(chunk.headers, undefined, 'should remove response headers from user result')
assert.equal(chunk.choices[0].message.role, 'assistant')
const expectedRes = responses.get(content)
assert.equal(chunk.choices[0].message.content, expectedRes.streamData)
assert.equal(chunk.choices[0].message.content, res)
assert.deepEqual(chunk.usage, { prompt_tokens: 53, completion_tokens: 11, total_tokens: 64 })
const events = agent.customEventAggregator.events.toArray()
assert.equal(events.length, 4, 'should create a chat completion message and summary event')
const chatMsgs = events.filter(([{ type }]) => type === 'LlmChatCompletionMessage')
@@ -244,15 +291,17 @@
const { client, agent } = t.nr
const promptContent = 'Streamed response'
const promptContent2 = 'What does 1 plus 1 equal?'
const promptTokens = 11
const completionTokens = 53
let res = ''
const expectedModel = 'gpt-4'
const api = helper.getAgentApi()
function cb(model, content) {
assert.equal(model, expectedModel)
if (content === promptContent + ' ' + promptContent2) {
return 53
return promptTokens
} else if (content === res) {
return 11
return completionTokens
}
}
api.setLlmTokenCountCallback(cb)
@@ -276,7 +325,6 @@
const events = agent.customEventAggregator.events.toArray()
const chatMsgs = events.filter(([{ type }]) => type === 'LlmChatCompletionMessage')
assertChatCompletionMessages({
tokenUsage: true,
tx,
chatMsgs,
id: 'chatcmpl-8MzOfSMbLxEy70lYAolSwdCzfguQZ',
@@ -285,6 +333,9 @@
reqContent: promptContent
})

const chatSummary = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')[0]
assertChatCompletionSummary({ tx, model: expectedModel, chatSummary, promptTokens, completionTokens })

tx.end()
end()
})
16 changes: 11 additions & 5 deletions test/versioned/openai/common-chat-api.js
@@ -13,7 +13,7 @@ module.exports = {
const { match } = require('../../lib/custom-assertions')

function assertChatCompletionMessages(
{ tx, chatMsgs, id, model, reqContent, resContent, tokenUsage },
{ tx, chatMsgs, id, model, reqContent, resContent, noTokenUsage },
{ assert = require('node:assert') } = {}
) {
const [segment] = tx.trace.getChildren(tx.trace.root.id)
@@ -36,14 +36,14 @@ function assertChatCompletionMessages(
expectedChatMsg.sequence = 0
expectedChatMsg.id = `${id}-0`
expectedChatMsg.content = reqContent
if (tokenUsage) {
if (!noTokenUsage) {
expectedChatMsg.token_count = 0
}
} else if (msg[1].sequence === 1) {
expectedChatMsg.sequence = 1
expectedChatMsg.id = `${id}-1`
expectedChatMsg.content = 'What does 1 plus 1 equal?'
if (tokenUsage) {
if (!noTokenUsage) {
expectedChatMsg.token_count = 0
}
} else {
@@ -52,7 +52,7 @@ function assertChatCompletionMessages(
expectedChatMsg.id = `${id}-2`
expectedChatMsg.content = resContent
expectedChatMsg.is_response = true
if (tokenUsage) {
if (!noTokenUsage) {
expectedChatMsg.token_count = 0
}
}
@@ -63,7 +63,7 @@ function assertChatCompletionMessages(
}

function assertChatCompletionSummary(
{ tx, model, chatSummary, error = false },
{ tx, model, chatSummary, error = false, promptTokens = 53, completionTokens = 11, totalTokens = 64, noUsageTokens = false },
{ assert = require('node:assert') } = {}
) {
const [segment] = tx.trace.getChildren(tx.trace.root.id)
@@ -90,6 +90,12 @@ function assertChatCompletionSummary(
error
}

if (!(error || noUsageTokens)) {
expectedChatSummary['response.usage.prompt_tokens'] = promptTokens
expectedChatSummary['response.usage.completion_tokens'] = completionTokens
expectedChatSummary['response.usage.total_tokens'] = totalTokens
}

assert.equal(chatSummary[0].type, 'LlmChatCompletionSummary')
match(chatSummary[1], expectedChatSummary, { assert })
}