Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
feature: add bindings for the remaining llama.cpp LoRA functions
Signed-off-by: deadprogram <[email protected]>
  • Loading branch information
deadprogram committed Nov 23, 2025
commit ce706bc14219da8eda4de14540b52f50739390f6
4 changes: 2 additions & 2 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ This is a list of all functions exposed by `llama.cpp` and the current state of
- [x] `llama_state_set_data`

### LoRA Functions
- [x] `llama_adapter_get_alora_invocation_tokens`
- [x] `llama_adapter_get_alora_n_invocation_tokens`
- [x] `llama_adapter_lora_free`
- [x] `llama_adapter_lora_init`
- [x] `llama_adapter_meta_count`
Expand Down Expand Up @@ -223,8 +225,6 @@ Note that these functions are considered by `llama.cpp` to be experimental, and

## Functions in `llama.cpp` still needing wrappers

- [ ] `llama_adapter_get_alora_invocation_tokens`
- [ ] `llama_adapter_get_alora_n_invocation_tokens`
- [ ] `llama_apply_adapter_cvec`
- [ ] `llama_attach_threadpool`
- [ ] `llama_detach_threadpool`
Expand Down
42 changes: 42 additions & 0 deletions pkg/llama/lora.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ var (
// Remove all LoRA adapters from given context
// LLAMA_API void llama_clear_adapter_lora(struct llama_context * ctx);
clearAdapterLoraFunc ffi.Fun

// LLAMA_API uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter);
adapterGetAloraNInvocationTokensFunc ffi.Fun

// LLAMA_API const llama_token * llama_adapter_get_alora_invocation_tokens (const struct llama_adapter_lora * adapter);
adapterGetAloraInvocationTokensFunc ffi.Fun
)

var (
Expand Down Expand Up @@ -90,13 +96,23 @@ func loadLoraFuncs(lib ffi.Lib) error {
if setAdapterLoraFunc, err = lib.Prep("llama_set_adapter_lora", &ffi.TypeSint32, &ffi.TypePointer, &ffi.TypePointer, &ffi.TypeFloat); err != nil {
return loadError("llama_set_adapter_lora", err)
}

if rmAdapterLoraFunc, err = lib.Prep("llama_rm_adapter_lora", &ffi.TypeSint32, &ffi.TypePointer, &ffi.TypePointer); err != nil {
return loadError("llama_rm_adapter_lora", err)
}

if clearAdapterLoraFunc, err = lib.Prep("llama_clear_adapter_lora", &ffi.TypeVoid, &ffi.TypePointer); err != nil {
return loadError("llama_clear_adapter_lora", err)
}

if adapterGetAloraNInvocationTokensFunc, err = lib.Prep("llama_adapter_get_alora_n_invocation_tokens", &ffi.TypeUint64, &ffi.TypePointer); err != nil {
return loadError("llama_adapter_get_alora_n_invocation_tokens", err)
}

if adapterGetAloraInvocationTokensFunc, err = lib.Prep("llama_adapter_get_alora_invocation_tokens", &ffi.TypePointer, &ffi.TypePointer); err != nil {
return loadError("llama_adapter_get_alora_invocation_tokens", err)
}

return nil
}

Expand Down Expand Up @@ -263,3 +279,29 @@ func ClearAdapterLora(ctx Context) {
}
clearAdapterLoraFunc.Call(nil, unsafe.Pointer(&ctx))
}

// AdapterGetAloraNInvocationTokens reports how many invocation tokens the
// given adapter has. A zero adapter handle yields 0 without calling into the
// native library.
func AdapterGetAloraNInvocationTokens(adapter AdapterLora) uint64 {
	// count keeps its zero value unless the native call below fills it in.
	var count ffi.Arg
	if adapter != 0 {
		adapterGetAloraNInvocationTokensFunc.Call(unsafe.Pointer(&count), unsafe.Pointer(&adapter))
	}
	return uint64(count)
}

// AdapterGetAloraInvocationTokens returns the invocation tokens for the adapter.
//
// The number of tokens is obtained from AdapterGetAloraNInvocationTokens. When
// that count is zero (which includes a zero adapter handle), or when the
// native call hands back a NULL pointer, the result is nil.
//
// The returned slice is a Go-owned copy. The pointer returned by
// llama_adapter_get_alora_invocation_tokens refers to memory that is not
// managed by Go (presumably owned by the adapter), so a slice aliasing it
// could dangle once the adapter is released with AdapterLoraFree.
func AdapterGetAloraInvocationTokens(adapter AdapterLora) []Token {
	n := AdapterGetAloraNInvocationTokens(adapter)
	if n == 0 {
		return nil
	}

	var ptr *Token
	adapterGetAloraInvocationTokensFunc.Call(unsafe.Pointer(&ptr), unsafe.Pointer(&adapter))
	if ptr == nil {
		return nil
	}

	// Copy out of native memory so the result remains valid independently of
	// the adapter's lifetime.
	tokens := make([]Token, n)
	copy(tokens, unsafe.Slice(ptr, n))
	return tokens
}
46 changes: 46 additions & 0 deletions pkg/llama/lora_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,49 @@ func TestClearAdapterLora(t *testing.T) {
ClearAdapterLora(ctx)
t.Logf("ClearAdapterLora succeeded")
}

// TestAdapterGetAloraNInvocationTokens loads the test model and LoRA adapter,
// then logs the invocation-token count reported for that adapter.
func TestAdapterGetAloraNInvocationTokens(t *testing.T) {
	// Resolve both fixture paths before the library is set up.
	modelPath, loraPath := testLoraModelFileName(t), testLoraAdaptorFileName(t)

	testSetup(t)
	defer testCleanup(t)

	mdl, loadErr := ModelLoadFromFile(modelPath, ModelDefaultParams())
	if loadErr != nil {
		t.Fatalf("ModelLoadFromFile failed: %v", loadErr)
	}
	defer ModelFree(mdl)

	lora, initErr := AdapterLoraInit(mdl, loraPath)
	if initErr != nil {
		t.Fatalf("AdapterLoraInit failed: %v", initErr)
	}
	defer AdapterLoraFree(lora)

	t.Logf("AdapterGetAloraNInvocationTokens returned: %d", AdapterGetAloraNInvocationTokens(lora))
}

// TestAdapterGetAloraInvocationTokens loads the test model and LoRA adapter
// and verifies that the slice returned by AdapterGetAloraInvocationTokens has
// exactly as many entries as AdapterGetAloraNInvocationTokens reports.
func TestAdapterGetAloraInvocationTokens(t *testing.T) {
	modelFile := testLoraModelFileName(t)
	loraFile := testLoraAdaptorFileName(t)

	testSetup(t)
	defer testCleanup(t)

	model, err := ModelLoadFromFile(modelFile, ModelDefaultParams())
	if err != nil {
		t.Fatalf("ModelLoadFromFile failed: %v", err)
	}
	defer ModelFree(model)

	adapter, err := AdapterLoraInit(model, loraFile)
	if err != nil {
		t.Fatalf("AdapterLoraInit failed: %v", err)
	}
	defer AdapterLoraFree(adapter)

	// The two bindings must agree: the slice length is the reported count.
	want := AdapterGetAloraNInvocationTokens(adapter)
	tokens := AdapterGetAloraInvocationTokens(adapter)
	if uint64(len(tokens)) != want {
		t.Fatalf("AdapterGetAloraInvocationTokens returned %d tokens, want %d", len(tokens), want)
	}
	t.Logf("AdapterGetAloraInvocationTokens returned %d tokens", len(tokens))
}