Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
New llama-run
- Added readline.cpp include
- Created run_chat_mode():
  - Initializes readline with command history
  - Maintains conversation history
  - Applies chat templates to format messages
  - Submits completion tasks to the server queue
  - Displays assistant responses interactively

Signed-off-by: Eric Curtin <[email protected]>
  • Loading branch information
ericcurtin committed Nov 28, 2025
commit a4490651a260a378220fa07f5c3a7dc323b61490
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,6 @@ $ echo "source ~/.llama-completion.bash" >> ~/.bashrc
- [stb-image](https://github.com/nothings/stb) - Single-header image format decoder, used by multimodal subsystem - Public domain
- [nlohmann/json](https://github.com/nlohmann/json) - Single-header JSON library, used by various tools/examples - MIT License
- [minja](https://github.com/google/minja) - Minimal Jinja parser in C++, used by various tools/examples - MIT License
- [linenoise.cpp](./tools/run/linenoise.cpp/linenoise.cpp) - C++ library that provides readline-like line editing capabilities, used by `llama-run` - BSD 2-Clause License
- [readline.cpp](https://github.com/ericcurtin/readline.cpp) - C++ library that provides readline-like line editing capabilities, used by `llama-run` - MIT License
- [curl](https://curl.se/) - Client-side URL transfer library, used by various tools/examples - [CURL License](https://curl.se/docs/copyright.html)
- [miniaudio.h](https://github.com/mackron/miniaudio) - Single-header audio format decoder, used by multimodal subsystem - Public domain
2 changes: 1 addition & 1 deletion common/download.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
auto data_vec = static_cast<std::vector<char> *>(data);
Expand Down
64 changes: 62 additions & 2 deletions tools/run/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,53 @@
set(TARGET llama-run)
add_executable(${TARGET} run.cpp linenoise.cpp/linenoise.cpp)

# MinGW defaults to an older Windows API level; pin _WIN32_WINNT so the
# httplib/Winsock code paths compile (see linked CI failure).
if (MINGW)
    # fix: https://github.com/ggml-org/llama.cpp/actions/runs/9651004652/job/26617901362?pr=8006
    # NOTE(review): directory-scoped on purpose — the target is defined further
    # down in this file, and only llama-run is built from this directory.
    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
endif()

# llama-run compiles the server's HTTP components (server-http.cpp below),
# which require the cpp-httplib target — fail early with a hint if it is off.
if (NOT LLAMA_HTTPLIB)
    message(FATAL_ERROR "LLAMA_HTTPLIB is OFF, cannot build llama-run. Hint: to skip building run, set -DLLAMA_BUILD_RUN=OFF")
endif()

# Include server source files (except server.cpp which has its own main())
# llama-run reuses the server's HTTP/task/queue machinery by compiling those
# translation units directly into this executable rather than linking a
# separate library target.
set(SERVER_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../server)
# Explicit source list (no file(GLOB)): new files must be added here.
# Headers are listed alongside the .cpp files so IDE generators display them;
# they do not affect compilation.
set(TARGET_SRCS
    run.cpp
    ${SERVER_DIR}/server-http.cpp
    ${SERVER_DIR}/server-http.h
    ${SERVER_DIR}/server-task.cpp
    ${SERVER_DIR}/server-task.h
    ${SERVER_DIR}/server-queue.cpp
    ${SERVER_DIR}/server-queue.h
    ${SERVER_DIR}/server-common.cpp
    ${SERVER_DIR}/server-common.h
    ${CMAKE_CURRENT_SOURCE_DIR}/run-chat.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/run-chat.h
    ${CMAKE_CURRENT_SOURCE_DIR}/readline.cpp/src/readline.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/readline.cpp/src/buffer.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/readline.cpp/src/history.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/readline.cpp/src/terminal.cpp
)

# Generate public asset headers (needed by server-http.cpp)
# Each asset under the server's public/ dir is converted into a C++ header in
# the binary dir via scripts/xxd.cmake, and appended to TARGET_SRCS so the
# build graph knows to regenerate it when the asset changes.
set(PUBLIC_ASSETS
    index.html.gz
    loading.html
)

foreach(asset ${PUBLIC_ASSETS})
    set(input "${SERVER_DIR}/public/${asset}")
    set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
    list(APPEND TARGET_SRCS ${output})
    add_custom_command(
        DEPENDS "${input}"
        OUTPUT "${output}"
        COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
        COMMENT "Generating ${asset}.hpp"
        # VERBATIM: without it argument escaping is platform-dependent and the
        # command breaks when the source/binary paths contain spaces.
        VERBATIM
    )
    # OUTPUT files of add_custom_command are marked GENERATED automatically;
    # kept explicit here for clarity.
    set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
endforeach()

# Build llama-run from run.cpp plus the reused server sources and the
# generated asset headers collected in TARGET_SRCS above.
add_executable(${TARGET} ${TARGET_SRCS})

# TODO: avoid copying this code block from common/CMakeLists.txt
# Accumulates optional platform/system libraries; the population logic
# continues in the section below (collapsed in this diff view).
set(LLAMA_RUN_EXTRA_LIBS "")
Expand All @@ -19,5 +67,17 @@ if (CMAKE_SYSTEM_NAME MATCHES "AIX")
target_link_libraries(${TARGET} PRIVATE -lbsd)
endif()

target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_RUN_EXTRA_LIBS})
# Include directories for server headers, mtmd, the project root, the bundled
# readline.cpp library, and the generated asset headers in the binary dir.
# Consolidated into one call; PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) so the
# path stays correct when llama.cpp is built as a subproject — matches the
# xxd.cmake invocation above, which already uses PROJECT_SOURCE_DIR.
target_include_directories(${TARGET} PRIVATE
    ${SERVER_DIR}
    ${SERVER_DIR}/../mtmd
    ${PROJECT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/readline.cpp/include
    ${CMAKE_CURRENT_BINARY_DIR}
)

target_link_libraries(${TARGET} PRIVATE common mtmd llama cpp-httplib ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_RUN_EXTRA_LIBS})

# NOTE(review): ws2_32 (Winsock) is presumably required by the httplib/server
# networking code on Windows — confirm against the server build.
if (WIN32)
    target_link_libraries(${TARGET} PRIVATE ws2_32)
endif()

# C++17 is required privately; does not propagate to consumers.
target_compile_features(${TARGET} PRIVATE cxx_std_17)
Loading
Loading