Handle default marker insertion for LFM2

ggml-org · tdakhran · Nov 25, 2025 · Nov 27, 2025 · Nov 27, 2025 · Nov 27, 2025
commit 1fea2d1b068ceeed3fccebac21221ceaf68b1589
@@ -313,7 +313,7 @@ int main(int argc, char ** argv) {
         g_is_generating = true;
         if (params.prompt.find(mtmd_default_marker()) == std::string::npos) {
             for (size_t i = 0; i < params.image.size(); i++) {
-                params.prompt += mtmd_default_marker();
+                params.prompt = mtmd::mtmd_add_default_marker(ctx.ctx_vision.get(), params.prompt);
             }
         }
         common_chat_msg msg;
@@ -378,7 +378,7 @@ int main(int argc, char ** argv) {
                 std::string media_path = line.substr(7);
                 if (ctx.load_media(media_path)) {
                     LOG("%s %s loaded\n", media_path.c_str(), is_image ? "image" : "audio");
-                    content += mtmd_default_marker();
+                    content = mtmd::mtmd_add_default_marker(ctx.ctx_vision.get(), content);
                 }
                 // else, error is already printed by libmtmd
                 continue;

@@ -1103,3 +1103,12 @@ void mtmd_log_set(ggml_log_callback log_callback, void * user_data) {
     g_logger_state.log_callback = log_callback ? log_callback : clip_log_callback_default;
     g_logger_state.log_callback_user_data = user_data;
 }
+
+std::string mtmd::mtmd_add_default_marker(mtmd_context *ctx, const std::string &str) {
+    // LFM2 projector: image embeddings are positioned before the text, so the marker is prepended
+    if (ctx && ctx->ctx_v && clip_get_projector_type(ctx->ctx_v) == PROJECTOR_TYPE_LFM2) {
+        return mtmd_default_marker() + str;
+    }
+    // any other projector, or a null ctx / ctx->ctx_v: the marker is appended after the text
+    return str + mtmd_default_marker();
+}
@@ -299,6 +299,9 @@ struct input_chunks {
     }
 };
 
+// returns str with mtmd_default_marker() added; the marker's position (before or after str) depends on the projector of ctx
+std::string mtmd_add_default_marker(mtmd_context *ctx, const std::string &str);
+
 } // namespace mtmd
 
 #endif
-Original file line number
+Diff line change
@@ -299,6 +299,9 @@ struct input_chunks {
         }
     };
+    // insert mtmd_default_marker() into given string, position depends on the projector
+    std::string mtmd_add_default_marker(mtmd_context *ctx, const std::string &str);
     } // namespace mtmd
     #endif
@@ Expand Down @@