Merge branch 'hp/split/load-model' into hp/split/load-model-from-url

# Conflicts:
#	examples/gguf-split/gguf-split.cpp
ggml-org · Mar 22, 2024 · a4a6d95
2 parents d88d66b + e474e45
commit a4a6d95
Show file tree

Hide file tree

Showing 5 changed files with 213 additions and 217 deletions.
diff --git a/common/common.cpp b/common/common.cpp
@@ -1888,7 +1888,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
             return NULL;
         }
 
-        auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT);
+        auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
         if (key_n_split >= 0) {
             n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
         }
@@ -1907,7 +1907,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
         char split_path[PATH_MAX] = {0};
         strncpy(split_path, path_model, sizeof(split_path) - 1);
         char split_prefix[PATH_MAX] = {0};
-        if (!llama_split_prefix(split_prefix, split_path, strlen(split_path), 0, n_split)) {
+        if (!llama_split_prefix(split_prefix, sizeof(split_prefix), split_path, 0, n_split)) {
             fprintf(stderr, "\n%s: unexpected input file name: %s"
                             " n_split=%d\n", __func__, split_path, n_split);
             return NULL;

diff --git a/common/common.h b/common/common.h
@@ -306,5 +306,5 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
 //
 // Split utils
 //
-static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT   = "split.no";
-static const char * const LLM_KV_GENERAL_SPLIT_N_SPLIT   = "split.count";
+static const char * const LLM_KV_SPLIT_NO   = "split.no";
+static const char * const LLM_KV_SPLIT_COUNT   = "split.count";
diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp
@@ -26,7 +26,7 @@ enum split_operation : uint8_t {
     SPLIT_OP_MERGE,
 };
 
-static const char * const LLM_KV_GENERAL_SPLIT_N_TENSORS = "split.tensors.count";
+static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
 
 struct split_params {
     split_operation operation = SPLIT_OP_SPLIT;
@@ -175,9 +175,9 @@ struct split_strategy {
         if (i_split == 0) {
             gguf_set_kv(ctx_out, ctx_gguf);
         }
-        gguf_set_val_u16(ctx_out, LLM_KV_GENERAL_SPLIT_I_SPLIT,  i_split);
-        gguf_set_val_u16(ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT,  n_split);
-        gguf_set_val_i32(ctx_out, LLM_KV_GENERAL_SPLIT_N_TENSORS,n_tensors);
+        gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO, i_split);
+        gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT, n_split);
+        gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT, n_tensors);
 
         // populate the original tensors, so we get an initial metadata
         for (int i = i_split * params.n_split_tensors; i < n_tensors && i < (i_split + 1) * params.n_split_tensors; ++i) {
@@ -326,12 +326,12 @@ static void gguf_merge(const split_params & split_params) {
         ctx_metas.push_back(ctx_meta);
 
         if (i_split == 0) {
-            auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT);
+            auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
             if (key_n_split < 0) {
                 fprintf(stderr,
                         "\n%s: input file does not contain %s metadata\n",
                         __func__,
-                        LLM_KV_GENERAL_SPLIT_N_SPLIT);
+                        LLM_KV_SPLIT_COUNT);
                 gguf_free(ctx_gguf);
                 ggml_free(ctx_meta);
                 gguf_free(ctx_out);
@@ -353,7 +353,7 @@ static void gguf_merge(const split_params & split_params) {
             }
 
             // Verify the file naming and extract split_prefix
-            if (!llama_split_prefix(split_prefix, split_path, strlen(split_path), i_split, n_split)) {
+            if (!llama_split_prefix(split_prefix, sizeof (split_prefix), split_path, i_split, n_split)) {
                 fprintf(stderr, "\n%s: unexpected input file name: %s"
                                 " i_split=%d"
                                 " n_split=%d\n", __func__,
@@ -366,7 +366,7 @@ static void gguf_merge(const split_params & split_params) {
             }
 
             // Do not trigger merge if we try to merge again the output
-            gguf_set_val_u16(ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT, 0);
+            gguf_set_val_u16(ctx_gguf, LLM_KV_SPLIT_COUNT, 0);
 
             // Set metadata from the first split
             gguf_set_kv(ctx_out, ctx_gguf);