Skip to content

Commit

Permalink
Merge branch 'hp/split/load-model' into hp/split/load-model-from-url
Browse files (browse the repository at this point in the history)
# Conflicts:
#	examples/gguf-split/gguf-split.cpp
  • Loading branch information
phymbert committed Mar 22, 2024
2 parents d88d66b + e474e45 commit a4a6d95
Show file tree
Hide file tree
Showing 5 changed files with 213 additions and 217 deletions.
4 changes: 2 additions & 2 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1888,7 +1888,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
return NULL;
}

- auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT);
+ auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
if (key_n_split >= 0) {
n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
}
Expand All @@ -1907,7 +1907,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
char split_path[PATH_MAX] = {0};
strncpy(split_path, path_model, sizeof(split_path) - 1);
char split_prefix[PATH_MAX] = {0};
- if (!llama_split_prefix(split_prefix, split_path, strlen(split_path), 0, n_split)) {
+ if (!llama_split_prefix(split_prefix, sizeof(split_prefix), split_path, 0, n_split)) {
fprintf(stderr, "\n%s: unexpected input file name: %s"
" n_split=%d\n", __func__, split_path, n_split);
return NULL;
Expand Down
4 changes: 2 additions & 2 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,5 +306,5 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
//
// Split utils
//
- static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = "split.no";
- static const char * const LLM_KV_GENERAL_SPLIT_N_SPLIT = "split.count";
+ static const char * const LLM_KV_SPLIT_NO = "split.no";
+ static const char * const LLM_KV_SPLIT_COUNT = "split.count";
16 changes: 8 additions & 8 deletions examples/gguf-split/gguf-split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ enum split_operation : uint8_t {
SPLIT_OP_MERGE,
};

- static const char * const LLM_KV_GENERAL_SPLIT_N_TENSORS = "split.tensors.count";
+ static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";

struct split_params {
split_operation operation = SPLIT_OP_SPLIT;
Expand Down Expand Up @@ -175,9 +175,9 @@ struct split_strategy {
if (i_split == 0) {
gguf_set_kv(ctx_out, ctx_gguf);
}
- gguf_set_val_u16(ctx_out, LLM_KV_GENERAL_SPLIT_I_SPLIT, i_split);
- gguf_set_val_u16(ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, n_split);
- gguf_set_val_i32(ctx_out, LLM_KV_GENERAL_SPLIT_N_TENSORS,n_tensors);
+ gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO, i_split);
+ gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT, n_split);
+ gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT, n_tensors);

// populate the original tensors, so we get an initial metadata
for (int i = i_split * params.n_split_tensors; i < n_tensors && i < (i_split + 1) * params.n_split_tensors; ++i) {
Expand Down Expand Up @@ -326,12 +326,12 @@ static void gguf_merge(const split_params & split_params) {
ctx_metas.push_back(ctx_meta);

if (i_split == 0) {
- auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT);
+ auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
if (key_n_split < 0) {
fprintf(stderr,
"\n%s: input file does not contain %s metadata\n",
__func__,
- LLM_KV_GENERAL_SPLIT_N_SPLIT);
+ LLM_KV_SPLIT_COUNT);
gguf_free(ctx_gguf);
ggml_free(ctx_meta);
gguf_free(ctx_out);
Expand All @@ -353,7 +353,7 @@ static void gguf_merge(const split_params & split_params) {
}

// Verify the file naming and extract split_prefix
- if (!llama_split_prefix(split_prefix, split_path, strlen(split_path), i_split, n_split)) {
+ if (!llama_split_prefix(split_prefix, sizeof (split_prefix), split_path, i_split, n_split)) {
fprintf(stderr, "\n%s: unexpected input file name: %s"
" i_split=%d"
" n_split=%d\n", __func__,
Expand All @@ -366,7 +366,7 @@ static void gguf_merge(const split_params & split_params) {
}

// Do not trigger merge if we try to merge again the output
- gguf_set_val_u16(ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT, 0);
+ gguf_set_val_u16(ctx_gguf, LLM_KV_SPLIT_COUNT, 0);

// Set metadata from the first split
gguf_set_kv(ctx_out, ctx_gguf);
Expand Down
Loading

0 comments on commit a4a6d95

Please sign in to comment.