general improvements, enable server prompt caching

2024-01-25 13:45:58 +01:00
parent f5650475c7
commit 784fea96d6
6 changed files with 112 additions and 93 deletions


@@ -83,6 +83,7 @@ int64_t LlamaCppWeb::completeSelect(const std::string_view prompt, const std::ve
{"top_p", 1.0}, // disable
{"n_predict", 256}, // unlikely to ever be so high
{"seed", _rng()},
{"cache_prompt", static_cast<bool>(_use_server_cache)},
});
if (ret.empty()) {
@@ -119,6 +120,7 @@ std::string LlamaCppWeb::completeLine(const std::string_view prompt) {
{"n_predict", 1000},
{"seed", _rng()},
{"stop", {"\n"}},
{"cache_prompt", static_cast<bool>(_use_server_cache)},
});
if (ret.empty() || ret.count("content") == 0) {
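
Background on the new field: cache_prompt is a request parameter of the llama.cpp HTTP server. When it is true, the server keeps the KV cache from the previous request and only re-evaluates the part of the prompt that differs from the cached prefix, which makes repeated completions over a shared prompt prefix considerably cheaper. The sketch below shows a minimal standalone request with the field set; it assumes nlohmann::json, cpp-httplib, and a server listening on localhost:8080, and is illustrative rather than this project's actual transport code.

// Minimal sketch: POST a completion request to a llama.cpp server with
// prompt caching enabled. Assumes nlohmann::json, cpp-httplib, and a
// server on localhost:8080; not this project's own request helper.
#include <iostream>
#include <string>
#include <httplib.h>
#include <nlohmann/json.hpp>

int main() {
    nlohmann::json body {
        {"prompt", "Hello"},
        {"n_predict", 16},
        // With cache_prompt set, the server reuses the KV cache of the
        // previous request for the matching prompt prefix.
        {"cache_prompt", true},
    };

    httplib::Client cli("http://localhost:8080");
    auto res = cli.Post("/completion", body.dump(), "application/json");
    if (!res || res->status != 200) {
        std::cerr << "request failed\n";
        return 1;
    }

    auto ret = nlohmann::json::parse(res->body);
    if (ret.count("content") != 0) {
        std::cout << ret["content"].get<std::string>() << "\n";
    }
    return 0;
}

The static_cast<bool>(_use_server_cache) in the diff is presumably there so that nlohmann::json serializes the flag as a JSON boolean even if _use_server_cache is not a plain bool (e.g. an atomic flag), since the server expects a boolean for this field.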