general improvements, enable server prompt caching
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
#include <nlohmann/json_fwd.hpp>
|
||||
|
||||
#include <random>
|
||||
#include <atomic>
|
||||
|
||||
struct LlamaCppWeb : public TextCompletionI {
|
||||
// this mutex locks internally
|
||||
@@ -14,6 +15,8 @@ struct LlamaCppWeb : public TextCompletionI {
|
||||
// this is a bad idea
|
||||
static std::minstd_rand thread_local _rng;
|
||||
|
||||
std::atomic<bool> _use_server_cache {true};
|
||||
|
||||
~LlamaCppWeb(void);
|
||||
|
||||
bool isGood(void) override;
|
||||
|
||||
Reference in New Issue
Block a user