general improvements, enable server prompt caching

2024-01-25 13:45:58 +01:00
parent f5650475c7
commit 784fea96d6
6 changed files with 112 additions and 93 deletions
--- a/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp
+++ b/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp
@@ -6,6 +6,7 @@
 #include <nlohmann/json_fwd.hpp>

 #include <random>
+#include <atomic>

 struct LlamaCppWeb : public TextCompletionI {
 	// this mutex locks internally
@@ -14,6 +15,8 @@ struct LlamaCppWeb : public TextCompletionI {
 	// this is a bad idea
 	static std::minstd_rand thread_local _rng;

+	std::atomic<bool> _use_server_cache {true};
+
 	~LlamaCppWeb(void);

 	bool isGood(void) override;