Python bindings: reverse prompts #1232

Draft · wants to merge 2 commits into base: main
3 changes: 3 additions & 0 deletions gpt4all-bindings/python/gpt4all/gpt4all.py
@@ -252,6 +252,7 @@ def generate(
n_predict: Optional[int] = None,
streaming: bool = False,
callback: pyllmodel.ResponseCallbackType = pyllmodel.empty_response_callback,
reverse_prompts: List[str] = [],
) -> Union[str, Iterable[str]]:
"""
Generate outputs from any GPT4All model.
@@ -268,6 +269,7 @@ def generate(
n_predict: Equivalent to max_tokens, exists for backwards compatibility.
streaming: If True, this method will instead return a generator that yields tokens as the model generates them.
callback: A function with arguments token_id:int and response:str, which receives the tokens from the model as they are generated and stops the generation by returning False.
reverse_prompts: A list of strings which, when generated by the model, stop the generation.

Returns:
Either the entire completion or a generator that yields the completion token by token.
@@ -282,6 +284,7 @@ def generate(
repeat_last_n=repeat_last_n,
n_batch=n_batch,
n_predict=n_predict if n_predict is not None else max_tokens,
reverse_prompts=reverse_prompts,
)

if self._is_chat_session_activated:
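For context, a minimal usage sketch of the new `reverse_prompts` parameter (the model file name, prompt text, and role markers below are placeholders, not part of this diff):

```python
from gpt4all import GPT4All

# Placeholder model file; any locally available GPT4All model should work.
model = GPT4All("ggml-model-gpt4all-falcon-q4_0.bin")

# Generation stops as soon as the model emits one of these exact strings;
# the matched string itself is not included in the returned text.
output = model.generate(
    "### Human: Name three primary colors.\n### Assistant:",
    max_tokens=128,
    reverse_prompts=["### Human:", "### Assistant:"],
)
print(output)
```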
30 changes: 26 additions & 4 deletions gpt4all-bindings/python/gpt4all/pyllmodel.py
@@ -275,6 +275,7 @@ def prompt_model(
repeat_last_n: int = 10,
context_erase: float = 0.75,
reset_context: bool = False,
reverse_prompts: List[str] = [],
):
"""
Generate a response from the model for a given prompt.
@@ -317,7 +318,7 @@ def prompt_model(
self.model,
prompt_ptr,
PromptCallback(self._prompt_callback),
ResponseCallback(self._callback_decoder(callback)),
ResponseCallback(self._callback_decoder(callback, reverse_prompts)),
RecalculateCallback(self._recalculate_callback),
self.context,
)
@@ -369,10 +370,31 @@ def run_llmodel_prompt(prompt: str, callback: ResponseCallbackType, **kwargs):
break
yield response

def _callback_decoder(self, callback: ResponseCallbackType) -> RawResponseCallbackType:
def _callback_decoder(self,
callback: ResponseCallbackType,
reverse_prompts: List[str] = []
) -> RawResponseCallbackType:

self.token_cache = ""

def _raw_callback(token_id: int, response: bytes) -> bool:
nonlocal callback
return callback(token_id, response.decode("utf-8", "replace"))
nonlocal callback, reverse_prompts

response = response.decode("utf-8", "replace")

self.token_cache += response

if self.token_cache in reverse_prompts:
self.token_cache = ""
return False

if any(rp.startswith(self.token_cache) for rp in reverse_prompts):
return True

token_passed = self.token_cache
self.token_cache = ""

return callback(token_id, token_passed)

return _raw_callback

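To make the buffering rule in `_raw_callback` concrete, here is a small standalone sketch (illustrative only, not part of the diff) that replays the same logic on a list of already-decoded tokens: an exact match of the accumulated cache against a reverse prompt stops generation, a prefix match keeps buffering, and anything else is flushed through to the caller.

```python
from typing import List


def simulate_stream(tokens: List[str], reverse_prompts: List[str]) -> str:
    """Replay the reverse-prompt buffering rule on a list of decoded tokens."""
    emitted = []
    cache = ""
    for token in tokens:
        cache += token
        if cache in reverse_prompts:
            break                 # exact match: stop, never emit the reverse prompt
        if any(rp.startswith(cache) for rp in reverse_prompts):
            continue              # partial match: keep buffering
        emitted.append(cache)     # no match: flush the buffered text
        cache = ""
    return "".join(emitted)


print(simulate_stream(["Paris", " is", " nice.", "###", " Human", ":"], ["### Human:"]))
# -> "Paris is nice."
```

One consequence of this design is that once the cache stops matching any reverse-prompt prefix, it is flushed in a single piece, so the user callback can receive several tokens' worth of text in one invocation.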