Commit b670949: ⚡️ context pruning

sestinj committed May 17, 2024
1 parent be741ac
Showing 4 changed files with 33 additions and 15 deletions.
5 changes: 4 additions & 1 deletion core/config/default.ts
@@ -1,4 +1,7 @@
-import { ContextProviderWithParams, SerializedContinueConfig } from "../index.js";
+import {
+  ContextProviderWithParams,
+  SerializedContinueConfig,
+} from "../index.js";
 
 export const defaultConfig: SerializedContinueConfig = {
   models: [
4 changes: 2 additions & 2 deletions core/context/retrieval/retrieval.ts
@@ -126,9 +126,9 @@ export async function retrieveContextItemsFromEmbeddings(
     );
 
     results.sort(
-      (a, b) => scores[results.indexOf(b)] - scores[results.indexOf(a)],
+      (a, b) => scores[results.indexOf(a)] - scores[results.indexOf(b)],
     );
-    results = results.slice(0, nFinal);
+    results = results.slice(-nFinal);
   }
 
   if (results.length === 0) {
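Note on the retrieval change: both the old and the new code keep the same nFinal highest-scoring results. The difference is that sorting ascending and taking slice(-nFinal) leaves the strongest match last rather than first, presumably so it sits closest to the end of the assembled context. A standalone sketch of the new ordering (hypothetical data, not code from this commit; it pairs scores with items before sorting, which also sidesteps the indexOf lookup against a half-sorted array):

// Hypothetical data, for illustration only.
const results = ["chunkA", "chunkB", "chunkC", "chunkD"];
const scores = [0.12, 0.91, 0.55, 0.34]; // similarity score per result, by index
const nFinal = 2;

// Pair each item with its score up front, then sort ascending by score.
const ranked = results
  .map((item, i) => ({ item, score: scores[i] }))
  .sort((a, b) => a.score - b.score);

// slice(-nFinal) keeps the nFinal highest-scoring items, strongest last.
const kept = ranked.slice(-nFinal).map((r) => r.item);
// kept === ["chunkC", "chunkB"]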
19 changes: 10 additions & 9 deletions core/llm/constants.ts
@@ -7,20 +7,22 @@ const DEFAULT_ARGS = {
   temperature: DEFAULT_TEMPERATURE,
 };
 
+const GPT_4_CTX_LEN = 4096;
+
 const CONTEXT_LENGTH_FOR_MODEL: { [name: string]: number } = {
   "gpt-3.5-turbo": 4096,
   "gpt-3.5-turbo-0613": 4096,
   "gpt-3.5-turbo-16k": 16_384,
-  "gpt-4": 8192,
   "gpt-35-turbo-16k": 16_384,
   "gpt-35-turbo-0613": 4096,
   "gpt-35-turbo": 4096,
-  "gpt-4-32k": 32_768,
-  "gpt-4-turbo-preview": 128_000,
-  "gpt-4o": 128_000,
-  "gpt-4-vision": 128_000,
-  "gpt-4-0125-preview": 128_000,
-  "gpt-4-1106-preview": 128_000,
+  "gpt-4": GPT_4_CTX_LEN,
+  "gpt-4-32k": GPT_4_CTX_LEN,
+  "gpt-4-turbo-preview": GPT_4_CTX_LEN,
+  "gpt-4o": GPT_4_CTX_LEN,
+  "gpt-4-vision": GPT_4_CTX_LEN,
+  "gpt-4-0125-preview": GPT_4_CTX_LEN,
+  "gpt-4-1106-preview": GPT_4_CTX_LEN,
 };
 
 const TOKEN_BUFFER_FOR_SAFETY = 350;
@@ -35,6 +37,5 @@ export {
   DEFAULT_MAX_TOKENS,
   MAX_CHUNK_SIZE,
   PROXY_URL,
-  TOKEN_BUFFER_FOR_SAFETY
+  TOKEN_BUFFER_FOR_SAFETY,
 };
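
This table is presumably what the context pruning consults: with this commit, every gpt-4-family model is pinned to a shared GPT_4_CTX_LEN of 4,096 tokens (down from between 8,192 and 128,000), so prompts for those models get trimmed far more aggressively. A hedged sketch of how such a table is typically consumed; promptTokenBudget and DEFAULT_CONTEXT_LENGTH are hypothetical helpers, not exports of this file:

// Hypothetical helper, for illustration: the usable prompt budget is the
// model's context length minus a safety buffer and the completion's tokens.
const DEFAULT_CONTEXT_LENGTH = 4096; // assumed fallback for unknown models

function promptTokenBudget(model: string, maxNewTokens: number): number {
  const contextLength =
    CONTEXT_LENGTH_FOR_MODEL[model] ?? DEFAULT_CONTEXT_LENGTH;
  return contextLength - TOKEN_BUFFER_FOR_SAFETY - maxNewTokens;
}

// e.g. promptTokenBudget("gpt-4o", 1024) === 4096 - 350 - 1024 === 2722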

20 changes: 17 additions & 3 deletions core/util/verticalEdit.ts
@@ -8,7 +8,13 @@ import {
 } from "../autocomplete/lineStream.js";
 import { streamDiff } from "../diff/streamDiff.js";
 import { streamLines } from "../diff/util.js";
-import { ChatMessage, DiffLine, ILLM } from "../index.js";
+import {
+  ChatMessage,
+  DiffLine,
+  ILLM,
+  LLMFullCompletionOptions,
+  ModelProvider,
+} from "../index.js";
 import { gptEditPrompt } from "../llm/templates/edit.js";
 import { Telemetry } from "./posthog.js";

@@ -46,6 +52,10 @@ function modelIsInept(model: string): boolean {
   return !(model.includes("gpt") || model.includes("claude"));
 }
 
+function isGpt4Trial(model: string, provider: ModelProvider): boolean {
+  return provider === "free-trial" && model.startsWith("gpt-4");
+}
+
 export async function* streamDiffLines(
   prefix: string,
   highlighted: string,
@@ -82,6 +92,10 @@ export async function* streamDiffLines(
   );
   const inept = modelIsInept(llm.model);
 
+  const options: LLMFullCompletionOptions = {};
+  if (isGpt4Trial(llm.model, llm.providerName)) {
+    options.maxTokens = 2048;
+  }
   const completion =
     typeof prompt === "string"
       ? llm.streamComplete(prompt, { raw: true })
@@ -106,11 +120,11 @@
     diffLines = addIndentation(diffLines, indentation);
   }
 
-  let seenGreen = false
+  let seenGreen = false;
   for await (let diffLine of diffLines) {
     yield diffLine;
     if (diffLine.type === "new") {
-      seenGreen = true
+      seenGreen = true;
     } else if (onlyOneInsertion && seenGreen && diffLine.type === "same") {
       break;
     }
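The net effect in verticalEdit.ts: gpt-4 models served through the "free-trial" provider now get their completions capped at 2,048 tokens (the collapsed lines presumably thread options into the streamComplete/streamChat calls), while the final hunk only adds missing semicolons. A standalone sketch of the guard; the types are narrowed stand-ins for illustration, since in the repository ModelProvider and LLMFullCompletionOptions come from core/index.js:

// Narrowed stand-ins for the real types, for illustration only.
type ModelProvider = "free-trial" | "openai" | "anthropic";
interface LLMFullCompletionOptions {
  maxTokens?: number;
}

function isGpt4Trial(model: string, provider: ModelProvider): boolean {
  return provider === "free-trial" && model.startsWith("gpt-4");
}

// Build per-request options: trial users of gpt-4 models get a hard cap.
const options: LLMFullCompletionOptions = {};
if (isGpt4Trial("gpt-4o", "free-trial")) {
  options.maxTokens = 2048;
}
// options is now { maxTokens: 2048 }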
