llamaindex
Comparing version 0.0.0-20230926204311 to 0.0.0-20231006013839
# llamaindex | ||
## 0.0.0-20230926204311 | ||
## 0.0.0-20231006013839 | ||
### Patch Changes | ||
- 139abad: Streaming improvements including Anthropic (thanks @kkang2097) | ||
- 139abad: Portkey integration (Thank you @noble-varghese) | ||
- 139abad: Pinecone demo (thanks @Einsenhorn) | ||
## 0.0.29 | ||
### Patch Changes | ||
- a52143b: Added DocxReader for Word documents (thanks @jayantasamaddar) | ||
- 1b7fd95: Updated OpenAI streaming (thanks @kkang2097) | ||
- 0db3f41: Migrated to Tiktoken lite, which hopefully fixes the Windows issue | ||
@@ -9,0 +18,0 @@ |
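The streaming improvements above thread an optional `streaming` flag through the chat engines and LLMs, as the diffs below show. A minimal usage sketch, assuming the classes are exported from the package root, an OpenAI key is configured in the environment, and `Response#toString()` returns the answer text:

```ts
import { OpenAI, SimpleChatEngine } from "llamaindex";

async function main() {
  const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
  const chatEngine = new SimpleChatEngine({ llm });

  // Default (non-streaming) path: resolves to a Response object.
  const response = await chatEngine.chat("What is LlamaIndex?");
  console.log(response.toString());

  // Streaming path: the third argument switches the return type to
  // AsyncGenerator<string, void, unknown>, yielding tokens as they arrive.
  const stream = await chatEngine.chat("Now answer in one sentence.", undefined, true);
  for await (const token of stream) {
    process.stdout.write(token);
  }
}

main().catch(console.error);
```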
{ | ||
"name": "llamaindex", | ||
"version": "0.0.0-20230926204311", | ||
"version": "0.0.0-20231006013839", | ||
"dependencies": { | ||
@@ -12,5 +12,6 @@ "@anthropic-ai/sdk": "^0.6.2", | ||
"notion-md-crawler": "^0.0.2", | ||
"openai": "^4.10.0", | ||
"openai": "^4.11.1", | ||
"papaparse": "^5.4.1", | ||
"pdf-parse": "^1.1.1", | ||
"portkey-ai": "^0.1.11", | ||
"rake-modified": "^1.0.8", | ||
@@ -24,3 +25,3 @@ "replicate": "^0.18.1", | ||
"@types/lodash": "^4.14.199", | ||
"@types/node": "^18.18.0", | ||
"@types/node": "^18.18.3", | ||
"@types/papaparse": "^5.3.9", | ||
@@ -27,0 +28,0 @@ "@types/pdf-parse": "^1.1.2", |
@@ -23,3 +23,4 @@ import { NodeWithScore } from "../Node"; | ||
export interface StreamToken { | ||
//Specify StreamToken per mainstream LLM | ||
export interface DefaultStreamToken { | ||
id: string; | ||
@@ -39,6 +40,24 @@ object: string; | ||
//OpenAI stream token schema is the default. | ||
//Note: Anthropic and Replicate also use similar token schemas. | ||
export type OpenAIStreamToken = DefaultStreamToken; | ||
export type AnthropicStreamToken = { | ||
completion: string; | ||
model: string; | ||
stop_reason: string | undefined; | ||
stop?: boolean | undefined; | ||
log_id?: string; | ||
}; | ||
// | ||
//Callback Responses | ||
// | ||
//TODO: Write Embedding Callbacks | ||
//StreamCallbackResponse should let practitioners implement callbacks out of the box. | ||
//When custom streaming LLMs are involved, users are expected to define their own StreamCallbackResponse shape. | ||
export interface StreamCallbackResponse extends BaseCallbackResponse { | ||
index: number; | ||
isDone?: boolean; | ||
token?: StreamToken; | ||
token?: DefaultStreamToken; | ||
} | ||
@@ -45,0 +64,0 @@ |
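The new `DefaultStreamToken`/`StreamCallbackResponse` types make it possible to type a streaming callback end to end. A rough sketch, assuming both types and `CallbackManager` are re-exported from the package root and that `onLLMStream` is supplied via the constructor, as the LLM diff further down suggests:

```ts
import { CallbackManager, StreamCallbackResponse } from "llamaindex";

// Print streamed tokens as they arrive. `token` follows the OpenAI-style
// DefaultStreamToken schema unless a custom streaming LLM supplies its own.
const callbackManager = new CallbackManager({
  onLLMStream: (data: StreamCallbackResponse) => {
    const delta = data.token?.choices?.[0]?.delta?.content ?? "";
    process.stdout.write(delta);
    if (data.isDone) {
      process.stdout.write("\n-- stream finished --\n");
    }
  },
});

// callbackManager can then be passed to an LLM, e.g. new OpenAI({ callbackManager }).
```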
@@ -26,4 +26,12 @@ import { v4 as uuidv4 } from "uuid"; | ||
* @param chatHistory optional chat history if you want to customize the chat history | ||
* @param streaming optional streaming flag; when true, the return type becomes an AsyncGenerator<string> instead of a Response. | ||
*/ | ||
chat(message: string, chatHistory?: ChatMessage[]): Promise<Response>; | ||
chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : Response, | ||
>( | ||
message: string, | ||
chatHistory?: ChatMessage[], | ||
streaming?: T, | ||
): Promise<R>; | ||
@@ -48,11 +56,43 @@ /** | ||
async chat(message: string, chatHistory?: ChatMessage[]): Promise<Response> { | ||
async chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : Response, | ||
>(message: string, chatHistory?: ChatMessage[], streaming?: T): Promise<R> { | ||
//Streaming option | ||
if (streaming) { | ||
return this.streamChat(message, chatHistory) as R; | ||
} | ||
//Non-streaming option | ||
chatHistory = chatHistory ?? this.chatHistory; | ||
chatHistory.push({ content: message, role: "user" }); | ||
const response = await this.llm.chat(chatHistory); | ||
const response = await this.llm.chat(chatHistory, undefined); | ||
chatHistory.push(response.message); | ||
this.chatHistory = chatHistory; | ||
return new Response(response.message.content); | ||
return new Response(response.message.content) as R; | ||
} | ||
protected async *streamChat( | ||
message: string, | ||
chatHistory?: ChatMessage[], | ||
): AsyncGenerator<string, void, unknown> { | ||
chatHistory = chatHistory ?? this.chatHistory; | ||
chatHistory.push({ content: message, role: "user" }); | ||
const response_generator = await this.llm.chat( | ||
chatHistory, | ||
undefined, | ||
true, | ||
); | ||
var accumulator: string = ""; | ||
for await (const part of response_generator) { | ||
accumulator += part; | ||
yield part; | ||
} | ||
chatHistory.push({ content: accumulator, role: "assistant" }); | ||
this.chatHistory = chatHistory; | ||
return; | ||
} | ||
reset() { | ||
@@ -104,6 +144,10 @@ this.chatHistory = []; | ||
async chat( | ||
async chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : Response, | ||
>( | ||
message: string, | ||
chatHistory?: ChatMessage[] | undefined, | ||
): Promise<Response> { | ||
streaming?: T, | ||
): Promise<R> { | ||
chatHistory = chatHistory ?? this.chatHistory; | ||
@@ -120,3 +164,3 @@ | ||
return response; | ||
return response as R; | ||
} | ||
@@ -136,3 +180,3 @@ | ||
retriever: BaseRetriever; | ||
chatModel: OpenAI; | ||
chatModel: LLM; | ||
chatHistory: ChatMessage[]; | ||
@@ -143,3 +187,3 @@ contextSystemPrompt: ContextSystemPrompt; | ||
retriever: BaseRetriever; | ||
chatModel?: OpenAI; | ||
chatModel?: LLM; | ||
chatHistory?: ChatMessage[]; | ||
@@ -156,5 +200,17 @@ contextSystemPrompt?: ContextSystemPrompt; | ||
async chat(message: string, chatHistory?: ChatMessage[] | undefined) { | ||
async chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : Response, | ||
>( | ||
message: string, | ||
chatHistory?: ChatMessage[] | undefined, | ||
streaming?: T, | ||
): Promise<R> { | ||
chatHistory = chatHistory ?? this.chatHistory; | ||
//Streaming option | ||
if (streaming) { | ||
return this.streamChat(message, chatHistory) as R; | ||
} | ||
const parentEvent: Event = { | ||
@@ -192,3 +248,48 @@ id: uuidv4(), | ||
sourceNodesWithScore.map((r) => r.node), | ||
) as R; | ||
} | ||
protected async *streamChat( | ||
message: string, | ||
chatHistory?: ChatMessage[] | undefined, | ||
): AsyncGenerator<string, void, unknown> { | ||
chatHistory = chatHistory ?? this.chatHistory; | ||
const parentEvent: Event = { | ||
id: uuidv4(), | ||
type: "wrapper", | ||
tags: ["final"], | ||
}; | ||
const sourceNodesWithScore = await this.retriever.retrieve( | ||
message, | ||
parentEvent, | ||
); | ||
const systemMessage: ChatMessage = { | ||
content: this.contextSystemPrompt({ | ||
context: sourceNodesWithScore | ||
.map((r) => (r.node as TextNode).text) | ||
.join("\n\n"), | ||
}), | ||
role: "system", | ||
}; | ||
chatHistory.push({ content: message, role: "user" }); | ||
const response_stream = await this.chatModel.chat( | ||
[systemMessage, ...chatHistory], | ||
parentEvent, | ||
true, | ||
); | ||
var accumulator: string = ""; | ||
for await (const part of response_stream) { | ||
accumulator += part; | ||
yield part; | ||
} | ||
chatHistory.push({ content: accumulator, role: "system" }); | ||
this.chatHistory = chatHistory; | ||
return; | ||
} | ||
@@ -214,9 +315,40 @@ | ||
async chat(message: string): Promise<Response> { | ||
async chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : Response, | ||
>( | ||
message: string, | ||
chatHistory?: ChatMessage[] | undefined, | ||
streaming?: T, | ||
): Promise<R> { | ||
//Streaming option | ||
if (streaming) { | ||
return this.streamChat(message, chatHistory) as R; | ||
} | ||
this.chatHistory.addMessage({ content: message, role: "user" }); | ||
const response = await this.llm.chat(this.chatHistory.messages); | ||
const response = await this.llm.chat(this.chatHistory.requestMessages); | ||
this.chatHistory.addMessage(response.message); | ||
return new Response(response.message.content); | ||
return new Response(response.message.content) as R; | ||
} | ||
protected async *streamChat( | ||
message: string, | ||
chatHistory?: ChatMessage[] | undefined, | ||
): AsyncGenerator<string, void, unknown> { | ||
this.chatHistory.addMessage({ content: message, role: "user" }); | ||
const response_stream = await this.llm.chat( | ||
this.chatHistory.requestMessages, | ||
undefined, | ||
true, | ||
); | ||
var accumulator = ""; | ||
for await (const part of response_stream) { | ||
accumulator += part; | ||
yield part; | ||
} | ||
this.chatHistory.addMessage({ content: accumulator, role: "user" }); | ||
return; | ||
} | ||
reset() { | ||
@@ -223,0 +355,0 @@ this.chatHistory.reset(); |
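The same flag works on `ContextChatEngine`, which retrieves context for every turn before calling the underlying LLM. A sketch, assuming `fromDocuments` and `asRetriever` behave as elsewhere in the library; the document text is illustrative only:

```ts
import { ContextChatEngine, Document, VectorStoreIndex } from "llamaindex";

async function chatOverDocs() {
  const index = await VectorStoreIndex.fromDocuments([
    new Document({ text: "The capital of France is Paris." }),
  ]);

  const chatEngine = new ContextChatEngine({ retriever: index.asRetriever() });

  // With streaming = true the engine yields tokens and, once the generator is
  // exhausted, appends the accumulated answer to its chat history.
  const stream = await chatEngine.chat("What is the capital of France?", undefined, true);
  for await (const token of stream) {
    process.stdout.write(token);
  }
}
```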
@@ -20,2 +20,7 @@ import { ChatMessage, LLM, OpenAI } from "./llm/LLM"; | ||
/** | ||
* Returns the messages that should be used as input to the LLM. | ||
*/ | ||
requestMessages: ChatMessage[]; | ||
/** | ||
* Resets the chat history so that it's empty. | ||
@@ -32,3 +37,2 @@ */ | ||
} | ||
async addMessage(message: ChatMessage) { | ||
@@ -38,2 +42,6 @@ this.messages.push(message); | ||
get requestMessages() { | ||
return this.messages; | ||
} | ||
reset() { | ||
@@ -45,2 +53,3 @@ this.messages = []; | ||
export class SummaryChatHistory implements ChatHistory { | ||
messagesToSummarize: number; | ||
messages: ChatMessage[]; | ||
@@ -51,2 +60,3 @@ summaryPrompt: SummaryPrompt; | ||
constructor(init?: Partial<SummaryChatHistory>) { | ||
this.messagesToSummarize = init?.messagesToSummarize ?? 5; | ||
this.messages = init?.messages ?? []; | ||
@@ -58,3 +68,6 @@ this.summaryPrompt = init?.summaryPrompt ?? defaultSummaryPrompt; | ||
private async summarize() { | ||
const chatHistoryStr = messagesToHistoryStr(this.messages); | ||
// get all messages starting from the last summary message (inclusive) | ||
const chatHistoryStr = messagesToHistoryStr( | ||
this.messages.slice(this.getLastSummaryIndex()), | ||
); | ||
@@ -65,13 +78,46 @@ const response = await this.llm.complete( | ||
this.messages = [{ content: response.message.content, role: "system" }]; | ||
this.messages.push({ content: response.message.content, role: "memory" }); | ||
} | ||
async addMessage(message: ChatMessage) { | ||
// TODO: check if summarization is necessary | ||
// TBD what are good conditions, e.g. depending on the context length of the LLM? | ||
// for now we just have a dummy implementation that always summarizes the messages | ||
await this.summarize(); | ||
const lastSummaryIndex = this.getLastSummaryIndex(); | ||
// if at least `messagesToSummarize` messages have been added since the last summary, call summarize | ||
if ( | ||
lastSummaryIndex !== -1 && | ||
this.messages.length - lastSummaryIndex - 1 >= this.messagesToSummarize | ||
) { | ||
// TODO: define better conditions, e.g. depending on the context length of the LLM | ||
// for now we just summarize each `messagesToSummarize` messages | ||
await this.summarize(); | ||
} | ||
this.messages.push(message); | ||
} | ||
// Find last summary message | ||
private getLastSummaryIndex() { | ||
return this.messages | ||
.slice() | ||
.reverse() | ||
.findIndex((message) => message.role === "memory"); | ||
} | ||
get requestMessages() { | ||
const lastSummaryIndex = this.getLastSummaryIndex(); | ||
// get array of all system messages | ||
const systemMessages = this.messages.filter( | ||
(message) => message.role === "system", | ||
); | ||
// convert the summary message so it can be sent to the LLM | ||
const summaryMessage: ChatMessage = { | ||
content: `This is a summary of conversation so far: ${this.messages[lastSummaryIndex].content}`, | ||
role: "system", | ||
}; | ||
// return system messages, last summary and all messages after the last summary message | ||
return [ | ||
...systemMessages, | ||
summaryMessage, | ||
...this.messages.slice(lastSummaryIndex + 1), | ||
]; | ||
} | ||
reset() { | ||
@@ -78,0 +124,0 @@ this.messages = []; |
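A short sketch of the reworked `SummaryChatHistory`: every `messagesToSummarize` messages after the last summary are compacted into a `"memory"` message, and `requestMessages` exposes the LLM-ready view (system messages, the latest summary, then everything after it). Passing the LLM via the constructor is inferred from `this.llm.complete` above and is otherwise an assumption:

```ts
import { OpenAI, SummaryChatHistory } from "llamaindex";

const llm = new OpenAI({ model: "gpt-3.5-turbo" });

// Summarize once five messages have accumulated since the last summary.
const chatHistory = new SummaryChatHistory({ llm, messagesToSummarize: 5 });

await chatHistory.addMessage({ role: "user", content: "Hi, I'm planning a trip to Japan." });
await chatHistory.addMessage({ role: "assistant", content: "Great, which cities?" });

// `messages` holds the raw transcript (plus any "memory" summaries);
// `requestMessages` is the view that should actually be sent to the LLM.
console.log(chatHistory.messages.length);
```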
@@ -0,1 +1,3 @@ | ||
import { Event } from "../../callbacks/CallbackManager"; | ||
import { DEFAULT_SIMILARITY_TOP_K } from "../../constants"; | ||
import { globalsHelper } from "../../GlobalsHelper"; | ||
@@ -5,4 +7,2 @@ import { NodeWithScore } from "../../Node"; | ||
import { ServiceContext } from "../../ServiceContext"; | ||
import { Event } from "../../callbacks/CallbackManager"; | ||
import { DEFAULT_SIMILARITY_TOP_K } from "../../constants"; | ||
import { | ||
@@ -36,3 +36,3 @@ VectorStoreQuery, | ||
async retrieve(query: string, parentEvent?: Event): Promise<NodeWithScore[]> { | ||
async retrieve(query: string, parentEvent?: Event, preFilters?: unknown): Promise<NodeWithScore[]> { | ||
const queryEmbedding = | ||
@@ -46,6 +46,11 @@ await this.serviceContext.embedModel.getQueryEmbedding(query); | ||
}; | ||
const result = await this.index.vectorStore.query(q); | ||
const result = await this.index.vectorStore.query(q, preFilters); | ||
let nodesWithScores: NodeWithScore[] = []; | ||
for (let i = 0; i < result.ids.length; i++) { | ||
const nodeFromResult = result.nodes?.[i]; | ||
if (!this.index.indexStruct.nodesDict[result.ids[i]] && nodeFromResult) { | ||
this.index.indexStruct.nodesDict[result.ids[i]] = nodeFromResult; | ||
} | ||
const node = this.index.indexStruct.nodesDict[result.ids[i]]; | ||
@@ -52,0 +57,0 @@ nodesWithScores.push({ |
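`preFilters` is opaque to the retriever and is forwarded verbatim to `vectorStore.query(q, preFilters)`, so its shape depends entirely on the concrete vector store. A hedged sketch; the filter object below is hypothetical:

```ts
import { NodeWithScore, VectorStoreIndex } from "llamaindex";

// Retrieve with a store-specific pre-filter. The filter shape is hypothetical
// and only meaningful to the vector store backing the index.
async function filteredRetrieve(index: VectorStoreIndex): Promise<NodeWithScore[]> {
  const retriever = index.asRetriever();
  return retriever.retrieve(
    "What did the author work on in 2021?",
    undefined, // parentEvent
    { filter: { year: { $eq: 2021 } } }, // preFilters (assumed shape)
  );
}
```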
@@ -222,2 +222,23 @@ import { BaseNode, Document, MetadataMode } from "../../Node"; | ||
static async fromVectorStore( | ||
vectorStore: VectorStore, | ||
serviceContext: ServiceContext, | ||
) { | ||
if (!vectorStore.storesText) { | ||
throw new Error( | ||
"Cannot initialize from a vector store that does not store text", | ||
); | ||
} | ||
const storageContext = await storageContextFromDefaults({ vectorStore }); | ||
const index = await VectorStoreIndex.init({ | ||
nodes: [], | ||
storageContext, | ||
serviceContext, | ||
}); | ||
return index; | ||
} | ||
asRetriever(options?: any): VectorIndexRetriever { | ||
@@ -224,0 +245,0 @@ return new VectorIndexRetriever({ index: this, ...options }); |
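`fromVectorStore` attaches an index to an already-populated store without re-ingesting documents, which pairs naturally with the Pinecone demo mentioned in the changelog. A sketch, assuming `VectorStore` and `serviceContextFromDefaults` are exported from the package root:

```ts
import {
  serviceContextFromDefaults,
  VectorStore,
  VectorStoreIndex,
} from "llamaindex";

// The store must have storesText === true, otherwise fromVectorStore throws.
async function queryExistingStore(vectorStore: VectorStore) {
  const serviceContext = serviceContextFromDefaults();
  const index = await VectorStoreIndex.fromVectorStore(vectorStore, serviceContext);

  const queryEngine = index.asQueryEngine();
  return queryEngine.query("Summarize what is stored in this index.");
}
```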
import OpenAILLM, { ClientOptions as OpenAIClientOptions } from "openai"; | ||
import { CallbackManager, Event } from "../callbacks/CallbackManager"; | ||
import { handleOpenAIStream } from "../callbacks/utility/handleOpenAIStream"; | ||
import { | ||
AnthropicStreamToken, | ||
CallbackManager, | ||
Event, | ||
EventType, | ||
OpenAIStreamToken, | ||
StreamCallbackResponse, | ||
} from "../callbacks/CallbackManager"; | ||
import { LLMOptions } from "portkey-ai"; | ||
import { | ||
AnthropicSession, | ||
ANTHROPIC_AI_PROMPT, | ||
ANTHROPIC_HUMAN_PROMPT, | ||
AnthropicSession, | ||
getAnthropicSession, | ||
@@ -17,3 +25,4 @@ } from "./anthropic"; | ||
} from "./azure"; | ||
import { OpenAISession, getOpenAISession } from "./openai"; | ||
import { getOpenAISession, OpenAISession } from "./openai"; | ||
import { getPortkeySession, PortkeySession } from "./portkey"; | ||
import { ReplicateSession } from "./replicate"; | ||
@@ -26,3 +35,4 @@ | ||
| "generic" | ||
| "function"; | ||
| "function" | ||
| "memory"; | ||
@@ -47,7 +57,18 @@ export interface ChatMessage { | ||
export interface LLM { | ||
// Whether an LLM has streaming support | ||
hasStreaming: boolean; | ||
/** | ||
* Get a chat response from the LLM | ||
* @param messages | ||
* | ||
* The return type of chat() and complete() is determined by the "streaming" parameter: when true, they return an AsyncGenerator instead. | ||
*/ | ||
chat(messages: ChatMessage[], parentEvent?: Event): Promise<ChatResponse>; | ||
chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>( | ||
messages: ChatMessage[], | ||
parentEvent?: Event, | ||
streaming?: T, | ||
): Promise<R>; | ||
@@ -58,3 +79,10 @@ /** | ||
*/ | ||
complete(prompt: string, parentEvent?: Event): Promise<CompletionResponse>; | ||
complete< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>( | ||
prompt: string, | ||
parentEvent?: Event, | ||
streaming?: T, | ||
): Promise<R>; | ||
} | ||
@@ -84,2 +112,4 @@ | ||
export class OpenAI implements LLM { | ||
hasStreaming: boolean = true; | ||
// Per completion OpenAI params | ||
@@ -91,3 +121,3 @@ model: keyof typeof ALL_AVAILABLE_OPENAI_MODELS; | ||
additionalChatOptions?: Omit< | ||
Partial<OpenAILLM.Chat.CompletionCreateParams>, | ||
Partial<OpenAILLM.Chat.ChatCompletionCreateParams>, | ||
"max_tokens" | "messages" | "model" | "temperature" | "top_p" | "streaming" | ||
@@ -179,7 +209,55 @@ >; | ||
async chat( | ||
async chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>(messages: ChatMessage[], parentEvent?: Event, streaming?: T): Promise<R> { | ||
const baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams = { | ||
model: this.model, | ||
temperature: this.temperature, | ||
max_tokens: this.maxTokens, | ||
messages: messages.map((message) => ({ | ||
role: this.mapMessageType(message.role), | ||
content: message.content, | ||
})), | ||
top_p: this.topP, | ||
...this.additionalChatOptions, | ||
}; | ||
// Streaming | ||
if (streaming) { | ||
if (!this.hasStreaming) { | ||
throw Error("No streaming support for this LLM."); | ||
} | ||
return this.streamChat(messages, parentEvent) as R; | ||
} | ||
// Non-streaming | ||
const response = await this.session.openai.chat.completions.create({ | ||
...baseRequestParams, | ||
stream: false, | ||
}); | ||
const content = response.choices[0].message?.content ?? ""; | ||
return { | ||
message: { content, role: response.choices[0].message.role }, | ||
} as R; | ||
} | ||
async complete< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>(prompt: string, parentEvent?: Event, streaming?: T): Promise<R> { | ||
return this.chat( | ||
[{ content: prompt, role: "user" }], | ||
parentEvent, | ||
streaming, | ||
); | ||
} | ||
//We can wrap a stream in a generator to add additional logging behavior | ||
//For future edits: the generator type is AsyncGenerator<Yield, Return, Next> | ||
//where Next is the type accepted when generator.next(value) is called manually | ||
protected async *streamChat( | ||
messages: ChatMessage[], | ||
parentEvent?: Event, | ||
): Promise<ChatResponse> { | ||
const baseRequestParams: OpenAILLM.Chat.CompletionCreateParams = { | ||
): AsyncGenerator<string, void, unknown> { | ||
const baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams = { | ||
model: this.model, | ||
@@ -196,5 +274,9 @@ temperature: this.temperature, | ||
if (this.callbackManager?.onLLMStream) { | ||
// Streaming | ||
const response = await this.session.openai.chat.completions.create({ | ||
//Now let's wrap our stream in a callback | ||
const onLLMStream = this.callbackManager?.onLLMStream | ||
? this.callbackManager.onLLMStream | ||
: () => {}; | ||
const chunk_stream: AsyncIterable<OpenAIStreamToken> = | ||
await this.session.openai.chat.completions.create({ | ||
...baseRequestParams, | ||
@@ -204,25 +286,39 @@ stream: true, | ||
const { message, role } = await handleOpenAIStream({ | ||
response, | ||
onLLMStream: this.callbackManager.onLLMStream, | ||
parentEvent, | ||
}); | ||
return { message: { content: message, role } }; | ||
} else { | ||
// Non-streaming | ||
const response = await this.session.openai.chat.completions.create({ | ||
...baseRequestParams, | ||
stream: false, | ||
}); | ||
const event: Event = parentEvent | ||
? parentEvent | ||
: { | ||
id: "unspecified", | ||
type: "llmPredict" as EventType, | ||
}; | ||
const content = response.choices[0].message?.content ?? ""; | ||
return { message: { content, role: response.choices[0].message.role } }; | ||
//Indices | ||
var idx_counter: number = 0; | ||
for await (const part of chunk_stream) { | ||
//Increment | ||
part.choices[0].index = idx_counter; | ||
const is_done: boolean = | ||
part.choices[0].finish_reason === "stop" ? true : false; | ||
//onLLMStream Callback | ||
const stream_callback: StreamCallbackResponse = { | ||
event: event, | ||
index: idx_counter, | ||
isDone: is_done, | ||
token: part, | ||
}; | ||
onLLMStream(stream_callback); | ||
idx_counter++; | ||
yield part.choices[0].delta.content ? part.choices[0].delta.content : ""; | ||
} | ||
return; | ||
} | ||
async complete( | ||
prompt: string, | ||
//streamComplete doesn't need to be async because its child function (streamChat) is already async | ||
protected streamComplete( | ||
query: string, | ||
parentEvent?: Event, | ||
): Promise<CompletionResponse> { | ||
return this.chat([{ content: prompt, role: "user" }], parentEvent); | ||
): AsyncGenerator<string, void, unknown> { | ||
return this.streamChat([{ content: query, role: "user" }], parentEvent); | ||
} | ||
@@ -291,2 +387,3 @@ } | ||
replicateSession: ReplicateSession; | ||
hasStreaming: boolean; | ||
@@ -306,2 +403,3 @@ constructor(init?: Partial<LlamaDeuce>) { | ||
this.replicateSession = init?.replicateSession ?? new ReplicateSession(); | ||
this.hasStreaming = init?.hasStreaming ?? false; | ||
} | ||
@@ -413,6 +511,6 @@ | ||
async chat( | ||
messages: ChatMessage[], | ||
_parentEvent?: Event, | ||
): Promise<ChatResponse> { | ||
async chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>(messages: ChatMessage[], _parentEvent?: Event, streaming?: T): Promise<R> { | ||
const api = ALL_AVAILABLE_LLAMADEUCE_MODELS[this.model] | ||
@@ -438,2 +536,5 @@ .replicateApi as `${string}/${string}:${string}`; | ||
//TODO: Add streaming for this | ||
//Non-streaming | ||
const response = await this.replicateSession.replicate.run( | ||
@@ -449,9 +550,9 @@ api, | ||
}, | ||
}; | ||
} as R; | ||
} | ||
async complete( | ||
prompt: string, | ||
parentEvent?: Event, | ||
): Promise<CompletionResponse> { | ||
async complete< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>(prompt: string, parentEvent?: Event, streaming?: T): Promise<R> { | ||
return this.chat([{ content: prompt, role: "user" }], parentEvent); | ||
@@ -466,2 +567,4 @@ } | ||
export class Anthropic implements LLM { | ||
hasStreaming: boolean = true; | ||
// Per completion Anthropic params | ||
@@ -516,6 +619,18 @@ model: string; | ||
async chat( | ||
async chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>( | ||
messages: ChatMessage[], | ||
parentEvent?: Event | undefined, | ||
): Promise<ChatResponse> { | ||
streaming?: T, | ||
): Promise<R> { | ||
//Streaming | ||
if (streaming) { | ||
if (!this.hasStreaming) { | ||
throw Error("No streaming support for this LLM."); | ||
} | ||
return this.streamChat(messages, parentEvent) as R; | ||
} | ||
//Non-streaming | ||
const response = await this.session.anthropic.completions.create({ | ||
@@ -533,10 +648,172 @@ model: this.model, | ||
// That space will be re-added when we generate the next prompt. | ||
}; | ||
} as R; | ||
} | ||
async complete( | ||
protected async *streamChat( | ||
messages: ChatMessage[], | ||
parentEvent?: Event | undefined, | ||
): AsyncGenerator<string, void, unknown> { | ||
// AsyncIterable<AnthropicStreamToken> | ||
const stream: AsyncIterable<AnthropicStreamToken> = | ||
await this.session.anthropic.completions.create({ | ||
model: this.model, | ||
prompt: this.mapMessagesToPrompt(messages), | ||
max_tokens_to_sample: this.maxTokens ?? 100000, | ||
temperature: this.temperature, | ||
top_p: this.topP, | ||
stream: true, | ||
}); | ||
var idx_counter: number = 0; | ||
for await (const part of stream) { | ||
//TODO: LLM Stream Callback, pending re-work. | ||
idx_counter++; | ||
yield part.completion; | ||
} | ||
return; | ||
} | ||
async complete< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>( | ||
prompt: string, | ||
parentEvent?: Event | undefined, | ||
): Promise<CompletionResponse> { | ||
return this.chat([{ content: prompt, role: "user" }], parentEvent); | ||
streaming?: T, | ||
): Promise<R> { | ||
if (streaming) { | ||
return this.streamComplete(prompt, parentEvent) as R; | ||
} | ||
return this.chat( | ||
[{ content: prompt, role: "user" }], | ||
parentEvent, | ||
streaming, | ||
) as R; | ||
} | ||
protected streamComplete( | ||
prompt: string, | ||
parentEvent?: Event | undefined, | ||
): AsyncGenerator<string, void, unknown> { | ||
return this.streamChat([{ content: prompt, role: "user" }], parentEvent); | ||
} | ||
} | ||
export class Portkey implements LLM { | ||
hasStreaming: boolean = true; | ||
apiKey?: string = undefined; | ||
baseURL?: string = undefined; | ||
mode?: string = undefined; | ||
llms?: [LLMOptions] | null = undefined; | ||
session: PortkeySession; | ||
callbackManager?: CallbackManager; | ||
constructor(init?: Partial<Portkey>) { | ||
this.apiKey = init?.apiKey; | ||
this.baseURL = init?.baseURL; | ||
this.mode = init?.mode; | ||
this.llms = init?.llms; | ||
this.session = getPortkeySession({ | ||
apiKey: this.apiKey, | ||
baseURL: this.baseURL, | ||
llms: this.llms, | ||
mode: this.mode, | ||
}); | ||
this.callbackManager = init?.callbackManager; | ||
} | ||
async chat< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>( | ||
messages: ChatMessage[], | ||
parentEvent?: Event | undefined, | ||
streaming?: T, | ||
params?: Record<string, any>, | ||
): Promise<R> { | ||
if (streaming) { | ||
return this.streamChat(messages, parentEvent, params) as R; | ||
} else { | ||
const resolvedParams = params || {}; | ||
const response = await this.session.portkey.chatCompletions.create({ | ||
messages, | ||
...resolvedParams, | ||
}); | ||
const content = response.choices[0].message?.content ?? ""; | ||
const role = response.choices[0].message?.role || "assistant"; | ||
return { message: { content, role: role as MessageType } } as R; | ||
} | ||
} | ||
async complete< | ||
T extends boolean | undefined = undefined, | ||
R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse, | ||
>( | ||
prompt: string, | ||
parentEvent?: Event | undefined, | ||
streaming?: T, | ||
): Promise<R> { | ||
return this.chat( | ||
[{ content: prompt, role: "user" }], | ||
parentEvent, | ||
streaming, | ||
); | ||
} | ||
async *streamChat( | ||
messages: ChatMessage[], | ||
parentEvent?: Event, | ||
params?: Record<string, any>, | ||
): AsyncGenerator<string, void, unknown> { | ||
// Wrapping the stream in a callback. | ||
const onLLMStream = this.callbackManager?.onLLMStream | ||
? this.callbackManager.onLLMStream | ||
: () => {}; | ||
const chunkStream = await this.session.portkey.chatCompletions.create({ | ||
messages, | ||
...params, | ||
stream: true, | ||
}); | ||
const event: Event = parentEvent | ||
? parentEvent | ||
: { | ||
id: "unspecified", | ||
type: "llmPredict" as EventType, | ||
}; | ||
//Indices | ||
var idx_counter: number = 0; | ||
for await (const part of chunkStream) { | ||
//Increment | ||
part.choices[0].index = idx_counter; | ||
const is_done: boolean = | ||
part.choices[0].finish_reason === "stop" ? true : false; | ||
//onLLMStream Callback | ||
const stream_callback: StreamCallbackResponse = { | ||
event: event, | ||
index: idx_counter, | ||
isDone: is_done, | ||
// token: part, | ||
}; | ||
onLLMStream(stream_callback); | ||
idx_counter++; | ||
yield part.choices[0].delta?.content ?? ""; | ||
} | ||
return; | ||
} | ||
streamComplete( | ||
query: string, | ||
parentEvent?: Event, | ||
): AsyncGenerator<string, void, unknown> { | ||
return this.streamChat([{ content: query, role: "user" }], parentEvent); | ||
} | ||
} |
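At the LLM layer the pattern is the same: `chat()`/`complete()` return a `ChatResponse` by default and an `AsyncGenerator<string, void, unknown>` when `streaming` is true, with `hasStreaming` guarding classes that cannot stream. A minimal sketch against the `OpenAI` class:

```ts
import { OpenAI } from "llamaindex";

async function llmExamples() {
  const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });

  // Non-streaming chat: a single ChatResponse.
  const reply = await llm.chat([{ role: "user", content: "Say hello." }]);
  console.log(reply.message.content);

  // Streaming completion: tokens are yielded as they arrive. Classes with
  // hasStreaming === false throw "No streaming support for this LLM." instead.
  const stream = await llm.complete("Write a haiku about TypeScript.", undefined, true);
  for await (const token of stream) {
    process.stdout.write(token);
  }
}
```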
import { v4 as uuidv4 } from "uuid"; | ||
import { Event } from "./callbacks/CallbackManager"; | ||
import { NodeWithScore, TextNode } from "./Node"; | ||
@@ -13,3 +14,2 @@ import { | ||
import { QueryEngineTool, ToolMetadata } from "./Tool"; | ||
import { Event } from "./callbacks/CallbackManager"; | ||
@@ -34,2 +34,3 @@ /** | ||
responseSynthesizer: ResponseSynthesizer; | ||
preFilters?: unknown; | ||
@@ -39,2 +40,3 @@ constructor( | ||
responseSynthesizer?: ResponseSynthesizer, | ||
preFilters?: unknown, | ||
) { | ||
@@ -46,2 +48,3 @@ this.retriever = retriever; | ||
responseSynthesizer || new ResponseSynthesizer({ serviceContext }); | ||
this.preFilters = preFilters; | ||
} | ||
@@ -55,3 +58,7 @@ | ||
}; | ||
const nodes = await this.retriever.retrieve(query, _parentEvent); | ||
const nodes = await this.retriever.retrieve( | ||
query, | ||
_parentEvent, | ||
this.preFilters, | ||
); | ||
return this.responseSynthesizer.synthesize(query, nodes, _parentEvent); | ||
@@ -58,0 +65,0 @@ } |
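`RetrieverQueryEngine` now accepts `preFilters` as a third constructor argument and threads it into every retrieve call. A sketch; the filter shape is hypothetical and store-specific:

```ts
import { RetrieverQueryEngine, VectorStoreIndex } from "llamaindex";

// Every query made through this engine is retrieved with the same pre-filter.
function buildFilteredEngine(index: VectorStoreIndex) {
  return new RetrieverQueryEngine(
    index.asRetriever(),
    undefined, // responseSynthesizer: fall back to the default
    { filter: { source: { $eq: "handbook" } } }, // preFilters (assumed shape)
  );
}
```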
@@ -0,3 +1,8 @@ | ||
import { Event } from "./callbacks/CallbackManager"; | ||
import { LLM } from "./llm/LLM"; | ||
import { MetadataMode, NodeWithScore } from "./Node"; | ||
import { | ||
defaultRefinePrompt, | ||
defaultTextQaPrompt, | ||
defaultTreeSummarizePrompt, | ||
RefinePrompt, | ||
@@ -7,5 +12,2 @@ SimplePrompt, | ||
TreeSummarizePrompt, | ||
defaultRefinePrompt, | ||
defaultTextQaPrompt, | ||
defaultTreeSummarizePrompt, | ||
} from "./Prompt"; | ||
@@ -15,4 +17,2 @@ import { getBiggestPrompt } from "./PromptHelper"; | ||
import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext"; | ||
import { Event } from "./callbacks/CallbackManager"; | ||
import { LLM } from "./llm/LLM"; | ||
@@ -236,2 +236,3 @@ /** | ||
// Should we send the query here too? | ||
const packedTextChunks = this.serviceContext.promptHelper.repack( | ||
@@ -247,2 +248,3 @@ this.summaryTemplate, | ||
context: packedTextChunks[0], | ||
query, | ||
}), | ||
@@ -258,2 +260,3 @@ parentEvent, | ||
context: chunk, | ||
query, | ||
}), | ||
@@ -260,0 +263,0 @@ parentEvent, |
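Because the tree-summarize builder now forwards the original `query` into the summary template alongside the packed `context`, a custom template can reference both. A sketch, assuming `TreeSummarizePrompt` is exported from the package root and keeps the `SimplePrompt` function shape:

```ts
import { TreeSummarizePrompt } from "llamaindex";

// Custom tree-summarize template that uses the newly forwarded `query`.
const queryAwareTreeSummarizePrompt: TreeSummarizePrompt = ({
  context = "",
  query = "",
}) => `Context from multiple sources is below.
---------------------
${context}
---------------------
Using only the context above, answer the question: ${query}
`;
```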
@@ -0,4 +1,4 @@ | ||
import { Event } from "./callbacks/CallbackManager"; | ||
import { NodeWithScore } from "./Node"; | ||
import { ServiceContext } from "./ServiceContext"; | ||
import { Event } from "./callbacks/CallbackManager"; | ||
@@ -9,4 +9,8 @@ /** | ||
export interface BaseRetriever { | ||
retrieve(query: string, parentEvent?: Event): Promise<NodeWithScore[]>; | ||
retrieve( | ||
query: string, | ||
parentEvent?: Event, | ||
preFilters?: unknown, | ||
): Promise<NodeWithScore[]>; | ||
getServiceContext(): ServiceContext; | ||
} |
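Custom retrievers should adopt the widened signature so that engines can pass `preFilters` through without breaking. A toy implementation sketch; the keyword-matching logic and the interpretation of `preFilters` as a string array are purely illustrative, and the imports are assumed to come from the package root:

```ts
import {
  BaseRetriever,
  Event,
  MetadataMode,
  NodeWithScore,
  ServiceContext,
} from "llamaindex";

// A toy BaseRetriever that filters an in-memory node list by substring match.
class KeywordRetriever implements BaseRetriever {
  constructor(
    private nodes: NodeWithScore[],
    private serviceContext: ServiceContext,
  ) {}

  async retrieve(
    query: string,
    _parentEvent?: Event,
    preFilters?: unknown,
  ): Promise<NodeWithScore[]> {
    // Treat preFilters as an optional list of required keywords (assumed shape).
    const required = Array.isArray(preFilters) ? (preFilters as string[]) : [];
    return this.nodes.filter(({ node }) => {
      const text = node.getContent(MetadataMode.NONE);
      return (
        text.toLowerCase().includes(query.toLowerCase()) &&
        required.every((keyword) => text.includes(keyword))
      );
    });
  }

  getServiceContext(): ServiceContext {
    return this.serviceContext;
  }
}
```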
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package.
+ Added portkey-ai@^0.1.11
+ Added portkey-ai@0.1.16 (transitive)
Updated openai@^4.11.1