@ai-sdk/google
Advanced tools
+6
-0
| # @ai-sdk/google | ||
| ## 3.0.26 | ||
| ### Patch Changes | ||
| - 4c27179: feat(google): allow using Gemini image models with `generateImage` | ||
| ## 3.0.25 | ||
@@ -4,0 +10,0 @@ |
+1
-1
@@ -163,3 +163,3 @@ import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils'; | ||
| type GoogleGenerativeAIImageModelId = 'imagen-4.0-generate-001' | 'imagen-4.0-ultra-generate-001' | 'imagen-4.0-fast-generate-001' | (string & {}); | ||
| type GoogleGenerativeAIImageModelId = 'imagen-4.0-generate-001' | 'imagen-4.0-ultra-generate-001' | 'imagen-4.0-fast-generate-001' | 'gemini-2.5-flash-image' | 'gemini-3-pro-image-preview' | (string & {}); | ||
| interface GoogleGenerativeAIImageSettings { | ||
@@ -166,0 +166,0 @@ /** |
+1
-1
@@ -163,3 +163,3 @@ import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils'; | ||
| type GoogleGenerativeAIImageModelId = 'imagen-4.0-generate-001' | 'imagen-4.0-ultra-generate-001' | 'imagen-4.0-fast-generate-001' | (string & {}); | ||
| type GoogleGenerativeAIImageModelId = 'imagen-4.0-generate-001' | 'imagen-4.0-ultra-generate-001' | 'imagen-4.0-fast-generate-001' | 'gemini-2.5-flash-image' | 'gemini-3-pro-image-preview' | (string & {}); | ||
| interface GoogleGenerativeAIImageSettings { | ||
@@ -166,0 +166,0 @@ /** |
+3
-3
| { | ||
| "name": "@ai-sdk/google", | ||
| "version": "3.0.25", | ||
| "version": "3.0.26", | ||
| "license": "Apache-2.0", | ||
@@ -39,4 +39,4 @@ "sideEffects": false, | ||
| "dependencies": { | ||
| "@ai-sdk/provider": "3.0.8", | ||
| "@ai-sdk/provider-utils": "4.0.14" | ||
| "@ai-sdk/provider-utils": "4.0.14", | ||
| "@ai-sdk/provider": "3.0.8" | ||
| }, | ||
@@ -43,0 +43,0 @@ "devDependencies": { |
@@ -1,5 +0,12 @@ | ||
| import { ImageModelV3, SharedV3Warning } from '@ai-sdk/provider'; | ||
| import { | ||
| ImageModelV3, | ||
| LanguageModelV3Prompt, | ||
| SharedV3Warning, | ||
| } from '@ai-sdk/provider'; | ||
| import { | ||
| combineHeaders, | ||
| convertToBase64, | ||
| createJsonResponseHandler, | ||
| FetchFunction, | ||
| generateId as defaultGenerateId, | ||
| type InferSchema, | ||
@@ -9,2 +16,3 @@ lazySchema, | ||
| postJsonToApi, | ||
| Resolvable, | ||
| resolve, | ||
@@ -19,3 +27,3 @@ zodSchema, | ||
| } from './google-generative-ai-image-settings'; | ||
| import { FetchFunction, Resolvable } from '@ai-sdk/provider-utils'; | ||
| import { GoogleGenerativeAILanguageModel } from './google-generative-ai-language-model'; | ||
@@ -37,4 +45,11 @@ interface GoogleGenerativeAIImageModelConfig { | ||
/**
 * Maximum number of images that may be requested in a single API call.
 *
 * Resolution order:
 * 1. an explicit `settings.maxImagesPerCall`, when provided;
 * 2. 10 for Gemini image models (model IDs matched by `isGeminiModel`);
 * 3. 4 for every other model.
 *
 * NOTE(review): `doGenerateGemini` in this file throws when `n > 1`, so a
 * per-call limit of 10 for Gemini models allows batched requests that will
 * then be rejected — confirm whether 1 is the intended limit here.
 */
get maxImagesPerCall(): number {
  const configuredLimit = this.settings.maxImagesPerCall;
  if (configuredLimit != null) {
    return configuredLimit;
  }

  // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-image
  if (isGeminiModel(this.modelId)) {
    return 10;
  }

  // https://ai.google.dev/gemini-api/docs/imagen#imagen-model
  return 4;
}
@@ -55,2 +70,12 @@ | ||
| ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> { | ||
| // Gemini image models use the language model API internally | ||
| if (isGeminiModel(this.modelId)) { | ||
| return this.doGenerateGemini(options); | ||
| } | ||
| return this.doGenerateImagen(options); | ||
| } | ||
| private async doGenerateImagen( | ||
| options: Parameters<ImageModelV3['doGenerate']>[0], | ||
| ): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> { | ||
| const { | ||
@@ -70,6 +95,6 @@ prompt, | ||
| // Google Generative AI does not support image editing | ||
| // Imagen API endpoints do not support image editing | ||
| if (files != null && files.length > 0) { | ||
| throw new Error( | ||
| 'Google Generative AI does not support image editing. ' + | ||
| 'Google Generative AI does not support image editing with Imagen models. ' + | ||
| 'Use Google Vertex AI (@ai-sdk/google-vertex) for image editing capabilities.', | ||
@@ -146,6 +171,6 @@ ); | ||
| ), | ||
| warnings: warnings ?? [], | ||
| warnings, | ||
| providerMetadata: { | ||
| google: { | ||
| images: response.predictions.map(prediction => ({ | ||
| images: response.predictions.map(() => ({ | ||
| // Add any prediction-specific metadata here | ||
@@ -162,4 +187,144 @@ })), | ||
| } | ||
/**
 * Generates images with a Gemini image model by delegating to the Gemini
 * language model and requesting an image-only response.
 *
 * The text prompt and any input `files` are packed into a single user
 * message. Every file part of the response whose media type starts with
 * `image/` is returned as a base64 string.
 *
 * @param options - Standard `ImageModelV3` generation options. `size` is not
 *   supported and only produces a warning; `aspectRatio` is forwarded as
 *   `imageConfig.aspectRatio`; entries in `providerOptions.google` override
 *   the defaults set by this method.
 * @returns The generated images, collected warnings, provider metadata with
 *   one (empty) entry per image, response information and token usage.
 * @throws Error when `mask` is provided or when `n` is greater than 1.
 */
private async doGenerateGemini(
  options: Parameters<ImageModelV3['doGenerate']>[0],
): Promise<Awaited<ReturnType<ImageModelV3['doGenerate']>>> {
  const {
    prompt,
    n,
    size,
    aspectRatio,
    seed,
    providerOptions,
    headers,
    abortSignal,
    files,
    mask,
  } = options;

  const warnings: Array<SharedV3Warning> = [];

  // Gemini does not support mask-based inpainting
  if (mask != null) {
    throw new Error(
      'Gemini image models do not support mask-based image editing.',
    );
  }

  // Gemini does not support generating multiple images per call via n parameter
  if (n != null && n > 1) {
    throw new Error(
      'Gemini image models do not support generating a set number of images per call. Use n=1 or omit the n parameter.',
    );
  }

  if (size != null) {
    warnings.push({
      type: 'unsupported',
      feature: 'size',
      details:
        'This model does not support the `size` option. Use `aspectRatio` instead.',
    });
  }

  // Build user message content for language model
  const userContent: Array<
    | { type: 'text'; text: string }
    | { type: 'file'; data: string | Uint8Array | URL; mediaType: string }
  > = [];

  // Add text prompt
  if (prompt != null) {
    userContent.push({ type: 'text', text: prompt });
  }

  // Add input images for editing
  if (files != null && files.length > 0) {
    for (const file of files) {
      if (file.type === 'url') {
        // The concrete media type of a URL input is not known here, so a
        // wildcard image type is passed along.
        userContent.push({
          type: 'file',
          data: new URL(file.url),
          mediaType: 'image/*',
        });
      } else {
        userContent.push({
          type: 'file',
          data:
            typeof file.data === 'string'
              ? file.data
              : new Uint8Array(file.data),
          mediaType: file.mediaType,
        });
      }
    }
  }

  const languageModelPrompt: LanguageModelV3Prompt = [
    { role: 'user', content: userContent },
  ];

  // Instantiate language model
  const languageModel = new GoogleGenerativeAILanguageModel(this.modelId, {
    provider: this.config.provider,
    baseURL: this.config.baseURL,
    headers: this.config.headers ?? {},
    fetch: this.config.fetch,
    generateId: this.config.generateId ?? defaultGenerateId,
  });

  // Caller-supplied Google options are spread after the defaults so they can
  // override them.
  const callerGoogleOptions =
    (providerOptions?.google as Record<string, unknown>) ?? {};

  const defaultImageConfig = aspectRatio ? { aspectRatio } : undefined;

  // A caller-supplied `imageConfig` object would otherwise replace the
  // default wholesale and silently drop the top-level `aspectRatio`. Merge
  // the two instead; keys set by the caller still take precedence.
  const callerImageConfig = callerGoogleOptions.imageConfig;
  const mergedImageConfig =
    defaultImageConfig != null &&
    typeof callerImageConfig === 'object' &&
    callerImageConfig !== null &&
    !Array.isArray(callerImageConfig)
      ? { ...defaultImageConfig, ...callerImageConfig }
      : undefined;

  // Call language model with image-only response modality
  const result = await languageModel.doGenerate({
    prompt: languageModelPrompt,
    seed,
    providerOptions: {
      google: {
        responseModalities: ['IMAGE'],
        imageConfig: defaultImageConfig,
        ...callerGoogleOptions,
        ...(mergedImageConfig != null
          ? { imageConfig: mergedImageConfig }
          : {}),
      },
    },
    headers,
    abortSignal,
  });

  const currentDate = this.config._internal?.currentDate?.() ?? new Date();

  // Extract images from language model response
  const images: string[] = [];
  for (const part of result.content) {
    if (part.type === 'file' && part.mediaType.startsWith('image/')) {
      images.push(convertToBase64(part.data));
    }
  }

  return {
    images,
    warnings,
    providerMetadata: {
      google: {
        // One placeholder metadata entry per returned image.
        images: images.map(() => ({})),
      },
    },
    response: {
      timestamp: currentDate,
      modelId: this.modelId,
      headers: result.response?.headers,
    },
    usage: result.usage
      ? {
          inputTokens: result.usage.inputTokens.total,
          outputTokens: result.usage.outputTokens.total,
          // Missing token counts are treated as 0 when summing.
          totalTokens:
            (result.usage.inputTokens.total ?? 0) +
            (result.usage.outputTokens.total ?? 0),
        }
      : undefined,
  };
}
| } | ||
/**
 * Reports whether a model ID names a Gemini model.
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when the identifier begins with `gemini-`, otherwise `false`.
 */
function isGeminiModel(modelId: string): boolean {
  const geminiIdPrefix = 'gemini-';
  const hasGeminiPrefix = modelId.startsWith(geminiIdPrefix);
  return hasGeminiPrefix;
}
| // minimal version of the schema | ||
@@ -166,0 +331,0 @@ const googleImageResponseSchema = lazySchema(() => |
/**
 * Model identifiers accepted by the Google Generative AI image model.
 *
 * Known IDs are listed for editor auto-completion; the trailing
 * `(string & {})` member keeps the type open to any other model ID string.
 */
export type GoogleGenerativeAIImageModelId =
  // Imagen family: served through the `:predict` API
  | 'imagen-4.0-generate-001'
  | 'imagen-4.0-ultra-generate-001'
  | 'imagen-4.0-fast-generate-001'
  // Gemini image family: multimodal-output language models, served through
  // the `:generateContent` API
  | 'gemini-2.5-flash-image'
  | 'gemini-3-pro-image-preview'
  | (string & {});
@@ -6,0 +10,0 @@ |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
1056146
2.9%11741
3.44%