
github.com/enesunal-m/azrealtime
A production-ready Go client library for Azure OpenAI's GPT-4o Realtime API, enabling real-time bidirectional communication with voice and text.
✅ Core Functionality
✅ Audio Processing
✅ Production Ready
✅ Developer Experience
Note: Azure GPT-4o Realtime is in public preview. Use API version 2025-04-01-preview and monitor the official documentation for updates.
go get github.com/enesunal-m/azrealtime
package main

import (
    "context"
    "fmt"
    "log"
    "os"
    "time"

    "github.com/enesunal-m/azrealtime"
)

func main() {
    // Configure client
    cfg := azrealtime.Config{
        ResourceEndpoint: os.Getenv("AZURE_OPENAI_ENDPOINT"),
        Deployment:       os.Getenv("AZURE_OPENAI_REALTIME_DEPLOYMENT"),
        APIVersion:       "2025-04-01-preview",
        Credential:       azrealtime.APIKey(os.Getenv("AZURE_OPENAI_API_KEY")),
        DialTimeout:      30 * time.Second,
    }

    // Create client
    ctx := context.Background()
    client, err := azrealtime.Dial(ctx, cfg)
    if err != nil {
        log.Fatalf("Failed to connect: %v", err)
    }
    defer client.Close()

    // Set up event handlers
    client.OnResponseTextDelta(func(event azrealtime.ResponseTextDelta) {
        fmt.Print(event.Delta) // Stream text response
    })

    // Configure session
    session := azrealtime.Session{
        Voice:             azrealtime.Ptr("alloy"),
        Instructions:      azrealtime.Ptr("You are a helpful assistant."),
        InputAudioFormat:  azrealtime.Ptr("pcm16"),
        OutputAudioFormat: azrealtime.Ptr("pcm16"),
    }
    if err := client.SessionUpdate(ctx, session); err != nil {
        log.Fatalf("Failed to configure session: %v", err)
    }

    // Create response
    opts := azrealtime.CreateResponseOptions{
        Modalities: []string{"text", "audio"},
        Prompt:     "Hello! Please introduce yourself.",
    }
    eventID, err := client.CreateResponse(ctx, opts)
    if err != nil {
        log.Fatalf("Failed to create response: %v", err)
    }
    log.Printf("Response requested: %s", eventID)

    time.Sleep(5 * time.Second) // Wait for response
}
The library provides a WebSocket-based client for the Azure OpenAI Realtime API. Configure it with the following environment variables:
# Required
export AZURE_OPENAI_ENDPOINT="https://your-resource.openai.azure.com"
export AZURE_OPENAI_REALTIME_DEPLOYMENT="your-deployment-name"
export AZURE_OPENAI_API_KEY="your-api-key"
# Optional
export AZURE_OPENAI_API_VERSION="2025-04-01-preview"
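As a convenience, the client configuration can be assembled from these variables before dialing. A minimal sketch using only the Config fields shown in this README; the loadConfig helper is illustrative, not part of the library:

// loadConfig builds an azrealtime.Config from environment variables.
// Illustrative helper only; falls back to the documented preview API
// version when AZURE_OPENAI_API_VERSION is unset.
func loadConfig() azrealtime.Config {
    apiVersion := os.Getenv("AZURE_OPENAI_API_VERSION")
    if apiVersion == "" {
        apiVersion = "2025-04-01-preview"
    }
    return azrealtime.Config{
        ResourceEndpoint: os.Getenv("AZURE_OPENAI_ENDPOINT"),
        Deployment:       os.Getenv("AZURE_OPENAI_REALTIME_DEPLOYMENT"),
        APIVersion:       apiVersion,
        Credential:       azrealtime.APIKey(os.Getenv("AZURE_OPENAI_API_KEY")),
        DialTimeout:      30 * time.Second,
    }
}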
cfg := azrealtime.Config{
    ResourceEndpoint: "https://your-resource.openai.azure.com",
    Deployment:       "gpt-4o-realtime-preview",
    APIVersion:       "2025-04-01-preview",
    Credential:       azrealtime.APIKey("your-api-key"), // or Bearer token
    DialTimeout:      30 * time.Second,
    HandshakeHeaders: http.Header{"Custom-Header": []string{"value"}},
    Logger: func(event string, fields map[string]any) {
        log.Printf("[%s] %+v", event, fields)
    },
}
// API Key (most common)
cfg.Credential = azrealtime.APIKey("your-api-key")
// Bearer token (for Azure AD authentication)
cfg.Credential = azrealtime.Bearer("your-bearer-token")
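With Azure AD, the bearer token itself typically comes from the azure-sdk-for-go azidentity package. A sketch under the assumption that azrealtime.Bearer accepts the raw access token string; token refresh is not handled here:

// Requires github.com/Azure/azure-sdk-for-go/sdk/azidentity and
// github.com/Azure/azure-sdk-for-go/sdk/azcore/policy.
cred, err := azidentity.NewDefaultAzureCredential(nil)
if err != nil {
    log.Fatalf("credential: %v", err)
}
// The Cognitive Services scope covers Azure OpenAI resources.
token, err := cred.GetToken(ctx, policy.TokenRequestOptions{
    Scopes: []string{"https://cognitiveservices.azure.com/.default"},
})
if err != nil {
    log.Fatalf("token: %v", err)
}
cfg.Credential = azrealtime.Bearer(token.Token)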
The library provides advanced structured logging with configurable levels:
// Option 1: Environment-based logging (set AZREALTIME_LOG_LEVEL=DEBUG)
cfg := azrealtime.Config{
    // ... other config
    StructuredLogger: azrealtime.NewLoggerFromEnv(),
}

// Option 2: Explicit log level
cfg := azrealtime.Config{
    // ... other config
    StructuredLogger: azrealtime.NewLogger(azrealtime.LogLevelDebug),
}

// Option 3: Contextual logging
logger := azrealtime.NewLogger(azrealtime.LogLevelInfo)
sessionLogger := logger.WithContext(map[string]interface{}{
    "session_id": "abc123",
    "user_id":    "user456",
})
sessionLogger.Info("user_connected", map[string]interface{}{
    "ip": "192.168.1.1",
})
// Output: [azrealtime] [INFO] user_connected session_id=abc123 user_id=user456 ip=192.168.1.1
Log Levels:
LogLevelDebug: All messages including detailed debugging
LogLevelInfo: Informational messages and above (default)
LogLevelWarn: Warnings and errors only
LogLevelError: Error messages only
LogLevelOff: No logging

Environment Variables:
AZREALTIME_LOG_LEVEL: Sets the minimum log level (DEBUG, INFO, WARN, ERROR, OFF)

The library provides structured error types for better error handling:
client, err := azrealtime.Dial(ctx, cfg)
if err != nil {
    var configErr *azrealtime.ConfigError
    var connErr *azrealtime.ConnectionError

    switch {
    case errors.As(err, &configErr):
        log.Printf("Configuration error in %s: %s", configErr.Field, configErr.Message)
    case errors.As(err, &connErr):
        log.Printf("Connection failed: %v", connErr.Cause)
    default:
        log.Printf("Unexpected error: %v", err)
    }
}
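Connection failures are often transient, so it can help to retry Dial with backoff while treating configuration errors as fatal. A sketch, not part of the library, which assumes Dial returns a *azrealtime.Client as suggested by the API reference below:

// dialWithRetry is an illustrative helper: it retries only on
// ConnectionError with exponential backoff and gives up on anything else.
func dialWithRetry(ctx context.Context, cfg azrealtime.Config, attempts int) (*azrealtime.Client, error) {
    backoff := time.Second
    for i := 0; ; i++ {
        client, err := azrealtime.Dial(ctx, cfg)
        if err == nil {
            return client, nil
        }
        var connErr *azrealtime.ConnectionError
        if !errors.As(err, &connErr) || i >= attempts-1 {
            return nil, err // config errors and exhausted retries are fatal
        }
        select {
        case <-time.After(backoff):
            backoff *= 2
        case <-ctx.Done():
            return nil, ctx.Err()
        }
    }
}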
// Set up audio assembler
audioAssembler := azrealtime.NewAudioAssembler()

client.OnResponseAudioDelta(func(event azrealtime.ResponseAudioDelta) {
    audioAssembler.OnDelta(event)
})

client.OnResponseAudioDone(func(event azrealtime.ResponseAudioDone) {
    pcmData := audioAssembler.OnDone(event.ResponseID)
    // Convert to WAV for saving/playback
    wavData := azrealtime.WAVFromPCM16Mono(pcmData, azrealtime.DefaultSampleRate)
    os.WriteFile("response.wav", wavData, 0644)
})

// Send audio input
audioChunk := make([]byte, azrealtime.PCM16BytesFor(200, azrealtime.DefaultSampleRate))
client.AppendPCM16(ctx, audioChunk)
client.InputCommit(ctx) // Signal end of input
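For longer input, the same calls can be used to stream a recording in roughly 200 ms chunks. A sketch that assumes input.pcm is raw PCM16 mono audio already at azrealtime.DefaultSampleRate (the file name is an example only):

// Stream a raw PCM16 mono recording to the session in ~200 ms chunks.
pcm, err := os.ReadFile("input.pcm")
if err != nil {
    log.Fatalf("read audio: %v", err)
}
chunkSize := azrealtime.PCM16BytesFor(200, azrealtime.DefaultSampleRate)
for off := 0; off < len(pcm); off += chunkSize {
    end := off + chunkSize
    if end > len(pcm) {
        end = len(pcm)
    }
    client.AppendPCM16(ctx, pcm[off:end]) // send one chunk
}
client.InputCommit(ctx) // signal end of input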
session := azrealtime.Session{
    Voice:             azrealtime.Ptr("alloy"), // Voice selection
    Instructions:      azrealtime.Ptr("Custom system prompt..."),
    InputAudioFormat:  azrealtime.Ptr("pcm16"),
    OutputAudioFormat: azrealtime.Ptr("pcm16"),
    InputTranscription: &azrealtime.InputTranscription{
        Model:    "whisper-1",
        Language: "en",
    },
    TurnDetection: &azrealtime.TurnDetection{
        Type:              "server_vad",
        Threshold:         0.5,  // Sensitivity (0.0-1.0)
        PrefixPaddingMS:   300,  // Audio before speech
        SilenceDurationMS: 1000, // Silence to end turn
        CreateResponse:    true, // Auto-respond
    },
}

err := client.SessionUpdate(ctx, session)
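With InputTranscription enabled, transcripts arrive as conversation events (see the event reference below). A sketch of how they might be consumed, assuming the handler names follow the On<EventName> pattern used elsewhere in this README and that the completed event exposes ItemID and Transcript fields; both are assumptions, not documented API:

// Assumed handler and field names, following the On<Event> pattern.
client.OnConversationItemInputAudioTranscriptionCompleted(func(event azrealtime.ConversationItemInputAudioTranscriptionCompleted) {
    log.Printf("transcript for item %s: %s", event.ItemID, event.Transcript)
})
client.OnConversationItemInputAudioTranscriptionFailed(func(event azrealtime.ConversationItemInputAudioTranscriptionFailed) {
    log.Printf("transcription failed for item %s", event.ItemID)
})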
See the examples/ directory for comprehensive examples:
ws-minimal/: Basic WebSocket usage
comprehensive/: Production-ready patterns
webrtc-browser/: Browser WebRTC integration

Each example includes detailed documentation and error handling patterns.
Run the full test suite:
# Run all tests
go test -v ./...
# Run with coverage
go test -cover ./...
# Run specific test patterns
go test -v ./azrealtime -run TestDial
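The pure helpers are straightforward to cover in a standard go test as well. An illustrative test, not part of the repository's suite; the RIFF check relies only on the fact that every WAV container starts with that chunk ID:

package azrealtime_test

import (
    "bytes"
    "testing"

    "github.com/enesunal-m/azrealtime"
)

// TestHelpers exercises the pure helper functions documented in this README.
func TestHelpers(t *testing.T) {
    // Ptr returns a pointer to the given value.
    if v := azrealtime.Ptr("alloy"); *v != "alloy" {
        t.Fatalf("Ptr returned %q, want %q", *v, "alloy")
    }

    // WAV output should begin with the standard "RIFF" chunk ID.
    pcm := make([]byte, azrealtime.PCM16BytesFor(100, azrealtime.DefaultSampleRate))
    wav := azrealtime.WAVFromPCM16Mono(pcm, azrealtime.DefaultSampleRate)
    if !bytes.HasPrefix(wav, []byte("RIFF")) {
        t.Fatal("expected WAV output to start with the RIFF header")
    }
}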
A common pitfall is creating a response before the server has acknowledged the committed audio, or committing the buffer manually when server VAD is enabled. The patterns below avoid both:
// Correct approach: wait for server events before creating response
client.OnInputAudioBufferCommitted(func(event azrealtime.InputAudioBufferCommitted) {
    log.Printf("Audio committed: %s", event.ItemID)
    // Now create response
    client.CreateResponse(ctx, azrealtime.CreateResponseOptions{
        Modalities: []string{"text", "audio"},
    })
})

// Send audio and let server VAD handle it
client.AppendPCM16(ctx, audioData)
// Don't manually commit - let server VAD decide

// Use server VAD, don't manually commit every chunk
session := azrealtime.Session{
    TurnDetection: &azrealtime.TurnDetection{
        Type:           "server_vad",
        CreateResponse: true, // Let server create responses automatically
    },
}
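To observe what server VAD is deciding, the speech start/stop events can be logged. The handler names below are assumed to follow the same On<EventName> registration pattern as the other handlers in this README:

// Assumed handler names, following the On<Event> registration pattern.
client.OnInputAudioBufferSpeechStarted(func(event azrealtime.InputAudioBufferSpeechStarted) {
    log.Println("server VAD: speech started")
})
client.OnInputAudioBufferSpeechStopped(func(event azrealtime.InputAudioBufferSpeechStopped) {
    log.Println("server VAD: speech stopped; a response will be created automatically")
})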
Core Types:
Config: Client configuration options
Client: Main WebSocket client
Session: AI assistant configuration
CreateResponseOptions: Response generation settings

Session Events:
SessionCreated / SessionUpdated: Session lifecycle management
ErrorEvent: API errors and warnings
RateLimitsUpdated: Rate limiting information

Audio Input Events:
InputAudioBufferSpeechStarted / InputAudioBufferSpeechStopped: Voice activity detection
InputAudioBufferCommitted / InputAudioBufferCleared: Audio buffer management

Conversation Events:
ConversationItemCreated / ConversationItemDeleted / ConversationItemTruncated: Item management
ConversationItemInputAudioTranscriptionCompleted / ConversationItemInputAudioTranscriptionFailed: Transcription events

Response Events:
ResponseCreated / ResponseDone: Response lifecycle
ResponseTextDelta / ResponseTextDone: Streaming text responses
ResponseAudioDelta / ResponseAudioDone: Streaming audio responses
ResponseAudioTranscriptDelta / ResponseAudioTranscriptDone: Audio transcription streaming
ResponseOutputItemAdded / ResponseOutputItemDone: Response item management
ResponseContentPartAdded / ResponseContentPartDone: Content part management
ResponseFunctionCallArgumentsDelta / ResponseFunctionCallArgumentsDone: Function call streaming

Error Types:
ConfigError: Configuration validation errors
ConnectionError: Network and connection errors
SendError: Message transmission errors
EventError: Event processing errors

Helper Functions:
Ptr[T](v T) *T: Create pointer from value
PCM16BytesFor(ms, rate int) int: Calculate audio buffer size
WAVFromPCM16Mono([]byte, int) []byte: Convert PCM to WAV

To publish this library as a Go module:
Create a public repository:
git init
git remote add origin git@github.com:enesunal-m/azrealtime.git
Update the module path:
# Update go.mod with your repository path
sed -i 's|github.com/enesunal-m/azrealtime|github.com/yourusername/azrealtime|g' go.mod
go mod tidy
Tag and publish:
git add .
git commit -m "Initial release"
git tag v1.0.0
git push origin main --tags
Use in other projects:
go get github.com/yourusername/azrealtime@v1.0.0
Create a feature branch (git checkout -b feature/amazing-feature), run the tests (go test ./...), commit your changes (git commit -am 'Add amazing feature'), and push the branch (git push origin feature/amazing-feature).

This project is licensed under the MIT License - see the LICENSE file for details.