Consuming Voice Agents in Client-Side Web Applications (WebRTC)
Integration Architecture
Ultra-low-latency, real-time voice calls between a user's browser and your Langoedge voice agent run over WebRTC using LiveKit.
Because connecting to a real-time room requires a secure participant token, the integration involves both your backend and frontend application:
Step 1: Create a Secure Backend Session Proxy
To request a LiveKit connection token, you must make a POST request to Langoedge's session API. Since this requires sensitive authorization credentials (like cookies or API keys), you should never request this directly from the browser. Instead, set up a backend route in your application to act as a proxy.
Here is an example Next.js API Route Handler (app/api/voice-agent/session/route.ts):
import { NextResponse } from "next/server";
/**
 * Checks that an upstream payload carries the two fields the browser client
 * reads (`serverUrl` and `participantToken`), both as strings.
 */
function isConnectionDetails(
  payload: unknown
): payload is { serverUrl: string; participantToken: string } {
  if (typeof payload !== "object" || payload === null) {
    return false;
  }
  const candidate = payload as Record<string, unknown>;
  return (
    typeof candidate.serverUrl === "string" &&
    typeof candidate.participantToken === "string"
  );
}

/**
 * Server-side proxy that requests a voice session from Langoedge and relays
 * the resulting connection details to the browser.
 *
 * The Langoedge session cookie and agent id are read from the environment so
 * they never reach client-side code.
 *
 * @param request Incoming request; its contents are not read.
 * @returns On success, the upstream JSON payload (expected to contain
 *   `serverUrl` and `participantToken`). On failure, a JSON `{ error }` body
 *   with the upstream status code, 502 for a malformed upstream payload, or
 *   500 for missing configuration or an unexpected exception.
 */
export async function POST(request: Request) {
  const sessionToken = process.env.LANGOEDGE_SESSION_TOKEN;
  const agentId = process.env.LANGOEDGE_VOICE_AGENT_ID; // E.g. "6a1bfe2dc9e17ba91f9ed664"

  // Fail fast on missing configuration. Without this guard the template
  // literal below would send the literal cookie "appSession=undefined".
  if (!sessionToken || !agentId) {
    console.error(
      "Proxy error: LANGOEDGE_SESSION_TOKEN or LANGOEDGE_VOICE_AGENT_ID is not set"
    );
    return NextResponse.json(
      { error: "Internal server error during proxying" },
      { status: 500 }
    );
  }

  try {
    const response = await fetch("https://app.langoedge.com/api/voice-agent/session", {
      method: "POST",
      headers: {
        "accept": "application/json",
        "content-type": "application/json",
        // Pass your Langoedge application credentials here
        "cookie": `appSession=${sessionToken}`,
      },
      body: JSON.stringify({
        room_config: {
          agent: {
            agent_name: agentId,
          },
          // Placeholder identity for the example; in a real application
          // derive this from your own authenticated user on the server.
          participant: {
            id: "user_12345",
            first_name: "John",
            last_name: "Doe",
          },
        },
      }),
    });

    if (!response.ok) {
      // Log the upstream body server-side only; the client gets a generic message.
      const errorText = await response.text();
      console.error("Langoedge voice session error:", errorText);
      return NextResponse.json(
        { error: "Failed to create session with voice agent backend" },
        { status: response.status }
      );
    }

    const data: unknown = await response.json();

    // Expected shape: { serverUrl: string, participantToken: string }
    if (!isConnectionDetails(data)) {
      console.error("Langoedge voice session error: unexpected response shape");
      return NextResponse.json(
        { error: "Voice agent backend returned an unexpected response" },
        { status: 502 }
      );
    }

    return NextResponse.json(data);
  } catch (error) {
    // Network failures and JSON parse errors land here.
    console.error("Proxy error:", error);
    return NextResponse.json(
      { error: "Internal server error during proxying" },
      { status: 500 }
    );
  }
}
[!NOTE]
Why cookie auth here instead of `x-langoedge-secret`? The `/api/voice-agent/session` endpoint is a first-party Langoedge application route that accepts a session cookie (`appSession`), not the external API secret. This is intentional: the session endpoint issues a scoped LiveKit participant token rather than performing a full API operation, so it lives behind the application authentication layer. The `x-langoedge-secret` header is used for the external API gateway (`api.langoedge.com`) and is not interchangeable with the session cookie. Keep both credentials separate and never expose either in client-side code — always proxy through your backend.
Step 2: Install LiveKit Client-Side SDKs
In your client application, install the LiveKit React components, their styles, and the LiveKit WebRTC client SDK, plus lucide-react (used for the icons in the examples below):
npm install @livekit/components-react @livekit/components-styles livekit-client lucide-react
Make sure to import the CSS styles once in your layout or main file (e.g., layout.tsx or app.tsx):
import "@livekit/components-styles";
Step 3: Build the Voice Call Controller Component
The controller component manages the active session state, triggers the backend proxy to retrieve the room token, and mounts the LiveKit room when a call starts.
"use client";
import React, { useState } from "react";
import { LiveKitRoom, RoomAudioRenderer } from "@livekit/components-react";
import { PhoneCall } from "lucide-react";
import { VoiceAgentVisualizer } from "./VoiceAgentVisualizer";
/**
 * Call widget that fetches connection details from the backend proxy and
 * mounts a LiveKit room while a call is active.
 *
 * Idle state renders a round "start" button; the active state renders the
 * room (audio only), the agent visualizer, and an "End Call" button.
 */
export function VoiceCallController() {
  const [isCallActive, setIsCallActive] = useState(false);
  const [isLoading, setIsLoading] = useState(false);
  const [connectionDetails, setConnectionDetails] = useState<{
    serverUrl: string;
    participantToken: string;
  } | null>(null);

  /** Requests a session from the proxy route and, on success, activates the call. */
  const startVoiceCall = async () => {
    try {
      setIsLoading(true);

      const response = await fetch("/api/voice-agent/session", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
      });
      if (!response.ok) {
        throw new Error("Failed to retrieve voice session token");
      }

      // Validate the payload before mounting the room, so a malformed
      // response surfaces here instead of as an undefined token or URL.
      const data: unknown = await response.json();
      const payload =
        typeof data === "object" && data !== null
          ? (data as Record<string, unknown>)
          : {};
      const { serverUrl, participantToken } = payload;
      if (typeof serverUrl !== "string" || typeof participantToken !== "string") {
        throw new Error("Voice session response is missing serverUrl or participantToken");
      }

      setConnectionDetails({ serverUrl, participantToken });
      setIsCallActive(true);
    } catch (err) {
      console.error("Error connecting:", err);
      alert("Failed to start the call. Please check your credentials.");
    } finally {
      // Always clear the spinner, whether the request succeeded or failed.
      setIsLoading(false);
    }
  };

  /** Returns to the idle state; also used as the room's disconnect handler. */
  const endVoiceCall = () => {
    setIsCallActive(false);
    setConnectionDetails(null);
  };

  return (
    <div className="flex flex-col items-center justify-center p-6 border border-zinc-800 bg-zinc-950 rounded-2xl w-full max-w-md">
      <h3 className="text-lg font-bold text-zinc-100 mb-6">Langoedge Voice Portal</h3>
      {isCallActive && connectionDetails ? (
        <LiveKitRoom
          video={false}
          audio={true}
          token={connectionDetails.participantToken}
          serverUrl={connectionDetails.serverUrl}
          onDisconnected={endVoiceCall}
          className="w-full flex flex-col items-center"
        >
          {/* RoomAudioRenderer mounts the browser audio element to output the remote agent's speech */}
          <RoomAudioRenderer />
          {/* VoiceAgentVisualizer listens to audio levels and displays agent status */}
          <VoiceAgentVisualizer />
          <button
            type="button"
            onClick={endVoiceCall}
            className="mt-6 px-6 py-2.5 bg-rose-600 hover:bg-rose-500 text-white rounded-full flex items-center gap-2 font-medium"
          >
            <PhoneCall className="w-4 h-4 rotate-[135deg]" />
            End Call
          </button>
        </LiveKitRoom>
      ) : (
        <button
          type="button"
          onClick={startVoiceCall}
          disabled={isLoading}
          // Icon-only button: give assistive technology a name to announce.
          aria-label="Start voice call"
          className="relative w-24 h-24 bg-gradient-to-tr from-emerald-500 to-teal-500 rounded-full flex items-center justify-center text-white hover:scale-105 active:scale-95 transition-transform shadow-lg shadow-emerald-500/20"
        >
          {isLoading ? (
            <div className="w-6 h-6 border-2 border-white/30 border-t-white rounded-full animate-spin" />
          ) : (
            <PhoneCall className="w-8 h-8 fill-current" />
          )}
        </button>
      )}
    </div>
  );
}
Step 4: Create the Live Real-Time Visualizer
Using the @livekit/components-react context, you can track the current conversational state of the voice agent (e.g., whether the agent is listening, thinking, or speaking) and extract real-time audio volume levels to drive a custom visual equalizer.
"use client";
import React, { useMemo } from "react";
import { useVoiceAssistant, useMultibandTrackVolume } from "@livekit/components-react";
/** Status text for each agent state that gets its own label. */
const AGENT_STATUS_LABELS = new Map<string, string>([
  ["connecting", "Connecting to voice agent..."],
  ["initializing", "Initializing voice streams..."],
  ["idle", "Agent is waiting..."],
  ["listening", "Agent is listening to you..."],
  ["thinking", "Agent is formulating a response..."],
  ["speaking", "Agent is speaking..."],
  ["failed", "Connection failed."],
]);

/**
 * Shows the voice agent's current status line and a 16-bar equalizer.
 *
 * The bars follow the agent audio track's per-band volume only while the
 * agent state is "speaking"; in every other state they stay short and dim.
 */
export function VoiceAgentVisualizer() {
  const barCount = 16;

  const { state, audioTrack } = useVoiceAssistant();

  // One volume reading per equalizer bar, refreshed by the LiveKit hook.
  // NOTE(review): the height/opacity math below presumes readings of roughly
  // 0..1 — confirm against the LiveKit hook documentation.
  const volumes = useMultibandTrackVolume(audioTrack, {
    bands: barCount,
    loPass: 20,
    hiPass: 900,
    updateInterval: 30,
  });

  // States without an explicit entry fall back to a generic label.
  const statusLabel = useMemo(
    () => AGENT_STATUS_LABELS.get(state) ?? "Active session",
    [state]
  );

  const isSpeaking = state === "speaking";

  return (
    <div className="w-full flex flex-col items-center gap-4 bg-zinc-900/60 p-6 rounded-xl border border-zinc-800">
      <p className="text-xs font-semibold text-emerald-400 uppercase tracking-widest">{statusLabel}</p>
      {/* Equalizer animation */}
      <div className="flex items-end gap-1.5 h-16 w-full justify-center">
        {Array.from({ length: barCount }, (_, index) => {
          const level = volumes[index] ?? 0;
          // While speaking: 10px floor plus up to 50px scaled by volume; otherwise a flat 4px.
          const barHeight = isSpeaking ? 10 + level * 50 : 4;
          const barOpacity = isSpeaking ? 0.3 + level * 0.7 : 0.2;
          return (
            <div
              key={index}
              className="w-1.5 rounded-full bg-emerald-500 transition-all duration-75"
              style={{ height: `${barHeight}px`, opacity: barOpacity }}
            />
          );
        })}
      </div>
    </div>
  );
}
Step 5: User Interaction & Browser Policies
To prevent security warnings and ensure a smooth user experience, keep the following browser limitations in mind:
- User Interaction Gate: Modern browsers block microphone access and audio playback unless initiated by a direct user gesture (like clicking a "Start Call" button). Do not attempt to initialize voice rooms automatically on page load.
- Production Permissions: Direct WebRTC links require secure origins (`https://` or `localhost`). If you are testing deployments on external staging environments, make sure SSL is configured.