rootflo · rootflo-hardik · Jan 23, 2026 · Jan 27, 2026 · Jan 27, 2026 · Jan 27, 2026
diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx
@@ -56,7 +56,9 @@ const createVoiceAgentSchema = z.object({
   tts_config_id: z.string().min(1, 'TTS configuration is required'),
   stt_config_id: z.string().min(1, 'STT configuration is required'),
   telephony_config_id: z.string().min(1, 'Telephony configuration is required'),
-  tts_voice_id: z.string().min(1, 'TTS Voice ID is required'),
+  tts_voice_ids: z.record(z.string(), z.string()).refine((val) => Object.keys(val).length > 0, {
+    message: 'At least one voice ID is required',
+  }),
-  tts_voice_ids: z.record(z.string(), z.string()).refine((val) => Object.keys(val).length > 0, {
-    message: 'At least one voice ID is required',
-  }),
+  tts_voice_ids: z.record(z.string(), z.string().min(1, 'Voice ID cannot be empty')).refine(
+    (val) => Object.keys(val).length > 0 && Object.values(val).every((v) => v.trim().length > 0),
+    {
+      message: 'At least one non-empty voice ID is required',
+    }
+  ),
-  tts_voice_ids: z.record(z.string(), z.string()).refine((val) => Object.keys(val).length > 0, {
-    message: 'At least one voice ID is required',
-  }),
+  tts_voice_ids: z.record(z.string(), z.string().min(1, 'Voice ID cannot be empty')).refine(
+    (val) => Object.keys(val).length > 0 && Object.values(val).every((v) => v.trim().length > 0),
+    {
+      message: 'At least one non-empty voice ID is required',
+    }
+  ),
   system_prompt: z.string().min(1, 'System prompt is required'),
   welcome_message: z.string().min(1, 'Welcome message is required'),
   conversation_config: z.string().optional(),
@@ -83,6 +85,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
   const [creating, setCreating] = useState(false);
   const [ttsParameters, setTtsParameters] = useState<Record<string, unknown>>({});
   const [sttParameters, setSttParameters] = useState<Record<string, unknown>>({});
+  const [voiceIdState, setVoiceIdState] = useState<Record<string, string>>({ en: '' });
 
   // Fetch configs for dropdowns
   const { data: llmConfigs = [] } = useGetLLMConfigs(appId);
@@ -99,7 +102,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
       tts_config_id: '',
       stt_config_id: '',
       telephony_config_id: '',
-      tts_voice_id: '',
+      tts_voice_ids: { en: '' },
       system_prompt: '',
       welcome_message: '',
       conversation_config: '{}',
@@ -114,6 +117,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
   // Watch config selections to determine providers
   const watchedTtsConfigId = form.watch('tts_config_id');
   const watchedSttConfigId = form.watch('stt_config_id');
+  const watchedSupportedLanguages = form.watch('supported_languages');
 
   // Get selected providers
   const selectedTtsProvider = ttsConfigs.find((c) => c.id === watchedTtsConfigId)?.provider;
@@ -132,6 +136,20 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
     }
   }, [selectedSttProvider, isOpen]);
 
+  // Keep the per-language voice IDs (local state and form value) in step with the selected languages
+  useEffect(() => {
+    if (!isOpen || !watchedSupportedLanguages) return;
+    // Carry over IDs already entered; added languages start empty and removed ones are dropped
+    const currentVoiceIds = form.getValues('tts_voice_ids') ?? {};
+    const nextVoiceIds: Record<string, string> = {};
+    watchedSupportedLanguages.forEach((lang) => {
+      nextVoiceIds[lang] = currentVoiceIds[lang] ?? '';
+    });
+    setVoiceIdState(nextVoiceIds);
+    // Push the result into the form too, otherwise validation/submit see stale or missing language keys
+    form.setValue('tts_voice_ids', nextVoiceIds, { shouldDirty: true });
+  }, [watchedSupportedLanguages, isOpen, form]);
+
   // Reset form when dialog closes
   useEffect(() => {
     if (!isOpen) {
@@ -142,7 +160,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
         tts_config_id: '',
         stt_config_id: '',
         telephony_config_id: '',
-        tts_voice_id: '',
+        tts_voice_ids: { en: '' },
         system_prompt: '',
         welcome_message: '',
         conversation_config: '{}',
@@ -154,6 +172,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
       });
       setTtsParameters({});
       setSttParameters({});
+      setVoiceIdState({ en: '' });
     }
   }, [isOpen, form]);
 
@@ -232,7 +251,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
         tts_config_id: data.tts_config_id.trim(),
         stt_config_id: data.stt_config_id.trim(),
         telephony_config_id: data.telephony_config_id.trim(),
-        tts_voice_id: data.tts_voice_id.trim(),
+        tts_voice_ids: data.tts_voice_ids,
         tts_parameters: Object.keys(builtTtsParameters).length > 0 ? builtTtsParameters : null,
         stt_parameters: Object.keys(builtSttParameters).length > 0 ? builtSttParameters : null,
         system_prompt: data.system_prompt.trim(),
@@ -710,17 +729,31 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
                   <h4 className="text-sm font-medium">TTS Voice Settings</h4>
                   <FormField
                     control={form.control}
-                    name="tts_voice_id"
+                    name="tts_voice_ids"
                     render={({ field }) => (
                       <FormItem>
                         <FormLabel>
-                          TTS Voice ID<span className="text-red-500">*</span>
+                          TTS Voice IDs<span className="text-red-500">*</span>
                         </FormLabel>
-                        <FormControl>
-                          <Input placeholder="e.g., alloy, echo, fable (OpenAI) or voice ID (ElevenLabs)" {...field} />
-                        </FormControl>
+                        <div className="space-y-3">
+                          {watchedSupportedLanguages.map((langCode) => (
+                            <div key={langCode} className="flex items-center gap-3">
+                              <Label className="w-24 text-sm font-medium">{getLanguageDisplayName(langCode)}:</Label>
+                              <Input
+                                placeholder={`Voice ID for ${getLanguageDisplayName(langCode)}`}
+                                value={voiceIdState[langCode] || ''}
+                                onChange={(e) => {
+                                  const newState = { ...voiceIdState, [langCode]: e.target.value };
+                                  setVoiceIdState(newState);
+                                  field.onChange(newState);
+                                }}
+                                className="flex-1"
+                              />
+                            </div>
+                          ))}
+                        </div>
                         <FormDescription>
-                          Provider-specific voice identifier (e.g., for Deepgram: aura-2-helena-en)
+                          Provider-specific voice identifiers per language (e.g., "aura-2-helena-en" for Deepgram)
                         </FormDescription>
                         <FormMessage />
                       </FormItem>

diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx
@@ -51,7 +51,7 @@ const updateVoiceAgentSchema = z.object({
   tts_config_id: z.string().min(1, 'TTS configuration is required'),
   stt_config_id: z.string().min(1, 'STT configuration is required'),
   telephony_config_id: z.string().min(1, 'Telephony configuration is required'),
-  tts_voice_id: z.string().min(1, 'TTS Voice ID is required'),
+  tts_voice_ids: z.record(z.string(), z.string()).optional(),
   system_prompt: z.string().min(1, 'System prompt is required'),
   welcome_message: z.string().min(1, 'Welcome message is required'),
   conversation_config: z.string().optional(),
@@ -92,6 +92,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
   // State for TTS/STT parameters (managed separately from form)
   const [ttsParameters, setTtsParameters] = useState<Record<string, unknown>>({});
   const [sttParameters, setSttParameters] = useState<Record<string, unknown>>({});
+  const [voiceIdState, setVoiceIdState] = useState<Record<string, string>>(agent.tts_voice_ids || { en: '' });
 
   const form = useForm<UpdateVoiceAgentInput>({
     resolver: zodResolver(updateVoiceAgentSchema),
@@ -102,7 +103,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
       tts_config_id: agent.tts_config_id,
       stt_config_id: agent.stt_config_id,
       telephony_config_id: agent.telephony_config_id,
-      tts_voice_id: agent.tts_voice_id,
+      tts_voice_ids: agent.tts_voice_ids,
       system_prompt: agent.system_prompt,
       welcome_message: agent.welcome_message,
       conversation_config: agent.conversation_config ? JSON.stringify(agent.conversation_config, null, 2) : '{}',
@@ -117,6 +118,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
   // Watch for config changes to determine providers
   const watchedTtsConfigId = form.watch('tts_config_id');
   const watchedSttConfigId = form.watch('stt_config_id');
+  const watchedSupportedLanguages = form.watch('supported_languages');
 
   const selectedTtsProvider = ttsConfigs.find((c) => c.id === watchedTtsConfigId)?.provider;
   const selectedSttProvider = sttConfigs.find((c) => c.id === watchedSttConfigId)?.provider;
@@ -134,7 +136,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
         tts_config_id: agent.tts_config_id,
         stt_config_id: agent.stt_config_id,
         telephony_config_id: agent.telephony_config_id,
-        tts_voice_id: agent.tts_voice_id,
+        tts_voice_ids: agent.tts_voice_ids,
         system_prompt: agent.system_prompt,
         welcome_message: agent.welcome_message,
         conversation_config: agent.conversation_config ? JSON.stringify(agent.conversation_config, null, 2) : '{}',
@@ -144,6 +146,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
         supported_languages: agent.supported_languages || ['en'],
         default_language: agent.default_language || 'en',
       });
+      setVoiceIdState(agent.tts_voice_ids || { en: '' });
     }
   }, [isOpen, agent, form]);
 
@@ -161,6 +164,20 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
     }
   }, [isOpen, agent.stt_parameters]);
 
+  // Sync voice ID state with language changes
+  useEffect(() => {
+    if (isOpen && watchedSupportedLanguages) {
+      setVoiceIdState((prev) => {
+        const newState: Record<string, string> = {};
+        // Preserve existing voice IDs for languages still selected
+        watchedSupportedLanguages.forEach((lang) => {
+          newState[lang] = prev[lang] || '';
+        });
+        return newState;
+      });
+    }
+  }, [watchedSupportedLanguages, isOpen]);
-  // Sync voice ID state with language changes
-  useEffect(() => {
-    if (isOpen && watchedSupportedLanguages) {
-      setVoiceIdState((prev) => {
-        const newState: Record<string, string> = {};
-        // Preserve existing voice IDs for languages still selected
-        watchedSupportedLanguages.forEach((lang) => {
-          newState[lang] = prev[lang] || '';
-        });
-        return newState;
-      });
-    }
-  }, [watchedSupportedLanguages, isOpen]);
+  // Keep the per-language voice IDs (local state and form value) in step with the selected languages
+  useEffect(() => {
+    if (!isOpen || !watchedSupportedLanguages) return;
+    // Build the map outside the state updater: React expects updaters to be pure,
+    // so form.setValue must not be called from inside one
+    const currentVoiceIds = form.getValues('tts_voice_ids') ?? {};
+    const nextVoiceIds: Record<string, string> = {};
+    // Preserve IDs for languages still selected; newly added languages start empty
+    watchedSupportedLanguages.forEach((lang) => {
+      nextVoiceIds[lang] = currentVoiceIds[lang] ?? '';
+    });
+    setVoiceIdState(nextVoiceIds);
+    form.setValue('tts_voice_ids', nextVoiceIds, { shouldDirty: true });
+  }, [watchedSupportedLanguages, isOpen, form]);
-  // Sync voice ID state with language changes
-  useEffect(() => {
-    if (isOpen && watchedSupportedLanguages) {
-      setVoiceIdState((prev) => {
-        const newState: Record<string, string> = {};
-        // Preserve existing voice IDs for languages still selected
-        watchedSupportedLanguages.forEach((lang) => {
-          newState[lang] = prev[lang] || '';
-        });
-        return newState;
-      });
-    }
-  }, [watchedSupportedLanguages, isOpen]);
+  // Keep the per-language voice IDs (local state and form value) in step with the selected languages
+  useEffect(() => {
+    if (!isOpen || !watchedSupportedLanguages) return;
+    // Build the map outside the state updater: React expects updaters to be pure,
+    // so form.setValue must not be called from inside one
+    const currentVoiceIds = form.getValues('tts_voice_ids') ?? {};
+    const nextVoiceIds: Record<string, string> = {};
+    // Preserve IDs for languages still selected; newly added languages start empty
+    watchedSupportedLanguages.forEach((lang) => {
+      nextVoiceIds[lang] = currentVoiceIds[lang] ?? '';
+    });
+    setVoiceIdState(nextVoiceIds);
+    form.setValue('tts_voice_ids', nextVoiceIds, { shouldDirty: true });
+  }, [watchedSupportedLanguages, isOpen, form]);
+
   // Helper functions to update parameters
   const setTtsParameter = (key: string, value: unknown) => {
     setTtsParameters((prev) => ({ ...prev, [key]: value }));
@@ -266,8 +283,8 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
         requestData.telephony_config_id = data.telephony_config_id;
       }
 
-      if (data.tts_voice_id.trim() !== agent.tts_voice_id) {
-        requestData.tts_voice_id = data.tts_voice_id.trim();
+      if (JSON.stringify(data.tts_voice_ids) !== JSON.stringify(agent.tts_voice_ids)) {
+        requestData.tts_voice_ids = data.tts_voice_ids;
       }
 
       // Check if TTS parameters changed
@@ -757,17 +774,31 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
                   <h4 className="text-sm font-medium">TTS Voice Settings</h4>
                   <FormField
                     control={form.control}
-                    name="tts_voice_id"
+                    name="tts_voice_ids"
                     render={({ field }) => (
                       <FormItem>
                         <FormLabel>
-                          TTS Voice ID<span className="text-red-500">*</span>
+                          TTS Voice IDs<span className="text-red-500">*</span>
                         </FormLabel>
-                        <FormControl>
-                          <Input placeholder="e.g., alloy, echo, fable (OpenAI) or voice ID (ElevenLabs)" {...field} />
-                        </FormControl>
+                        <div className="space-y-3">
+                          {watchedSupportedLanguages.map((langCode) => (
+                            <div key={langCode} className="flex items-center gap-3">
+                              <Label className="w-24 text-sm font-medium">{getLanguageDisplayName(langCode)}:</Label>
+                              <Input
+                                placeholder={`Voice ID for ${getLanguageDisplayName(langCode)}`}
+                                value={voiceIdState[langCode] || ''}
+                                onChange={(e) => {
+                                  const newState = { ...voiceIdState, [langCode]: e.target.value };
+                                  setVoiceIdState(newState);
+                                  field.onChange(newState);
+                                }}
+                                className="flex-1"
+                              />
+                            </div>
+                          ))}
+                        </div>
                         <FormDescription>
-                          Provider-specific voice identifier (e.g., for Deepgram: aura-2-helena-en)
+                          Provider-specific voice identifiers per language (e.g., "aura-2-helena-en" for Deepgram)
                         </FormDescription>
                         <FormMessage />
                       </FormItem>

diff --git a/wavefront/client/src/types/voice-agent.ts b/wavefront/client/src/types/voice-agent.ts
@@ -13,7 +13,7 @@ export interface VoiceAgent {
   telephony_config_id: string;
   system_prompt: string;
   welcome_message: string;
-  tts_voice_id: string;
+  tts_voice_ids: Record<string, string>;
   tts_parameters: Record<string, unknown> | null;
   stt_parameters: Record<string, unknown> | null;
   conversation_config: Record<string, unknown> | null;
@@ -39,7 +39,7 @@ export interface CreateVoiceAgentRequest {
   telephony_config_id: string;
   system_prompt: string;
   welcome_message: string;
-  tts_voice_id: string;
+  tts_voice_ids: Record<string, string>;
   tts_parameters?: Record<string, unknown> | null;
   stt_parameters?: Record<string, unknown> | null;
   conversation_config?: Record<string, unknown> | null;
@@ -63,7 +63,7 @@ export interface UpdateVoiceAgentRequest {
   telephony_config_id?: string;
   system_prompt?: string;
   welcome_message?: string;
-  tts_voice_id?: string;
+  tts_voice_ids?: Record<string, string>;
   tts_parameters?: Record<string, unknown> | null;
   stt_parameters?: Record<string, unknown> | null;
   conversation_config?: Record<string, unknown> | null;

diff --git a/wavefront/server/apps/call_processing/call_processing/controllers/webhook_controller.py b/wavefront/server/apps/call_processing/call_processing/controllers/webhook_controller.py
@@ -16,8 +16,6 @@
 from pipecat.runner.types import WebSocketRunnerArguments
 from pipecat.runner.utils import parse_telephony_websocket
 
-# from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
-# from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.serializers.twilio import TwilioFrameSerializer
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
@@ -119,6 +117,8 @@ async def inbound_webhook(
 
     # Pass parameters to WebSocket stream
     stream.parameter(name='voice_agent_id', value=agent_id)
+    stream.parameter(name='customer_number', value=From)
+    stream.parameter(name='agent_number', value=To)
 
     connect.append(stream)
     response.append(connect)
@@ -134,6 +134,8 @@ async def inbound_webhook(
 
 @webhook_router.post('/twiml')
 async def twiml_endpoint(
+    From: str = Form(...),
+    To: str = Form(...),
     voice_agent_id: str = Query(...),
     welcome_message_audio_url: str = Query(default=''),
 ):
@@ -181,6 +183,8 @@ async def twiml_endpoint(
 
     # Pass parameters to WebSocket stream
     stream.parameter(name='voice_agent_id', value=voice_agent_id)
+    stream.parameter(name='customer_number', value=To)
+    stream.parameter(name='agent_number', value=From)
-    stream.parameter(name='customer_number', value=To)
-    stream.parameter(name='agent_number', value=From)
+    stream.parameter(name='customer_number', value=From)
+    stream.parameter(name='agent_number', value=To)
-    stream.parameter(name='customer_number', value=To)
-    stream.parameter(name='agent_number', value=From)
+    stream.parameter(name='customer_number', value=From)
+    stream.parameter(name='agent_number', value=To)
 
     connect.append(stream)
     response.append(connect)
@@ -223,6 +227,8 @@ async def websocket_endpoint(
         # Extract parameters from stream
         body_data = call_data.get('body', {})
         voice_agent_id = body_data.get('voice_agent_id')
+        customer_number = body_data.get('customer_number')
+        # agent_number = body_data.get('agent_number')
 
         if not voice_agent_id:
             logger.error('voice_agent_id not found in stream parameters')
@@ -263,13 +269,12 @@ async def websocket_endpoint(
                 vad_analyzer=SileroVADAnalyzer(
                     params=VADParams(
                         confidence=0.7,  # Default is 0.7, can lower to 0.4-0.5 for faster detection
-                        start_secs=0.15,  # Default is 0.2, keep it
-                        stop_secs=0.8,  # KEY: Lower from default 0.8 for faster cutoff (should be 0.2 for smart turn detection)
+                        start_secs=0.2,  # Default is 0.2, keep it
+                        stop_secs=0.2,  # KEY: Lower from default 0.8 for faster cutoff (should be 0.2 for smart turn detection)
                         min_volume=0.6,  # Default is 0.6, adjust based on your audio quality
                     ),
                 ),  # Voice Activity Detection
                 serializer=serializer,
-                # turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
             ),
         )
 
@@ -282,6 +287,7 @@ async def websocket_endpoint(
             tts_config=configs['tts_config'],
             stt_config=configs['stt_config'],
             tools=configs['tools'],
+            customer_number=customer_number,
         )
 
     except Exception as e: