> ## Documentation Index
> Fetch the complete documentation index at: https://docs.mavera.io/llms.txt
> Use this file to discover all available pages before exploring further.

# Speak Session Enhancement

## Scenario

Enhance Mavera Speak sessions by assigning custom ElevenLabs voices to different personas. Each persona gets a unique voice, creating an immersive multi-voice audio experience.

**Flow:** Mavera `GET /personas` → Match to ElevenLabs voices → `POST /mave/chat` (in-character) → ElevenLabs TTS per persona → Multi-voice audio

## Code

<CodeGroup>
  ```python Python theme={"dark"}
  # Multi-voice Speak session: fetch Mavera personas, pair each with an ElevenLabs
  # voice, generate an in-character reply per persona, and save one MP3 per persona.
  import os, re, requests, time

  EL_KEY = os.environ["ELEVENLABS_API_KEY"]
  EL_BASE = "https://api.elevenlabs.io/v1"
  MV = os.environ["MAVERA_API_KEY"]
  MV_BASE = "https://app.mavera.io/api/v1"
  MV_H = {"Authorization": f"Bearer {MV}", "Content-Type": "application/json"}
  TIMEOUT = 60  # seconds; requests waits indefinitely unless a timeout is passed
  OUT_DIR = "speak_session_audio"
  os.makedirs(OUT_DIR, exist_ok=True)
  VOICE_POOL = [
      {"name": "Rachel", "id": "21m00Tcm4TlvDq8ikWAM", "type": "professional female"},
      {"name": "Drew", "id": "29vD33N1CtxCmqQRPOHJ", "type": "confident male"},
      {"name": "Clyde", "id": "2EiwWnXFnvU5JabPnv8n", "type": "warm authoritative"},
      {"name": "Domi", "id": "AZnzlk1XvdvUeBnXmlld", "type": "energetic female"},
      {"name": "Dave", "id": "CYw3kZ02Hs0563khs1Fj", "type": "casual conversational"},
  ]

  # 1. Retrieve Mavera personas (at most the first 5 are used)
  personas_resp = requests.get(f"{MV_BASE}/personas", headers=MV_H, timeout=TIMEOUT)
  personas_resp.raise_for_status()  # stop on a bad key / outage instead of parsing an error body
  resp = personas_resp.json()
  # The endpoint's payload is accepted either as a bare list or wrapped in {"data": [...]}.
  personas = (resp if isinstance(resp, list) else resp.get("data", []))[:5]
  print(f"Personas: {len(personas)}")

  # 2. Map personas to voices and reserve a unique, filesystem-safe file stem for each
  mappings, used_stems = [], set()
  for i, p in enumerate(personas):
      v = VOICE_POOL[i % len(VOICE_POOL)]  # wraps around if personas outnumber voices
      name = p.get("name") or f"Persona {i+1}"  # also covers a null/empty name
      # Collapse every run of characters other than letters, digits, "_" and "-"
      # (spaces, commas, slashes, dots...) so a persona name cannot escape OUT_DIR.
      base = re.sub(r"[^\w-]+", "-", name.lower())[:30].strip("-") or "persona"
      stem, n = base, 2
      while stem in used_stems:  # same-named personas must not overwrite each other's track
          stem, n = f"{base}-{n}", n + 1
      used_stems.add(stem)
      mappings.append({"persona_id": p["id"], "name": name,
                       "voice_id": v["id"], "voice_name": v["name"], "file": stem})
      print(f"  {name:30s} → {v['name']} ({v['type']})")

  TOPIC = "What makes a marketing campaign truly memorable in 2026?"
  tracks = []

  for idx, m in enumerate(mappings):
      if idx:
          time.sleep(2)  # pace requests between personas, including after a skipped one

      # 3. Generate in-character content
      chat = requests.post(f"{MV_BASE}/mave/chat", headers=MV_H, timeout=TIMEOUT, json={
          "message": f"You are {m['name']}. Respond to this discussion topic in 3-4 sentences, "
              f"staying in character. Speak naturally as in a roundtable.\n\nTopic: {TOPIC}",
          "persona_id": m["persona_id"],
      })
      if not chat.ok:
          print(f"\n[{m['name']}]: chat failed (HTTP {chat.status_code}), skipping")
          continue
      content = (chat.json().get("content") or "").strip()
      if not content:
          # Nothing to speak: do not spend a TTS request on empty text.
          print(f"\n[{m['name']}]: empty reply, skipping TTS")
          continue
      print(f"\n[{m['name']}]: {content[:120]}...")
      time.sleep(1)

      # 4. Convert to audio with matched voice
      tts = requests.post(f"{EL_BASE}/text-to-speech/{m['voice_id']}",
          headers={"xi-api-key": EL_KEY, "Content-Type": "application/json"},
          json={"text": content, "model_id": "eleven_multilingual_v2",
                "voice_settings": {"stability": 0.45, "similarity_boost": 0.75,
                                   "style": 0.4, "use_speaker_boost": True}},
          timeout=TIMEOUT)
      if tts.status_code != 200:
          # Report the failure instead of silently producing a shorter session.
          print(f"  ! TTS failed for voice {m['voice_name']}: "
                f"HTTP {tts.status_code} {tts.text[:200]}")
          continue
      path = f"{OUT_DIR}/{m['file']}.mp3"
      with open(path, "wb") as f:
          f.write(tts.content)
      size_kb = len(tts.content) // 1024
      tracks.append({"persona": m["name"], "voice": m["voice_name"],
                     "size_kb": size_kb, "path": path})
      print(f"  → {path} ({size_kb} KB)")

  print(f"\nTotal: {len(tracks)} tracks, {sum(t['size_kb'] for t in tracks)} KB")
  ```

  ```javascript JavaScript theme={"dark"}
  // Multi-voice Speak session: fetch Mavera personas, pair each with an ElevenLabs
  // voice, generate an in-character reply per persona, and save one MP3 per persona.
  import fs from "fs";

  const EL_KEY = process.env.ELEVENLABS_API_KEY;
  const EL_BASE = "https://api.elevenlabs.io/v1";
  const MV = process.env.MAVERA_API_KEY;
  const MV_BASE = "https://app.mavera.io/api/v1";
  // Fail fast: a missing key would otherwise be sent as the literal string "undefined".
  if (!EL_KEY || !MV) {
    throw new Error("Set ELEVENLABS_API_KEY and MAVERA_API_KEY before running");
  }
  const MV_H = { Authorization: `Bearer ${MV}`, "Content-Type": "application/json" };
  // Upper bound per request so a stalled connection cannot hang the script.
  const TIMEOUT_MS = 60_000;
  const OUT_DIR = "speak_session_audio";
  fs.mkdirSync(OUT_DIR, { recursive: true });

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const VOICES = [
    { name: "Rachel", id: "21m00Tcm4TlvDq8ikWAM" }, { name: "Drew", id: "29vD33N1CtxCmqQRPOHJ" },
    { name: "Clyde", id: "2EiwWnXFnvU5JabPnv8n" }, { name: "Domi", id: "AZnzlk1XvdvUeBnXmlld" },
    { name: "Dave", id: "CYw3kZ02Hs0563khs1Fj" },
  ];

  // 1. Retrieve Mavera personas (at most the first 5 are used)
  const personasRes = await fetch(`${MV_BASE}/personas`, {
    headers: MV_H, signal: AbortSignal.timeout(TIMEOUT_MS),
  });
  if (!personasRes.ok) throw new Error(`GET /personas failed: HTTP ${personasRes.status}`);
  const resp = await personasRes.json();
  // The payload is accepted either as a bare array or wrapped in { data: [...] }.
  const personas = (Array.isArray(resp) ? resp : resp.data ?? []).slice(0, 5);
  console.log(`Personas: ${personas.length}`);

  // 2. Map personas to voices and reserve a unique, filesystem-safe file stem for each
  const mappings = [];
  const usedStems = new Set();
  for (const [i, p] of personas.entries()) {
    const voice = VOICES[i % VOICES.length]; // wraps around if personas outnumber voices
    const name = p.name || `Persona ${i + 1}`;
    // Collapse every run of characters other than letters, digits, "_" and "-"
    // (spaces, commas, slashes, dots...) so a persona name cannot escape OUT_DIR.
    const base = name.toLowerCase().replace(/[^\p{L}\p{N}_-]+/gu, "-").slice(0, 30)
      .replace(/^-+|-+$/g, "") || "persona";
    let stem = base;
    // Same-named personas must not overwrite each other's track.
    for (let n = 2; usedStems.has(stem); n += 1) stem = `${base}-${n}`;
    usedStems.add(stem);
    mappings.push({
      persona_id: p.id, name, voice_id: voice.id, voice_name: voice.name, file: stem,
    });
    console.log(`  ${name.padEnd(30)} → ${voice.name}`);
  }

  const TOPIC = "What makes a marketing campaign truly memorable in 2026?";
  const tracks = [];

  for (const [idx, m] of mappings.entries()) {
    // Pace requests between personas, including after a skipped one.
    if (idx > 0) await sleep(2000);

    // 3. Generate in-character content (same prompt as the Python tab)
    const chat = await fetch(`${MV_BASE}/mave/chat`, { method: "POST", headers: MV_H,
      signal: AbortSignal.timeout(TIMEOUT_MS),
      body: JSON.stringify({
        message: `You are ${m.name}. Respond to this discussion topic in 3-4 sentences, staying in character. Speak naturally as in a roundtable.\n\nTopic: ${TOPIC}`,
        persona_id: m.persona_id }),
    });
    if (!chat.ok) {
      console.error(`[${m.name}]: chat failed (HTTP ${chat.status}), skipping`);
      continue;
    }
    const content = ((await chat.json()).content || "").trim();
    if (!content) {
      // Nothing to speak: do not spend a TTS request on empty text.
      console.error(`[${m.name}]: empty reply, skipping TTS`);
      continue;
    }
    console.log(`[${m.name}]: ${content.slice(0, 120)}...`);
    await sleep(1000);

    // 4. Convert to audio with matched voice
    const tts = await fetch(`${EL_BASE}/text-to-speech/${m.voice_id}`, { method: "POST",
      headers: { "xi-api-key": EL_KEY, "Content-Type": "application/json" },
      signal: AbortSignal.timeout(TIMEOUT_MS),
      body: JSON.stringify({ text: content, model_id: "eleven_multilingual_v2",
        voice_settings: { stability: 0.45, similarity_boost: 0.75, style: 0.4, use_speaker_boost: true } }),
    });
    if (!tts.ok) {
      // Report the failure instead of silently producing a shorter session.
      const detail = (await tts.text()).slice(0, 200);
      console.error(`  ! TTS failed for voice ${m.voice_name}: HTTP ${tts.status} ${detail}`);
      continue;
    }
    const buf = Buffer.from(await tts.arrayBuffer());
    const path = `${OUT_DIR}/${m.file}.mp3`;
    fs.writeFileSync(path, buf);
    const kb = Math.round(buf.length / 1024);
    tracks.push({ persona: m.name, voice: m.voice_name, kb, path });
    console.log(`  → ${path} (${kb} KB)`);
  }
  console.log(`Total: ${tracks.length} tracks, ${tracks.reduce((s, t) => s + t.kb, 0)} KB`);
  ```
</CodeGroup>

## Example Output

```text theme={"dark"}
  VP of Marketing   → Rachel  → vp-of-marketing.mp3 (67 KB)
  Product Manager   → Drew    → product-manager.mp3 (72 KB)
  Early-Stage Founder → Clyde → early-stage-founder.mp3 (58 KB)
  Gen Z Consumer    → Domi    → gen-z-consumer.mp3 (63 KB)

[VP of Marketing]: "Memorability comes from emotional resonance backed by data..."
[Gen Z Consumer]: "If it doesn't feel authentic, I scroll past in 0.3 seconds..."

Total: 4 tracks, 260 KB
```

## Error Handling

<AccordionGroup>
  <Accordion title="Voice ID mismatch">Pre-made voice IDs are generally stable, but they are not guaranteed to remain available. Verify each ID against ElevenLabs `GET /voices` before starting a session. If a mapped voice is missing, fall back to the next voice in the pool.</Accordion>
  <Accordion title="More personas than voices">When personas outnumber the pool, voices repeat because the voice index wraps around via modulo. To avoid repeats, expand the pool by fetching all voices from ElevenLabs `GET /voices` and selecting them based on gender/accent metadata.</Accordion>
  <Accordion title="Merging tracks">Concatenate with ffmpeg: `ffmpeg -i "concat:track1.mp3|silence.mp3|track2.mp3" -c copy session.mp3`. Generate the `silence.mp3` gap file beforehand with: `ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t 1 silence.mp3`.</Accordion>
</AccordionGroup>
