Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.mavera.io/llms.txt

Use this file to discover all available pages before exploring further.

Scenario

Your Qualtrics survey has 5,000+ responses across 40 questions — demographics, psychographics, behavioral data, and open-ended feedback. This is the richest persona-building dataset in your organization, but it sits in a Qualtrics dashboard that only the research team accesses. This job exports the full response dataset using Qualtrics’ async export flow (create export → poll for completion → download CSV), sends it to Mave Agent for segment discovery, then creates a comprehensive persona library in Mavera. The result is an enterprise-grade persona set grounded in thousands of real survey responses. Flow: Qualtrics async export (POST /surveys/{id}/export-responses → GET .../export-responses/{exportId} → Download) → Parse CSV → Mave segment discovery → POST /api/v1/personas per segment → Comprehensive persona library

Architecture

Code

import os, csv, io, json, zipfile, requests, time
from collections import Counter, defaultdict

QT = os.environ["QUALTRICS_TOKEN"]
DC = os.environ["QUALTRICS_DC"]
MV = os.environ["MAVERA_API_KEY"]
Q_BASE = f"https://{DC}.qualtrics.com/API/v3"
MB = "https://app.mavera.io/api/v1"
Q_H = {"X-API-TOKEN": QT, "Content-Type": "application/json"}
MV_H = {"Authorization": f"Bearer {MV}", "Content-Type": "application/json"}

SURVEY_ID = os.environ.get("QUALTRICS_SURVEY_ID", "SV_xxxxx")

# 1. Create response export (Qualtrics exports are async: create -> poll -> download)
export_resp = requests.post(
    f"{Q_BASE}/surveys/{SURVEY_ID}/export-responses",
    headers=Q_H, json={"format": "csv"})
# Fail fast with a clear HTTP error on a bad token or wrong datacenter,
# instead of a confusing KeyError further down.
export_resp.raise_for_status()
progress_id = export_resp.json().get("result", {}).get("progressId")
if not progress_id:
    raise SystemExit(f"Export creation returned no progressId: {export_resp.text[:500]}")
print(f"Export started: {progress_id}")

# 2. Poll until complete (up to 60 x 5s = 5 minutes; raise the cap for very
# large surveys, which can take longer to export).
file_id = None
for attempt in range(60):
    time.sleep(5)
    status = requests.get(
        f"{Q_BASE}/surveys/{SURVEY_ID}/export-responses/{progress_id}",
        headers=Q_H).json()
    result = status.get("result", {})
    pct = result.get("percentComplete", 0)
    print(f"  Progress: {pct}% (attempt {attempt + 1})")

    if pct == 100:
        file_id = result.get("fileId")
        break
    if result.get("status") == "failed":
        # SystemExit (not the site-dependent exit() helper) so the script
        # terminates reliably even when run without the site module.
        raise SystemExit(f"Export failed: {status}")

if not file_id:
    raise SystemExit("Export timed out")

# 3. Download the finished export (a ZIP containing one CSV) and parse it.
dl = requests.get(
    f"{Q_BASE}/surveys/{SURVEY_ID}/export-responses/{file_id}/file",
    headers=Q_H)
# An expired fileId or wrong DC returns an error body, not a ZIP; surface
# that as an HTTP error rather than an opaque zipfile.BadZipFile.
dl.raise_for_status()

with zipfile.ZipFile(io.BytesIO(dl.content)) as zf:
    csv_names = [n for n in zf.namelist() if n.endswith(".csv")]
    if not csv_names:
        raise SystemExit(f"No CSV found in export ZIP (contents: {zf.namelist()})")
    with zf.open(csv_names[0]) as f:
        # utf-8-sig strips the byte-order mark at the start of the file.
        reader = csv.DictReader(io.TextIOWrapper(f, encoding="utf-8-sig"))
        rows = list(reader)

# Qualtrics CSVs carry 2 extra header rows after the column names: the
# question-text row (whose Status cell reads "Response Type") and the
# ImportId JSON row. The Status filter drops the question-text row; the
# Finished == "1" filter drops both the ImportId row (its Finished cell is
# JSON, not "1") and partial responses.
responses = [r for r in rows if r.get("Status", "") not in ("", "Response Type")]
responses = [r for r in responses if r.get("Finished", "1") == "1"]
print(f"Parsed {len(responses)} completed responses")

# 4. Build a per-column analysis summary to feed the segment-discovery prompt.
# Skip Qualtrics metadata columns and cap at 30 data columns to bound prompt size.
if not responses:
    # Without this guard, responses[0] below raises a bare IndexError when
    # the survey has no completed responses.
    raise SystemExit("No completed responses to analyze")

meta_cols = {"StartDate", "EndDate", "Status", "IPAddress", "Progress",
             "Duration (in seconds)", "Finished", "RecordedDate",
             "ResponseId", "RecipientLastName", "RecipientFirstName",
             "RecipientEmail", "ExternalReference", "LocationLatitude",
             "LocationLongitude", "DistributionChannel", "UserLanguage"}
data_cols = [c for c in responses[0].keys() if c not in meta_cols][:30]

col_summaries = {}
for col in data_cols:
    values = [r.get(col, "").strip() for r in responses if r.get(col, "").strip()]
    if not values:
        continue
    # Few distinct answers -> treat as categorical and report the top-10
    # distribution; many distinct answers (open-ended text) -> show samples.
    if len(set(values)) <= 25:
        counts = Counter(values).most_common(10)
        col_summaries[col] = f"n={len(values)} | " + ", ".join(
            f"{v}: {c} ({c/len(values)*100:.0f}%)" for v, c in counts)
    else:
        col_summaries[col] = f"n={len(values)} | Samples: {'; '.join(values[:8])}"

summary = "\n".join(f"**{k}**: {v}" for k, v in col_summaries.items())

# 5. Send the column summaries to Mave for audience-segment discovery.
# The summary is truncated to 6000 chars to keep the prompt bounded.
discovery_prompt = f"""Analyze {len(responses)} enterprise survey responses.

COLUMN SUMMARIES:
{summary[:6000]}

Tasks:
1) Identify 5-8 distinct audience segments from answer patterns
2) For each: name, % of audience, defining characteristics, pain points,
   goals, preferred communication style, buying triggers
3) Rate confidence (low/medium/high) for each segment
4) Note cross-segment patterns and tensions
5) Suggest segment-specific messaging angles

Return structured JSON with a "segments" array."""

segments = requests.post(
    f"{MB}/mave/chat",
    headers=MV_H,
    json={"message": discovery_prompt},
).json()

content = segments.get("content", "")
print("=== Segment Discovery ===")
print(content[:2000])

# 6. Extract the segments list from Mave's reply. The model may return a bare
# JSON array, an object with a "segments" key, or prose-wrapped JSON, so we
# slice out the outermost bracketed span before parsing.
try:
    json_start = content.find("[")
    json_end = content.rfind("]") + 1
    if json_start >= 0 and json_end > json_start:
        parsed = json.loads(content[json_start:json_end])
    else:
        parsed_obj = json.loads(
            content[content.find("{"):content.rfind("}")+1])
        parsed = parsed_obj.get("segments", [])
except ValueError:  # json.JSONDecodeError is a ValueError subclass
    parsed = []

# Guard against well-formed but unexpected shapes (a dict, a string, or a
# list containing non-dict items): the persona loop below calls seg.get().
if not isinstance(parsed, list):
    parsed = []
parsed = [seg for seg in parsed if isinstance(seg, dict)]

# 7. Create one Mavera persona per discovered segment.
personas = []
for seg in parsed:
    name = seg.get("name", "Unknown Segment")
    # Mave may label segment size either "percentage" or "size_percent".
    pct = seg.get("percentage", seg.get("size_percent", "?"))
    confidence = seg.get("confidence", "medium")

    r = requests.post(f"{MB}/personas", headers=MV_H, json={
        "name": f"QX: {name}",
        "description": (
            f"Enterprise survey segment ({len(responses)} respondents). "
            f"Est. {pct}% of audience. Confidence: {confidence}. "
            f"{seg.get('characteristics', seg.get('description', ''))}"
        ),
        "demographic": seg.get("demographic", {}),
        "psychographic": {
            "pain_points": seg.get("pain_points", []),
            "goals": seg.get("goals", []),
            "buying_triggers": seg.get("buying_triggers", []),
            "communication_style": seg.get("communication_style", ""),
        },
    })
    r.raise_for_status()
    persona_id = r.json()["id"]  # parse the response body once, not twice
    personas.append({"name": name, "id": persona_id, "pct": pct,
                     "confidence": confidence})
    print(f"Created: {name} ({pct}%, {confidence}) → {persona_id}")
    time.sleep(0.3)  # gentle pacing between creates

print(f"\nPersona library: {len(personas)} segments from {len(responses)} responses")

Example Output

Export started: ES_abc123def
  Progress: 25% (attempt 1)
  Progress: 75% (attempt 2)
  Progress: 100% (attempt 3)
Parsed 4,832 completed responses

=== Segment Discovery ===
[
  {"name": "Strategic Decision Maker", "percentage": 22,
   "confidence": "high",
   "characteristics": "C-level or VP, 500+ employee companies, high purchase authority",
   "pain_points": ["Vendor consolidation pressure", "Board-level ROI justification"],
   "goals": ["Strategic transformation", "Competitive advantage"],
   "buying_triggers": ["Peer company adoption", "Industry mandate"]},
  {"name": "Hands-On Evaluator", "percentage": 31,
   "confidence": "high",
   "characteristics": "Director/Manager level, runs POCs, technical decision influence",
   "pain_points": ["Integration complexity", "Time to evaluate"],
   "goals": ["Fast proof of value", "Team adoption"],
   "buying_triggers": ["Self-serve trial", "Case study from similar org"]},
  {"name": "Budget-Constrained Pragmatist", "percentage": 18,
   "confidence": "medium",
   "characteristics": "SMB or startup, price-sensitive, wears multiple hats",
   "pain_points": ["Affordability", "Implementation resources"],
   "goals": ["Maximum ROI per dollar", "Low-maintenance solution"],
   "buying_triggers": ["Startup pricing", "All-in-one platform"]}
]

Created: Strategic Decision Maker (22%, high) → per_qx_sdm_1
Created: Hands-On Evaluator (31%, high) → per_qx_hoe_2
Created: Budget-Constrained Pragmatist (18%, medium) → per_qx_bcp_3
Created: Risk-Averse Enterprise (15%, medium) → per_qx_rae_4
Created: Innovation Champion (14%, medium) → per_qx_ic_5

Persona library: 5 segments from 4,832 responses

Error Handling

Qualtrics exports are async: create → poll → download. Large surveys (10K+ responses) may take 5+ minutes. The code polls for up to 5 minutes (60 × 5s). For very large surveys, increase the timeout.
Qualtrics CSV exports include 3 header rows (column names, import IDs, question text). The code skips rows 2-3 by filtering on the Status field. Verify your export format matches.
The export downloads as a ZIP file containing one CSV. The Python code above handles this with the standard-library zipfile module; a JavaScript port would need a third-party library such as jszip (npm install jszip).
The API URL includes your datacenter ID (e.g., ca1, iad1, fra1). Find it in Account Settings → Qualtrics IDs. Using the wrong DC returns 404.