# Qualtrics -> Mavera segmentation pipeline: export survey responses, summarize
# answer distributions, discover audience segments, and create persona records.
import os, csv, io, json, zipfile, requests, time
# NOTE(review): defaultdict appears unused in this file — confirm before removing.
from collections import Counter, defaultdict
# Required credentials — a missing variable raises KeyError at startup (fail fast).
QT = os.environ["QUALTRICS_TOKEN"]  # Qualtrics API token
DC = os.environ["QUALTRICS_DC"]  # Qualtrics datacenter id (URL subdomain)
MV = os.environ["MAVERA_API_KEY"]  # Mavera bearer token
Q_BASE = f"https://{DC}.qualtrics.com/API/v3"  # Qualtrics REST base URL
MB = "https://app.mavera.io/api/v1"  # Mavera REST base URL
Q_H = {"X-API-TOKEN": QT, "Content-Type": "application/json"}  # Qualtrics headers
MV_H = {"Authorization": f"Bearer {MV}", "Content-Type": "application/json"}  # Mavera headers
# Survey to export; falls back to a placeholder id if not configured.
SURVEY_ID = os.environ.get("QUALTRICS_SURVEY_ID", "SV_xxxxx")
# 1. Start an asynchronous CSV response export.
export = requests.post(
    f"{Q_BASE}/surveys/{SURVEY_ID}/export-responses",
    headers=Q_H, json={"format": "csv"}, timeout=30).json()
progress_id = export.get("result", {}).get("progressId")
if not progress_id:
    # Without a progressId we would poll a bogus URL forever; fail fast
    # with a nonzero exit code (bare exit() after print exited with 0).
    raise SystemExit(f"Could not start export: {export}")
print(f"Export started: {progress_id}")

# 2. Poll until the export completes (60 attempts x 5s ≈ 5 minutes).
file_id = None
for attempt in range(60):
    time.sleep(5)
    status = requests.get(
        f"{Q_BASE}/surveys/{SURVEY_ID}/export-responses/{progress_id}",
        headers=Q_H, timeout=30).json()
    result = status.get("result", {})
    # Check for failure BEFORE the percent check: a failed export never
    # reaches 100%, and the original order kept polling a dead job.
    if result.get("status") == "failed":
        raise SystemExit(f"Export failed: {status}")
    pct = result.get("percentComplete", 0)
    print(f" Progress: {pct}% (attempt {attempt + 1})")
    if pct == 100:
        file_id = result.get("fileId")
        break
if not file_id:
    # SystemExit → exit code 1; the original print + exit() returned 0.
    raise SystemExit("Export timed out")
# 3. Download the finished export (a zip containing one CSV) and parse it.
zip_data = requests.get(
    f"{Q_BASE}/surveys/{SURVEY_ID}/export-responses/{file_id}/file",
    headers=Q_H, timeout=60).content
with zipfile.ZipFile(io.BytesIO(zip_data)) as zf:
    csv_names = [n for n in zf.namelist() if n.endswith(".csv")]
    if not csv_names:
        # Guard the previously unchecked [0] index — a malformed archive
        # produced a raw IndexError here.
        raise SystemExit("Export zip contained no CSV file")
    with zf.open(csv_names[0]) as f:
        # utf-8-sig strips the BOM Qualtrics prepends to its CSVs.
        reader = csv.DictReader(io.TextIOWrapper(f, encoding="utf-8-sig"))
        rows = list(reader)
# Qualtrics CSVs carry two extra header rows after the column names: one with
# question text (Status == "Response Type") and one with ImportId JSON
# (Status like {"ImportId": "status"}). Drop both explicitly — the ImportId
# row previously slipped past this filter and was only caught by accident
# by the Finished check below.
responses = [
    r for r in rows
    if r.get("Status", "") not in ("", "Response Type")
    and not r.get("Status", "").startswith("{")
]
# Keep only completed responses.
responses = [r for r in responses if r.get("Finished", "1") == "1"]
print(f"Parsed {len(responses)} completed responses")
# 4. Build a per-question summary string for the analysis prompt.
if not responses:
    # Guard the previously unchecked responses[0] below (IndexError on an
    # empty survey) and stop — there is nothing to analyze.
    raise SystemExit("No completed responses to analyze")
# Qualtrics metadata columns to skip when picking question columns.
meta_cols = {"StartDate", "EndDate", "Status", "IPAddress", "Progress",
             "Duration (in seconds)", "Finished", "RecordedDate",
             "ResponseId", "RecipientLastName", "RecipientFirstName",
             "RecipientEmail", "ExternalReference", "LocationLatitude",
             "LocationLongitude", "DistributionChannel", "UserLanguage"}
# Cap at 30 question columns to bound prompt size.
data_cols = [c for c in responses[0].keys() if c not in meta_cols][:30]
col_summaries = {}
for col in data_cols:
    # Non-empty, whitespace-trimmed answers for this column.
    values = [r.get(col, "").strip() for r in responses if r.get(col, "").strip()]
    if not values:
        continue
    if len(set(values)) <= 25:
        # Categorical-ish column: report the top answers with percentages.
        counts = Counter(values).most_common(10)
        col_summaries[col] = f"n={len(values)} | " + ", ".join(
            f"{v}: {c} ({c/len(values)*100:.0f}%)" for v, c in counts)
    else:
        # Free-text column: show a few sample answers (values is already a
        # list — the original's list(values) made a pointless copy).
        col_summaries[col] = f"n={len(values)} | Samples: {'; '.join(values[:8])}"
summary = "\n".join(f"**{k}**: {v}" for k, v in col_summaries.items())
# 5. Ask Mave to discover audience segments from the column summaries.
seg_resp = requests.post(f"{MB}/mave/chat", headers=MV_H, json={
    "message": f"""Analyze {len(responses)} enterprise survey responses.
COLUMN SUMMARIES:
{summary[:6000]}
Tasks:
1) Identify 5-8 distinct audience segments from answer patterns
2) For each: name, % of audience, defining characteristics, pain points,
goals, preferred communication style, buying triggers
3) Rate confidence (low/medium/high) for each segment
4) Note cross-segment patterns and tensions
5) Suggest segment-specific messaging angles
Return structured JSON with a "segments" array."""
}, timeout=120)
# Fail loudly on HTTP errors — an HTML error page would otherwise crash
# .json() with a confusing decode error.
seg_resp.raise_for_status()
content = seg_resp.json().get("content", "")
print("=== Segment Discovery ===")
print(content[:2000])
# 6. Create persona library: extract the segments list from Mave's
# (possibly prose-wrapped) reply. Prefer a bare JSON array; fall back to
# an object with a "segments" key; on any failure, default to [].
parsed = []
try:
    json_start = content.find("[")
    json_end = content.rfind("]") + 1
    if json_start >= 0 and json_end > json_start:
        parsed = json.loads(content[json_start:json_end])
    else:
        obj_start = content.find("{")
        obj_end = content.rfind("}") + 1
        # Original sliced content[-1:...] when "{" was absent; guard that.
        if obj_start < 0 or obj_end <= obj_start:
            raise ValueError("no JSON found in reply")
        parsed = json.loads(content[obj_start:obj_end]).get("segments", [])
except (json.JSONDecodeError, ValueError):
    parsed = []
# Downstream iterates and .get()s on parsed — ensure it is really a list.
if not isinstance(parsed, list):
    parsed = []
# Create one Mavera persona record per discovered segment.
personas = []
for seg in parsed:
    if not isinstance(seg, dict):
        # The model sometimes emits bare strings inside the array; a
        # non-dict entry previously crashed with AttributeError on .get().
        continue
    name = seg.get("name", "Unknown Segment")
    pct = seg.get("percentage", seg.get("size_percent", "?"))
    confidence = seg.get("confidence", "medium")
    r = requests.post(f"{MB}/personas", headers=MV_H, json={
        "name": f"QX: {name}",
        "description": (
            f"Enterprise survey segment ({len(responses)} respondents). "
            f"Est. {pct}% of audience. Confidence: {confidence}. "
            f"{seg.get('characteristics', seg.get('description', ''))}"
        ),
        "demographic": seg.get("demographic", {}),
        "psychographic": {
            "pain_points": seg.get("pain_points", []),
            "goals": seg.get("goals", []),
            "buying_triggers": seg.get("buying_triggers", []),
            "communication_style": seg.get("communication_style", ""),
        },
    }, timeout=30)
    r.raise_for_status()
    # Parse the response body once (original called r.json() twice).
    persona_id = r.json()["id"]
    personas.append({"name": name, "id": persona_id, "pct": pct,
                     "confidence": confidence})
    print(f"Created: {name} ({pct}%, {confidence}) → {persona_id}")
    time.sleep(0.3)  # light rate limiting between persona creations
print(f"\nPersona library: {len(personas)} segments from {len(responses)} responses")