import json
import os
import time
from collections import Counter, defaultdict
from datetime import datetime, timedelta

import requests
# Mixpanel service-account credentials and project id, read from the
# environment. A missing variable raises KeyError as soon as the script starts.
MP_SA = os.environ["MIXPANEL_SERVICE_ACCOUNT"]
MP_SECRET = os.environ["MIXPANEL_SECRET"]
MP_PROJECT = os.environ["MIXPANEL_PROJECT_ID"]

# Mavera API key, base URL, and the bearer/JSON headers built from that key.
MV = os.environ["MAVERA_API_KEY"]
MB = "https://app.mavera.io/api/v1"
MH = {
    "Authorization": "Bearer " + MV,
    "Content-Type": "application/json",
}
# Export window: the 30 days ending today, formatted as YYYY-MM-DD strings.
today = datetime.now()
from_date = (today - timedelta(days=30)).strftime("%Y-%m-%d")
to_date = today.strftime("%Y-%m-%d")

# Request the raw event export as a streamed response so the body can be
# consumed line by line instead of being loaded into memory at once.
r = requests.get(
    "https://data.mixpanel.com/api/2.0/export",
    auth=(MP_SA, MP_SECRET),
    params={"project_id": MP_PROJECT, "from_date": from_date, "to_date": to_date},
    stream=True,
    # (connect, read) timeouts in seconds. requests waits indefinitely by
    # default, so a stalled connection would otherwise hang the script. The
    # read timeout bounds the gap between received chunks, not the whole
    # download, so a long export still completes.
    timeout=(10, 300),
)
# Fail fast on authentication or parameter errors rather than parsing an error body.
r.raise_for_status()
# Per-user accumulator keyed by distinct_id:
#   events   - every event name seen, in export order
#   features - the distinct event names
#   days     - distinct calendar days (YYYY-MM-DD) with at least one event
#   count    - total number of events
user_events = defaultdict(lambda: {"events": [], "features": set(), "days": set(), "count": 0})
try:
    # The export body is consumed as one JSON object per line.
    for line in r.iter_lines():
        if not line:
            continue  # skip blank lines in the stream
        event = json.loads(line)
        props = event.get("properties", {})
        uid = props.get("distinct_id")
        if not uid:
            continue  # events without a user id cannot be attributed
        event_name = event.get("event", "")
        user_stats = user_events[uid]
        user_stats["events"].append(event_name)
        user_stats["features"].add(event_name)
        user_stats["count"] += 1
        ts = props.get("time")
        if ts:
            # Decode the epoch timestamp in UTC so the day bucket does not
            # depend on the timezone of the machine running this script.
            # NOTE(review): assumes `time` is epoch seconds as emitted by the
            # export endpoint - confirm against the project's timezone setting.
            user_stats["days"].add(time.strftime("%Y-%m-%d", time.gmtime(ts)))
finally:
    # Release the streamed connection even if parsing fails part-way through.
    r.close()
print(f"Processed events for {len(user_events)} users")
# Segment users into three usage patterns based on how many distinct events
# ("features") they triggered and on how many distinct days they were active.
patterns = {"power_user": [], "regular": [], "casual": []}
for uid, data in user_events.items():
    feature_count = len(data["features"])
    event_count = data["count"]
    active_days = len(data["days"])
    # Rank this user's event names by frequency. most_common() breaks ties by
    # first occurrence, so the ranking is reproducible from run to run;
    # truncating an unordered set is not, because string hashing is
    # randomized per process.
    ranked_events = [name for name, _ in Counter(data["events"]).most_common(10)]
    profile = {
        "uid": uid,
        "events": event_count,
        "features": feature_count,
        "active_days": active_days,
        "top_events": ranked_events[:5],   # five most frequent event names
        "feature_list": ranked_events,     # up to ten most frequent event names
    }
    if feature_count >= 5 and active_days >= 15:
        patterns["power_user"].append(profile)
    elif feature_count >= 2 and active_days >= 5:
        patterns["regular"].append(profile)
    else:
        patterns["casual"].append(profile)
# Fetch the personas that already exist so this run updates them instead of
# creating duplicates.
personas_resp = requests.get(f"{MB}/personas", headers=MH, timeout=30)
# Without this check an error body would be read as "no personas exist" and
# every segment would be re-created as a duplicate.
personas_resp.raise_for_status()
existing = personas_resp.json()
# Index the personas whose name contains "Mixpanel" by name.
# NOTE(review): a non-list response body is treated as "no personas" - confirm
# the endpoint really returns a bare JSON array rather than a wrapper object.
mp_personas = {
    p["name"]: p
    for p in (existing if isinstance(existing, list) else [])
    # `or ""` tolerates a persona whose name is present but null.
    if "Mixpanel" in (p.get("name") or "")
}
# Create or update one "Mixpanel: <Segment>" persona per non-empty segment.
for pattern_name, users in patterns.items():
    if not users:
        continue  # nothing to describe for an empty segment
    label = pattern_name.replace("_", " ").title()
    persona_name = f"Mixpanel: {label}"

    # Segment-level averages across the user profiles.
    avg_events = sum(u["events"] for u in users) / len(users)
    avg_features = sum(u["features"] for u in users) / len(users)
    avg_days = sum(u["active_days"] for u in users) / len(users)

    # Features ranked by how many users in the segment list them.
    all_features = Counter(f for u in users for f in u["feature_list"])
    top_features = [f for f, _ in all_features.most_common(8)]

    payload = {
        "name": persona_name,
        "description": (
            f"{label} segment enriched with event data (30d). "
            f"N={len(users)}. Avg events: {avg_events:.0f}, "
            f"features used: {avg_features:.1f}, active days: {avg_days:.1f}. "
            f"Top features: {', '.join(top_features[:5])}."
        ),
        "psychographic": {
            "usage_pattern": pattern_name,
            "avg_events_30d": avg_events,
            "avg_features_used": avg_features,
            "avg_active_days": avg_days,
            "top_features": top_features,
        },
    }

    if persona_name in mp_personas:
        # A persona with this name already exists: update it in place.
        pid = mp_personas[persona_name]["id"]
        requests.patch(
            f"{MB}/personas/{pid}", headers=MH, json=payload, timeout=30
        ).raise_for_status()
        print(f"Updated: {persona_name} ({pid}) — {len(users)} users")
    else:
        created = requests.post(f"{MB}/personas", headers=MH, json=payload, timeout=30)
        # Surface HTTP errors directly instead of failing later with a
        # KeyError on 'id' when the body is an error payload.
        created.raise_for_status()
        p = created.json()
        print(f"Created: {persona_name} ({p['id']}) — {len(users)} users")

    # Brief pause between persona writes.
    time.sleep(0.3)