Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.mavera.io/llms.txt

Use this file to discover all available pages before exploring further.

Scenario

Your Mixpanel project has thousands of user profiles with demographic and behavioral properties — plan type, company size, role, signup source, total events fired. You query the Engage API to pull profiles, cluster them by usage pattern (power users, regular, casual, and dormant), and create a Mavera Custom Persona for each non-empty cluster. The result is a persona library built from actual product behavior.

Architecture

Code

import base64
import json
import os
import time
from collections import defaultdict

import requests

# --- Mixpanel: service-account credentials, sent as an HTTP Basic auth pair ---
# Each lookup raises KeyError if the variable is not set in the environment.
MP_SA = os.environ["MIXPANEL_SERVICE_ACCOUNT"]
MP_SECRET = os.environ["MIXPANEL_SECRET"]
MP_PROJECT = os.environ["MIXPANEL_PROJECT_ID"]
mp_auth = (MP_SA, MP_SECRET)

# --- Mavera: API base URL, key, and the JSON headers used for every request ---
MB = "https://app.mavera.io/api/v1"
MV = os.environ["MAVERA_API_KEY"]
MH = {
    "Authorization": f"Bearer {MV}",
    "Content-Type": "application/json",
}

# Pull user profiles from the Mixpanel Engage (Query Profiles) API, one page
# at a time, until the last page is reached or MAX_PROFILES is collected.
ENGAGE_URL = "https://mixpanel.com/api/query/engage"
MAX_PROFILES = 5000          # stop paging once this many profiles are collected
MAX_RATE_LIMIT_RETRIES = 5   # consecutive 429s tolerated before giving up
REQUEST_TIMEOUT = 30         # seconds; without it a stalled connection hangs forever
OUTPUT_PROPERTIES = [
    "$email", "$name", "$city", "$country_code",
    "plan", "company_size", "role", "signup_source",
    "total_events", "last_seen", "sessions_count",
]

profiles = []
page = 0
session_id = None
rate_limit_hits = 0

while True:
    # The endpoint takes form-encoded body fields, so the property list is
    # sent as a JSON-encoded string. To restrict the query to a cohort, add
    # "filter_by_cohort": json.dumps({"id": <cohort_id>}); an empty value is
    # not sent because it is not a valid cohort filter.
    payload = {
        "output_properties": json.dumps(OUTPUT_PROPERTIES),
        "page": page,
    }
    if session_id:
        # Required on every page after the first to keep the cursor stable.
        payload["session_id"] = session_id

    r = requests.post(
        ENGAGE_URL,
        params={"project_id": MP_PROJECT},  # project_id is a query parameter
        auth=mp_auth,
        data=payload,
        timeout=REQUEST_TIMEOUT,
    )
    if r.status_code == 429:
        # Rate limited: wait and retry the same page, but not indefinitely.
        rate_limit_hits += 1
        if rate_limit_hits > MAX_RATE_LIMIT_RETRIES:
            r.raise_for_status()
        time.sleep(60)
        continue
    r.raise_for_status()
    rate_limit_hits = 0
    data = r.json()

    # Keep the previous session_id if a later response omits it.
    session_id = data.get("session_id") or session_id
    results = data.get("results", [])
    profiles.extend(results)

    # A short page means this was the last one. Prefer the page size reported
    # by the API and fall back to 1000 when the field is absent.
    page_size = data.get("page_size") or 1000
    if len(results) < page_size or len(profiles) >= MAX_PROFILES:
        break
    page += 1
    time.sleep(1)

print(f"Fetched {len(profiles)} profiles")

# Bucket every fetched profile into one of four usage tiers based on its
# total event count and session count. Missing or None counters count as 0.
clusters = {tier: [] for tier in ("power_user", "regular", "casual", "dormant")}
for record in profiles:
    attrs = record.get("$properties", {})
    n_events = int(attrs.get("total_events", 0) or 0)
    n_sessions = int(attrs.get("sessions_count", 0) or 0)

    # Tiers are checked from most to least active; the first match wins.
    if n_events > 500 and n_sessions > 50:
        tier = "power_user"
    elif n_events > 100 and n_sessions > 10:
        tier = "regular"
    elif n_events > 10:
        tier = "casual"
    else:
        tier = "dormant"
    clusters[tier].append(attrs)

def cluster_profile(users):
    """Summarize a list of user property dicts into one cluster profile.

    Args:
        users: list of dicts of user properties. The keys read are "role",
            "plan", "company_size", "signup_source", "total_events" and
            "sessions_count"; any of them may be missing or falsy.

    Returns:
        A dict with the user count ("n"), the mean event and session counts
        ("avg_events", "avg_sessions"), and up to three most frequent values
        for each categorical property ("top_roles", "top_plans", "top_sizes",
        "top_sources"), most frequent first.
    """
    categorical = ("role", "plan", "company_size", "signup_source")
    tallies = {field: defaultdict(int) for field in categorical}
    for user in users:
        for field in categorical:
            value = user.get(field)
            if value:  # skip missing, None and empty values
                tallies[field][value] += 1

    def most_frequent(tally, limit=3):
        # Stable sort: values with equal counts keep first-seen order.
        ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
        return [value for value, _ in ranked[:limit]]

    def mean_of(prop):
        # Divide by at least 1 so an empty cluster yields 0 rather than an error.
        total = sum(int(user.get(prop, 0) or 0) for user in users)
        return total / max(len(users), 1)

    return {
        "n": len(users),
        "avg_events": mean_of("total_events"),
        "avg_sessions": mean_of("sessions_count"),
        "top_roles": most_frequent(tallies["role"]),
        "top_plans": most_frequent(tallies["plan"]),
        "top_sizes": most_frequent(tallies["company_size"]),
        "top_sources": most_frequent(tallies["signup_source"]),
    }

def _joined(values):
    """Comma-join values, stringifying each so numeric properties don't raise."""
    return ", ".join(str(v) for v in values)


# Create one Mavera persona per non-empty usage cluster and record its id.
created = []
for cluster_name, users in clusters.items():
    if not users:
        continue
    prof = cluster_profile(users)
    label = cluster_name.replace("_", " ").title()

    resp = requests.post(
        f"{MB}/personas",
        headers=MH,
        json={
            "name": f"Mixpanel: {label}",
            "description": (
                f"{label} segment from Mixpanel ({prof['n']} users). "
                f"Avg events: {prof['avg_events']:.0f}, Avg sessions: {prof['avg_sessions']:.0f}. "
                f"Roles: {_joined(prof['top_roles'])}. "
                f"Plans: {_joined(prof['top_plans'])}. "
                f"Company sizes: {_joined(prof['top_sizes'])}. "
                f"Signup sources: {_joined(prof['top_sources'])}."
            ),
            "demographic": {
                "job_titles": prof["top_roles"],
                "company_sizes": prof["top_sizes"],
            },
            "psychographic": {
                "usage_intensity": cluster_name,
                "avg_events": prof["avg_events"],
                "avg_sessions": prof["avg_sessions"],
            },
        },
        timeout=30,  # seconds; avoid hanging forever on a stalled connection
    )
    # Fail with the real HTTP error rather than a KeyError on the missing "id".
    resp.raise_for_status()
    persona = resp.json()

    created.append({"cluster": label, "id": persona["id"], "n": prof["n"]})
    print(f"  {label}: {persona['id']} ({prof['n']} users, avg {prof['avg_events']:.0f} events)")
    time.sleep(0.3)  # brief pause between persona creations

print(f"\nCreated {len(created)} behavior-based personas")

Example Output

{
  "fetched": 4820,
  "personas": [
    { "cluster": "Power User", "id": "per_mp_power_1", "n": 312, "avg_events": 1240 },
    { "cluster": "Regular", "id": "per_mp_reg_2", "n": 1456, "avg_events": 280 },
    { "cluster": "Casual", "id": "per_mp_cas_3", "n": 2104, "avg_events": 42 },
    { "cluster": "Dormant", "id": "per_mp_dor_4", "n": 948, "avg_events": 4 }
  ],
  "sample": {
    "cluster": "Power User",
    "top_roles": ["Product Manager", "Growth Lead", "Head of Marketing"],
    "top_plans": ["Enterprise", "Pro"],
    "top_sources": ["organic", "referral", "direct"]
  }
}

Error Handling

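The Engage API counts against the same 60 queries/hour rate limit as the rest of Mixpanel's Query API. Each page request counts as one query and returns up to 1,000 profiles, so budget one query per 1,000 users (for example, 5 queries for 5,000 profiles). On a 429 response, sleep 60 seconds and retry the same page.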
The Engage API returns a session_id on the first response. You must pass it on subsequent pages to maintain cursor position. If omitted, results may overlap.
Not all users have every property set. The clustering code handles None/0 defaults. Custom properties must be set via $set in your tracking code.

What’s Next

Mixpanel Integration

Back to Mixpanel integration overview

Funnel Drop-off → Focus Group

Investigate funnel abandonment with focus groups

Personas API

Full reference for POST /api/v1/personas

Mave Agent

Full reference for POST /api/v1/mave/chat