User Profile Clustering → Personas

Scenario

Your Mixpanel project has thousands of user profiles with demographic and behavioral properties — plan type, company size, role, signup source, total events fired. You query the Engage API to pull profiles, cluster them by usage patterns (power users vs. casual vs. dormant), and create a Mavera Custom Persona for each cluster. The result is a persona library built from actual product behavior.

Architecture

Code

import base64
import os
import time
from collections import Counter, defaultdict

import requests

# Credentials are read from the environment once, at import time.
# os.environ[...] raises KeyError as soon as one of them is missing.
MP_SA, MP_SECRET, MP_PROJECT, MV = (
    os.environ[name]
    for name in (
        "MIXPANEL_SERVICE_ACCOUNT",
        "MIXPANEL_SECRET",
        "MIXPANEL_PROJECT_ID",
        "MAVERA_API_KEY",
    )
)

# Mavera API base URL and the headers sent with every Mavera request.
MB = "https://app.mavera.io/api/v1"
MH = {
    "Authorization": f"Bearer {MV}",
    "Content-Type": "application/json",
}

# (username, password) pair for HTTP Basic auth against Mixpanel.
mp_auth = (MP_SA, MP_SECRET)

# --- Step 1: page through the Mixpanel Engage (profile query) endpoint ------
MAX_PROFILES = 5000         # stop paging once this many profiles are collected
DEFAULT_PAGE_SIZE = 1000    # used when a response carries no "page_size"
MAX_RATE_LIMIT_RETRIES = 5  # consecutive 429 responses tolerated before failing
REQUEST_TIMEOUT = 30        # seconds; without it a stalled connection hangs forever

profiles = []
page = 0
session_id = None
rate_limit_hits = 0

while True:
    # NOTE(review): the request is sent as a JSON body with project_id inside
    # it. Confirm against the Engage API reference whether project_id belongs
    # in the query string with a form-encoded body instead.
    # No cohort filter is applied; add a "filter_by_cohort" entry to restrict
    # the pull to one cohort.
    payload = {
        "project_id": MP_PROJECT,
        "output_properties": [
            "$email", "$name", "$city", "$country_code",
            "plan", "company_size", "role", "signup_source",
            "total_events", "last_seen", "sessions_count",
        ],
        "page": page,
    }
    if session_id:
        payload["session_id"] = session_id

    r = requests.post(
        "https://mixpanel.com/api/query/engage",
        auth=mp_auth,
        json=payload,
        timeout=REQUEST_TIMEOUT,
    )
    if r.status_code == 429:
        # Rate limited: wait and retry the same page, but not indefinitely.
        rate_limit_hits += 1
        if rate_limit_hits > MAX_RATE_LIMIT_RETRIES:
            r.raise_for_status()
        time.sleep(60)
        continue
    rate_limit_hits = 0
    r.raise_for_status()
    data = r.json()

    # Keep the session id we already hold if this response does not repeat it.
    session_id = data.get("session_id") or session_id
    results = data.get("results") or []
    profiles.extend(results)

    # A short page means the result set is exhausted. Prefer the page size the
    # server reports over a hard-coded one.
    page_size = data.get("page_size") or DEFAULT_PAGE_SIZE
    if len(results) < page_size or len(profiles) >= MAX_PROFILES:
        break
    page += 1
    time.sleep(1)

print(f"Fetched {len(profiles)} profiles")

# --- Step 2: bucket every profile by usage intensity -------------------------
def _as_count(value):
    """Return *value* as an int, or 0 when it is missing or not numeric."""
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        return int(float(value))  # numeric strings such as "12.5"
    except (TypeError, ValueError, OverflowError):
        return 0


clusters = {"power_user": [], "regular": [], "casual": [], "dormant": []}
for p in profiles:
    # "$properties" may be absent or null; fall back to an empty dict either way.
    props = p.get("$properties") or {}
    events = _as_count(props.get("total_events"))
    sessions = _as_count(props.get("sessions_count"))

    # Tiers are checked from most to least active; the first match wins.
    if events > 500 and sessions > 50:
        clusters["power_user"].append(props)
    elif events > 100 and sessions > 10:
        clusters["regular"].append(props)
    elif events > 10:
        clusters["casual"].append(props)
    else:
        clusters["dormant"].append(props)

def cluster_profile(users, top_n=3):
    """Summarise one usage cluster.

    Args:
        users: List of profile property dicts (one per user). Each may carry
            "role", "plan", "company_size", "signup_source", "total_events"
            and "sessions_count"; any of them may be missing or None.
        top_n: How many of the most frequent values to keep per property.

    Returns:
        A dict with:
            "n": number of users,
            "avg_events" / "avg_sessions": mean of the two counters, with
                missing or non-numeric values counted as 0 (0 for an empty
                cluster),
            "top_roles", "top_plans", "top_sizes", "top_sources": the
                ``top_n`` most common values as strings, most frequent first;
                ties keep first-seen order.
    """

    def as_count(value):
        # Profile properties are free-form: tolerate None, "12", 12.0, "12.5"
        # and fall back to 0 for anything that is not a number.
        try:
            return int(value or 0)
        except (TypeError, ValueError, OverflowError):
            pass
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return 0

    def top_values(key):
        # Values are stringified so that numeric or unhashable property values
        # can be tallied and later joined into text. Falsy values are skipped.
        tally = Counter(str(u[key]) for u in users if u.get(key))
        return [value for value, _ in tally.most_common(top_n)]

    divisor = max(len(users), 1)  # avoid ZeroDivisionError on an empty cluster
    return {
        "n": len(users),
        "avg_events": sum(as_count(u.get("total_events")) for u in users) / divisor,
        "avg_sessions": sum(as_count(u.get("sessions_count")) for u in users) / divisor,
        "top_roles": top_values("role"),
        "top_plans": top_values("plan"),
        "top_sizes": top_values("company_size"),
        "top_sources": top_values("signup_source"),
    }

# --- Step 3: create one Mavera persona per non-empty cluster -----------------
def _csv(values):
    """Join values with ", ", stringifying each so non-string values are safe."""
    return ", ".join(str(v) for v in values)


created = []
for cluster_name, users in clusters.items():
    if not users:
        continue
    prof = cluster_profile(users)
    label = cluster_name.replace("_", " ").title()

    resp = requests.post(
        f"{MB}/personas",
        headers=MH,
        json={
            "name": f"Mixpanel: {label}",
            "description": (
                f"{label} segment from Mixpanel ({prof['n']} users). "
                f"Avg events: {prof['avg_events']:.0f}, Avg sessions: {prof['avg_sessions']:.0f}. "
                f"Roles: {_csv(prof['top_roles'])}. "
                f"Plans: {_csv(prof['top_plans'])}. "
                f"Company sizes: {_csv(prof['top_sizes'])}. "
                f"Signup sources: {_csv(prof['top_sources'])}."
            ),
            "demographic": {
                "job_titles": prof["top_roles"],
                "company_sizes": prof["top_sizes"],
            },
            "psychographic": {
                "usage_intensity": cluster_name,
                "avg_events": prof["avg_events"],
                "avg_sessions": prof["avg_sessions"],
            },
        },
        timeout=30,
    )
    # Fail with the HTTP error itself rather than a KeyError on a missing "id".
    resp.raise_for_status()
    persona = resp.json()

    created.append({"cluster": label, "id": persona["id"], "n": prof["n"]})
    print(f"  {label}: {persona['id']} ({prof['n']} users, avg {prof['avg_events']:.0f} events)")
    time.sleep(0.3)  # brief pause between create calls

print(f"\nCreated {len(created)} behavior-based personas")

// Credentials come straight from the environment; nothing is validated here,
// so an unset variable is simply `undefined`.
const {
  MIXPANEL_SERVICE_ACCOUNT: MP_SA,
  MIXPANEL_SECRET: MP_SECRET,
  MIXPANEL_PROJECT_ID: MP_PROJECT,
  MAVERA_API_KEY: MV,
} = process.env;

// Mavera API base URL and the headers sent with every Mavera request.
const MB = "https://app.mavera.io/api/v1";
const MH = {
  Authorization: `Bearer ${MV}`,
  "Content-Type": "application/json",
};

// HTTP Basic header value for Mixpanel: "Basic " + base64("<account>:<secret>").
const mpAuth =
  "Basic " + Buffer.from(`${MP_SA}:${MP_SECRET}`).toString("base64");

// --- Step 1: page through the Mixpanel Engage (profile query) endpoint ------
const MAX_PROFILES = 5000;        // stop paging once this many profiles are collected
const DEFAULT_PAGE_SIZE = 1000;   // used when a response carries no page_size
const MAX_RATE_LIMIT_RETRIES = 5; // consecutive 429 responses tolerated before failing

const profiles = [];
let page = 0, sessionId = null;
let rateLimitHits = 0;

while (true) {
  // NOTE(review): the request is sent as a JSON body with project_id inside it.
  // Confirm against the Engage API reference whether project_id belongs in the
  // query string with a form-encoded body instead.
  const payload = {
    project_id: MP_PROJECT,
    output_properties: [
      "$email", "$name", "$city", "$country_code",
      "plan", "company_size", "role", "signup_source",
      "total_events", "last_seen", "sessions_count",
    ],
    page,
  };
  if (sessionId) payload.session_id = sessionId;

  const res = await fetch("https://mixpanel.com/api/query/engage", {
    method: "POST",
    headers: { Authorization: mpAuth, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  // Rate limited: wait and retry the same page, but not indefinitely.
  if (res.status === 429 && rateLimitHits < MAX_RATE_LIMIT_RETRIES) {
    rateLimitHits++;
    await new Promise((r) => setTimeout(r, 60000));
    continue;
  }
  // Surface HTTP errors instead of parsing an error body as if it were results.
  if (!res.ok) {
    throw new Error(`Mixpanel Engage request failed: ${res.status} ${res.statusText}`);
  }
  rateLimitHits = 0;
  const data = await res.json();

  // Keep the session id we already hold if this response does not repeat it.
  sessionId = data.session_id || sessionId;
  const results = data.results || [];
  profiles.push(...results);

  // A short page means the result set is exhausted. Prefer the page size the
  // server reports over a hard-coded one.
  const pageSize = data.page_size || DEFAULT_PAGE_SIZE;
  if (results.length < pageSize || profiles.length >= MAX_PROFILES) break;
  page++;
  await new Promise((r) => setTimeout(r, 1000));
}

console.log(`Fetched ${profiles.length} profiles`);

// --- Step 2: bucket every profile by usage intensity -------------------------
const clusters = { power_user: [], regular: [], casual: [], dormant: [] };
profiles.forEach((profile) => {
  const props = profile.$properties || {};
  const events = parseInt(props.total_events || "0");
  const sessions = parseInt(props.sessions_count || "0");

  // Tiers are checked from most to least active; anything that matches none of
  // them (including unparseable counters, which become NaN) is dormant.
  let bucket = "dormant";
  if (events > 500 && sessions > 50) {
    bucket = "power_user";
  } else if (events > 100 && sessions > 10) {
    bucket = "regular";
  } else if (events > 10) {
    bucket = "casual";
  }
  clusters[bucket].push(props);
});

/**
 * Summarise one usage cluster.
 *
 * @param {Array<Object>} users Profile property objects, one per user. Any of
 *   role, plan, company_size, signup_source, total_events and sessions_count
 *   may be missing.
 * @param {number} [topN=3] How many of the most frequent values to keep per property.
 * @returns {Object} n, avgEvents, avgSessions (missing or non-numeric counters
 *   count as 0; 0 for an empty cluster) and topRoles / topPlans / topSizes /
 *   topSources as string arrays, most frequent first, ties in first-seen order.
 */
function clusterProfile(users, topN = 3) {
  // Explicit radix, and NaN mapped to 0 so one bad value cannot turn the
  // whole average into NaN.
  const toCount = (value) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  // A Map (not a plain object) is used for the tally: values such as
  // "constructor" cannot collide with inherited properties, and insertion
  // order is preserved even for integer-like keys.
  const topValues = (key) => {
    const tally = new Map();
    for (const user of users) {
      if (!user[key]) continue;
      const label = String(user[key]);
      tally.set(label, (tally.get(label) || 0) + 1);
    }
    return [...tally.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, topN)
      .map(([label]) => label);
  };

  const divisor = users.length || 1; // avoid dividing by zero on an empty cluster
  const avgEvents = users.reduce((sum, u) => sum + toCount(u.total_events), 0) / divisor;
  const avgSessions = users.reduce((sum, u) => sum + toCount(u.sessions_count), 0) / divisor;

  return {
    n: users.length, avgEvents, avgSessions,
    topRoles: topValues("role"), topPlans: topValues("plan"),
    topSizes: topValues("company_size"), topSources: topValues("signup_source"),
  };
}

// --- Step 3: create one Mavera persona per non-empty cluster -----------------
const created = [];
for (const [clusterName, users] of Object.entries(clusters)) {
  if (!users.length) continue;
  const prof = clusterProfile(users);
  // "power_user" -> "Power User"
  const label = clusterName.replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());

  const res = await fetch(`${MB}/personas`, {
    method: "POST", headers: MH,
    body: JSON.stringify({
      name: `Mixpanel: ${label}`,
      description: `${label} (${prof.n} users). Avg events: ${prof.avgEvents.toFixed(0)}, sessions: ${prof.avgSessions.toFixed(0)}. Roles: ${prof.topRoles.join(", ")}. Plans: ${prof.topPlans.join(", ")}.`,
      demographic: { job_titles: prof.topRoles, company_sizes: prof.topSizes },
      psychographic: {
        usage_intensity: clusterName,
        avg_events: prof.avgEvents,
        avg_sessions: prof.avgSessions,
      },
    }),
  });
  // Fail loudly on an API error instead of recording a persona with an undefined id.
  if (!res.ok) {
    throw new Error(`Mavera persona creation failed for "${label}": ${res.status} ${res.statusText}`);
  }
  const persona = await res.json();

  created.push({ cluster: label, id: persona.id, n: prof.n });
  console.log(`  ${label}: ${persona.id} (${prof.n} users, avg ${prof.avgEvents.toFixed(0)} events)`);
  await new Promise((r) => setTimeout(r, 300)); // brief pause between create calls
}

console.log(`\nCreated ${created.length} behavior-based personas`);

Example Output

{
  "fetched": 4820,
  "personas": [
    { "cluster": "Power User", "id": "per_mp_power_1", "n": 312, "avg_events": 1240 },
    { "cluster": "Regular", "id": "per_mp_reg_2", "n": 1456, "avg_events": 280 },
    { "cluster": "Casual", "id": "per_mp_cas_3", "n": 2104, "avg_events": 42 },
    { "cluster": "Dormant", "id": "per_mp_dor_4", "n": 948, "avg_events": 4 }
  ],
  "sample": {
    "cluster": "Power User",
    "top_roles": ["Product Manager", "Growth Lead", "Head of Marketing"],
    "top_plans": ["Enterprise", "Pro"],
    "top_sources": ["organic", "referral", "direct"]
  }
}

Error Handling

Engage API rate limit (60/hour)

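The Engage API counts against the same 60 queries/hour limit that applies to Mixpanel's query endpoints. Each page request counts as one query and returns up to 1,000 profiles, so budget one query per 1,000 users — pulling 5,000 profiles takes at least five. On a 429 response, sleep for 60 seconds before retrying the same page.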

Session ID for pagination

The Engage API returns a session_id in the first response. Pass it, together with the incremented page number, on every subsequent request to keep the cursor position. If it is omitted, results may overlap between pages.

Missing profile properties

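Not all users have every property set. The clustering code treats a missing or null total_events or sessions_count as 0, so a profile with no total_events falls into the dormant cluster. Custom properties must be set via $set in your tracking code.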

What’s Next

Mixpanel Integration

Back to Mixpanel integration overview

Funnel Drop-off → Focus Group

Investigate funnel abandonment with focus groups

Personas API

Full reference for POST /api/v1/personas

Mave Agent

Full reference for POST /api/v1/mave/chat

Mixpanel

Funnel Drop-off → Focus Group Investigation

⌘I

​Scenario

​Architecture

​Code

​Example Output

​Error Handling

​What’s Next