import os, requests, time
from collections import Counter
# Credentials are mandatory: a missing variable raises KeyError at start-up
# instead of failing later with an opaque authentication error.
AMP_KEY = os.environ["AMPLITUDE_API_KEY"]
AMP_SECRET = os.environ["AMPLITUDE_SECRET_KEY"]
MV = os.environ["MAVERA_API_KEY"]

# Base URL and headers used for every Mavera request in this script.
MB = "https://app.mavera.io/api/v1"
MH = {"Authorization": f"Bearer {MV}", "Content-Type": "application/json"}

# (key, secret) pair handed to requests as `auth=` for Amplitude calls.
amp_auth = (AMP_KEY, AMP_SECRET)


def _ids_from_env(var_name):
    """Return the comma-separated IDs stored in environment variable *var_name*.

    Whitespace around each ID is stripped and blank entries are dropped, so an
    unset or empty variable yields ``[]`` rather than ``[""]`` (which is what a
    bare ``"".split(",")`` produces and would be treated as one blank user ID).
    """
    raw = os.environ.get(var_name, "")
    return [part.strip() for part in raw.split(",") if part.strip()]


# User IDs of the two cohorts to compare; both default to an empty list.
FAST_IDS = _ids_from_env("FAST_ONBOARD_IDS")
SLOW_IDS = _ids_from_env("SLOW_ONBOARD_IDS")
def fetch_activity(user_id, max_retries=5, timeout=30):
    """Fetch one user's events and user properties from Amplitude.

    Sends a GET to the ``/api/2/useractivity`` endpoint with ``user=user_id``.
    A 429 response is retried after the delay announced in ``Retry-After``
    (10 seconds when the header is absent or not a plain integer), at most
    *max_retries* times. Any other non-200 status, or exhausting the retries,
    produces an empty result so the caller can carry on with other users.

    Args:
        user_id: Value sent as the ``user`` query parameter.
        max_retries: Maximum number of additional attempts after a 429.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A dict ``{"events": list, "properties": dict}``; both are empty when
        the request did not end with status 200.

    Raises:
        requests.RequestException: On connection failures or timeouts; these
            are not swallowed here.
    """
    empty = {"events": [], "properties": {}}
    retries_left = max_retries

    # Iterative retry: the recursive form had no bound and could recurse
    # until the interpreter's recursion limit under sustained throttling.
    while True:
        resp = requests.get(
            "https://amplitude.com/api/2/useractivity",
            auth=amp_auth,
            params={"user": user_id},
            timeout=timeout,
        )
        if resp.status_code != 429 or retries_left <= 0:
            break
        retries_left -= 1
        try:
            delay = int(resp.headers.get("Retry-After", 10))
        except (TypeError, ValueError):
            # Retry-After may also be an HTTP-date; fall back to the default.
            delay = 10
        time.sleep(max(delay, 0))

    if resp.status_code != 200:
        return empty

    # `or {}` / `or []` also cover keys that are present but JSON null.
    user_data = resp.json().get("userData") or {}
    return {
        "events": user_data.get("events") or [],
        "properties": user_data.get("userProperties") or {},
    }
def profile_cohort(user_ids, label, sample_size=20):
    """Summarise the activity of the first *sample_size* users of a cohort.

    For each sampled user the activity is loaded with ``fetch_activity``.
    Event types that are missing/empty or start with ``"$"`` are ignored; the
    rest are tallied across the sample. The ``platform`` and ``country`` user
    properties are tallied once per user when present.

    Args:
        user_ids: Sliceable sequence of user IDs making up the cohort.
        label: Human-readable cohort name, returned unchanged.
        sample_size: How many leading IDs to fetch (default 20).

    Returns:
        A dict with keys ``label``, ``n`` (cohort size), ``sampled`` (number of
        users actually fetched), ``avg_events`` (mean counted events per
        sampled user, 0 for an empty sample), ``top_events`` (up to 8
        ``(event_type, count)`` pairs), ``platforms`` (up to 3 pairs) and
        ``countries`` (up to 5 pairs).
    """
    events_all = Counter()
    platforms = Counter()
    countries = Counter()
    event_counts = []
    sample = user_ids[:sample_size]

    for uid in sample:
        data = fetch_activity(uid)

        user_events = []
        for event in data["events"]:
            # `or ""` guards against an explicit null event_type, which would
            # otherwise fail on .startswith; blank names are not counted so
            # they cannot show up as a "top action".
            event_type = event.get("event_type") or ""
            if event_type and not event_type.startswith("$"):
                user_events.append(event_type)

        events_all.update(user_events)
        event_counts.append(len(user_events))

        props = data["properties"]
        if props.get("platform"):
            platforms[props["platform"]] += 1
        if props.get("country"):
            countries[props["country"]] += 1

        # Pause after every fetch (including the last) so consecutive cohort
        # runs stay spaced out as well.
        time.sleep(1)

    return {
        "label": label,
        "n": len(user_ids),
        "sampled": len(sample),
        # max(..., 1) avoids dividing by zero for an empty sample.
        "avg_events": sum(event_counts) / max(len(event_counts), 1),
        "top_events": events_all.most_common(8),
        "platforms": platforms.most_common(3),
        "countries": countries.most_common(5),
    }
def _create_persona(profile, name, lead, speed):
    """Create a Mavera persona from a cohort profile and return the JSON reply.

    Args:
        profile: Dict as returned by ``profile_cohort``; ``avg_events``,
            ``top_events`` and ``platforms`` are read.
        name: Persona name sent to the API.
        lead: Opening sentence of the persona description.
        speed: Value stored under ``psychographic.onboarding_speed``.

    Returns:
        The decoded JSON body of the POST ``/personas`` response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    top_actions = [event for event, _ in profile["top_events"][:5]]
    platform_names = [platform for platform, _ in profile["platforms"]]
    resp = requests.post(
        f"{MB}/personas",
        headers=MH,
        json={
            "name": name,
            "description": (
                f"{lead} "
                f"Avg events: {profile['avg_events']:.0f}. "
                f"Top actions: {', '.join(top_actions)}. "
                f"Platforms: {', '.join(platform_names)}."
            ),
            "psychographic": {
                "onboarding_speed": speed,
                "avg_events": profile["avg_events"],
                "top_actions": top_actions,
            },
        },
        timeout=30,
    )
    # Fail here rather than later with a KeyError on the missing "id".
    resp.raise_for_status()
    return resp.json()


# Build a behavioural profile for each cohort from sampled Amplitude activity.
fast_profile = profile_cohort(FAST_IDS, "Fast Onboarders (<24h)")
slow_profile = profile_cohort(SLOW_IDS, "Slow Onboarders (>7d)")

# Register one persona per cohort.
fast_persona = _create_persona(
    fast_profile,
    "Amplitude: Fast Onboarder (<24h)",
    "Users who completed onboarding within 24 hours.",
    "fast",
)
time.sleep(0.3)  # short pause between the two persona requests
slow_persona = _create_persona(
    slow_profile,
    "Amplitude: Slow Onboarder (>7d)",
    "Users who took 7+ days to complete onboarding.",
    "slow",
)
def format_profile(prof):
    """Render a cohort profile as a two-line text summary.

    The first line shows the cohort size, sample size and average event count
    (rounded to no decimals); the second lists up to six of the profile's
    ``top_events`` as ``name (count)``, comma-separated.
    """
    rendered = []
    for event_name, count in prof["top_events"][:6]:
        rendered.append(f"{event_name} ({count})")
    events = ", ".join(rendered)

    summary = f" N={prof['n']}, sampled={prof['sampled']}, avg events={prof['avg_events']:.0f}"
    return f"{summary}\n Top: {events}"
# Shared background text sent with the focus group. The retention figures in
# the last line are fixed text, not computed from the fetched profiles.
context = f"""Two behavioral cohorts from Amplitude onboarding data:
FAST ONBOARDERS (completed onboarding in <24 hours):
{format_profile(fast_profile)}
SLOW ONBOARDERS (took >7 days to complete onboarding):
{format_profile(slow_profile)}
Fast onboarders retain at 68% (30d). Slow onboarders retain at 22% (30d)."""

# Create the focus group with both personas and the interview questions.
create_resp = requests.post(
    f"{MB}/focus-groups",
    headers=MH,
    json={
        "name": "Amplitude: Onboarding Speed Cohort Study",
        "persona_ids": [fast_persona["id"], slow_persona["id"]],
        "questions": [
            "Walk me through your first day with a new software tool. What makes you complete setup quickly versus putting it off?",
            "When you signed up, what was your immediate goal? Were you trying to solve a specific problem or just exploring?",
            "What would have made you complete onboarding faster? Be specific — was something confusing, unnecessary, or missing?",
            "If a tool required you to invite teammates during onboarding, would that speed you up or slow you down? Why?",
            "Rank what matters most in your first session: (1) Seeing sample data, (2) Completing a real task, (3) Customizing settings, (4) Reading documentation, (5) Watching a tutorial. Explain your #1.",
        ],
        "context": context,
        "responses_per_persona": 2,
    },
    timeout=30,
)
# Stop here on an API error; polling needs the "id" of a created group.
create_resp.raise_for_status()
fg = create_resp.json()

# Poll until the focus group reports "completed" or the attempts run out.
POLL_ATTEMPTS = 30
POLL_INTERVAL_S = 5
data = fg  # fallback so the report below works even if no poll succeeds
for _ in range(POLL_ATTEMPTS):
    time.sleep(POLL_INTERVAL_S)
    poll_resp = requests.get(f"{MB}/focus-groups/{fg['id']}", headers=MH, timeout=30)
    if not poll_resp.ok:
        # Treat a failed poll as transient and try again on the next round.
        continue
    data = poll_resp.json()
    if data.get("status") == "completed":
        break
else:
    # The loop ended without a break: make the incomplete state visible.
    print(
        f"Focus group not completed after {POLL_ATTEMPTS * POLL_INTERVAL_S}s; "
        "showing the latest state."
    )

print(f"Focus Group: {data.get('id')} — {data.get('status')}\n")
for item in data.get("responses") or []:
    # `or ''` keeps the slicing safe when a field is present but null.
    question = item.get("question") or ""
    answer = item.get("answer") or ""
    print(f"[{item.get('persona_id','?')}] {question[:80]}")
    print(f" → {answer[:300]}\n")