import os, requests, time
from collections import defaultdict
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
RunReportRequest, Dimension, Metric, DateRange, OrderBy,
)
# GA4 property to query. Read from the environment; a missing variable
# raises KeyError as soon as the script starts.
PROPERTY_ID = os.environ["GA4_PROPERTY_ID"]

# Mavera API settings: key (MV), base URL (MB) and the headers (MH) that are
# attached to every request sent to that API.
MV = os.environ["MAVERA_API_KEY"]
MB = "https://app.mavera.io/api/v1"
MH = {
    "Authorization": "Bearer " + MV,
    "Content-Type": "application/json",
}
client = BetaAnalyticsDataClient()

# Request one row per source / medium / age bracket / gender combination for
# the "30daysAgo".."today" range, ordered by totalUsers descending and capped
# at 500 rows. Later code reads dimensions and metrics by position, so the
# order of the names below matters.
_report_request = RunReportRequest(
    property="properties/" + PROPERTY_ID,
    dimensions=[
        Dimension(name=dimension_name)
        for dimension_name in (
            "sessionSource",
            "sessionMedium",
            "userAgeBracket",
            "userGender",
        )
    ],
    metrics=[
        Metric(name=metric_name)
        for metric_name in (
            "totalUsers",
            "conversions",
            "engagementRate",
            "averageSessionDuration",
        )
    ],
    date_ranges=[DateRange(start_date="30daysAgo", end_date="today")],
    order_bys=[
        OrderBy(
            metric=OrderBy.MetricOrderBy(metric_name="totalUsers"),
            desc=True,
        ),
    ],
    limit=500,
)
report = client.run_report(_report_request)
# Per-channel accumulator keyed by "source/medium".
#   users / conversions      -> running totals over the rows that were kept
#   engagement_sum           -> sum of (engagement rate * users), i.e. the
#   duration_sum                numerators of user-weighted averages
#   age_dist / gender_dist   -> users per age bracket / per gender
#   count                    -> number of report rows folded into the channel
channels = defaultdict(lambda: {
    "users": 0, "conversions": 0, "engagement_sum": 0, "duration_sum": 0,
    "age_dist": defaultdict(int), "gender_dist": defaultdict(int), "count": 0,
})
for row in report.rows:
    # Dimension values are read by position: source, medium, age, gender.
    source = row.dimension_values[0].value
    medium = row.dimension_values[1].value
    age = row.dimension_values[2].value
    gender = row.dimension_values[3].value

    # Rows without demographic data are dropped entirely, so the channel
    # totals below only count users whose age and gender are both set.
    # Checked before parsing metrics so skipped rows cost nothing.
    # NOTE(review): GA4 may also label unidentified demographics "unknown";
    # confirm whether such rows should be excluded as well.
    if age == "(not set)" or gender == "(not set)":
        continue

    # Metric values are read by position: users, conversions, engagement
    # rate, session duration. They are parsed from strings.
    users = int(row.metric_values[0].value)
    # Parse via float so a decimal-formatted count such as "3.0" does not
    # raise ValueError (presumably conversions is a float-typed metric in
    # the GA4 schema -- TODO confirm); round to keep an integer total.
    conversions = round(float(row.metric_values[1].value))
    engagement = float(row.metric_values[2].value)
    duration = float(row.metric_values[3].value)

    bucket = channels[f"{source}/{medium}"]
    bucket["users"] += users
    bucket["conversions"] += conversions
    # Weight the per-row rates by users so dividing by total users later
    # yields a user-weighted mean.
    bucket["engagement_sum"] += engagement * users
    bucket["duration_sum"] += duration * users
    bucket["age_dist"][age] += users
    bucket["gender_dist"][gender] += users
    bucket["count"] += 1
def _dominant_bucket(distribution):
    """Return the key with the largest count, or "unknown" if there is none."""
    if not distribution:
        return "unknown"
    # max() keeps the first maximal entry, so ties resolve in insertion order.
    return max(distribution.items(), key=lambda entry: entry[1])[0]


def _summarise_channel(channel_name, totals):
    """Turn one channel's accumulated totals into a flat profile dict."""
    user_total = totals["users"]
    divisor = max(user_total, 1)  # guards the divisions against zero users
    return {
        "channel": channel_name,
        "users": user_total,
        "conversions": totals["conversions"],
        "avg_engagement": totals["engagement_sum"] / divisor,
        "avg_duration": totals["duration_sum"] / divisor,
        "top_age": _dominant_bucket(totals["age_dist"]),
        "top_gender": _dominant_bucket(totals["gender_dist"]),
        "conv_rate": totals["conversions"] / divisor,
        "age_dist": dict(totals["age_dist"]),
        "gender_dist": dict(totals["gender_dist"]),
    }


# Channels with fewer than 50 users are left out; the remaining profiles are
# ordered by user count, largest first.
channel_profiles = sorted(
    (
        _summarise_channel(channel_name, totals)
        for channel_name, totals in channels.items()
        if totals["users"] >= 50
    ),
    key=lambda profile: profile["users"],
    reverse=True,
)
# Create one persona through the Mavera API for each of the eight largest
# channels and record the returned persona id for each.
created = []
for cp in channel_profiles[:8]:
    name = f"GA4 Channel: {cp['channel']} ({cp['top_gender']} {cp['top_age']})"
    # Three largest age brackets and every gender, each ordered by user count.
    age_breakdown = ", ".join(
        f"{a}: {n}"
        for a, n in sorted(cp["age_dist"].items(), key=lambda x: -x[1])[:3]
    )
    gender_breakdown = ", ".join(
        f"{g}: {n}"
        for g, n in sorted(cp["gender_dist"].items(), key=lambda x: -x[1])
    )
    response = requests.post(
        f"{MB}/personas",
        headers=MH,
        json={
            "name": name,
            "description": (
                f"Persona from GA4 channel {cp['channel']} (30d). "
                f"Primary: {cp['top_gender']} {cp['top_age']}. "
                f"Users: {cp['users']}, Conv: {cp['conversions']} ({cp['conv_rate']:.2%}). "
                f"Engagement: {cp['avg_engagement']:.0%}. Session: {cp['avg_duration']:.0f}s. "
                f"Age mix: {age_breakdown}. Gender: {gender_breakdown}."
            ),
            "demographic": {
                "age_range": cp["top_age"],
                "gender": cp["top_gender"],
            },
            "psychographic": {
                "acquisition_channel": cp["channel"],
                # Only two levels are produced: above 0.6 is "high",
                # everything else is "medium".
                "engagement_level": "high" if cp["avg_engagement"] > 0.6 else "medium",
            },
        },
        # Without a timeout a stalled connection would hang the script.
        timeout=30,
    )
    # Fail with the HTTP error itself rather than a KeyError on "id" (or a
    # JSON decode error) when the API rejects the request.
    response.raise_for_status()
    persona = response.json()
    created.append({"channel": cp["channel"], "id": persona["id"], "users": cp["users"]})
    print(f" {name} → {persona['id']}")
    # Short pause between consecutive requests.
    time.sleep(0.3)
print(f"\nMapped {len(created)} channel-persona pairs")