import os, requests, csv, io, time
from collections import defaultdict
# --- Configuration ---------------------------------------------------------
# Keys are read with .get() so that importing this module never raises;
# main() verifies both are present before any request is sent.
SR, MV = os.environ.get("SEMRUSH_API_KEY"), os.environ.get("MAVERA_API_KEY")
MB = "https://app.mavera.io/api/v1"
MH = {"Authorization": f"Bearer {MV}", "Content-Type": "application/json"}
SEED = "content marketing automation"
HTTP_TIMEOUT = 30  # seconds; without a timeout requests can block forever

# First matching word decides the intent; keywords with no match are "info".
intent_map = {"how": "info", "best": "commercial", "tool": "transactional", "software": "transactional"}
# Words skipped when picking a keyword's topic word.
stops = {"how", "to", "what", "is", "the", "a", "for", "and", "best", "top"}


def to_int(value):
    """Convert one numeric CSV cell to an int.

    Blank or non-numeric cells become 0 and decimal strings are truncated,
    so a single odd cell cannot abort the whole run.
    """
    text = (value or "").strip()
    if not text:
        return 0
    try:
        return int(text)
    except ValueError:
        pass
    try:
        # Metric cells may arrive as decimal strings (e.g. "45.6").
        return int(float(text))
    except (ValueError, OverflowError):
        return 0


def parse_keywords(text):
    """Parse a semicolon-delimited keyword export into a list of dicts.

    Args:
        text: Raw response body: one header line, then rows of
            keyword;volume;difficulty;competition.

    Returns:
        A list of {"keyword", "volume", "difficulty"} dicts. Rows with
        fewer than four columns are ignored.

    Raises:
        ValueError: If the body is an error message instead of CSV.
    """
    # NOTE(review): Semrush appears to report failures as a plain-text body
    # starting with "ERROR" (e.g. "ERROR 50 :: NOTHING FOUND") - confirm
    # against the API docs. Unchecked, that line was eaten as the header.
    if text.lstrip().startswith("ERROR"):
        raise ValueError(f"Semrush API error: {text.strip()}")
    reader = csv.reader(io.StringIO(text), delimiter=";")
    next(reader, None)  # skip the header; tolerate a completely empty body
    return [
        {"keyword": row[0], "volume": to_int(row[1]), "difficulty": to_int(row[2])}
        for row in reader
        if len(row) >= 4
    ]


def fetch_related_keywords(seed):
    """Download and parse the related-keyword report for *seed*."""
    resp = requests.get(
        "https://api.semrush.com/",
        params={
            "type": "phrase_related", "key": SR, "phrase": seed,
            "database": "us", "display_limit": 100, "export_columns": "Ph,Nq,Kd,Co",
        },
        timeout=HTTP_TIMEOUT,
    )
    resp.raise_for_status()
    return parse_keywords(resp.text)


def cluster_keywords(kws):
    """Group keyword dicts by (intent, topic).

    Intent comes from the first word found in ``intent_map`` (default
    "info"); topic is the first word not in ``stops``, falling back to the
    keyword's first word. Keywords with no words at all are skipped.

    Returns:
        A mapping of (intent, topic) tuples to lists of keyword dicts.
    """
    clusters = defaultdict(list)
    for kw in kws:
        words = kw["keyword"].lower().split()
        if not words:
            continue  # an empty keyword has neither intent nor topic
        intent = next((intent_map[w] for w in words if w in intent_map), "info")
        topic = next((w for w in words if w not in stops), words[0])
        # Tuple keys avoid re-splitting a "intent:topic" string later, which
        # breaks when the topic itself contains a colon.
        clusters[(intent, topic)].append(kw)
    return clusters


def cluster_volume(group):
    """Return the summed search volume of a cluster's keywords."""
    return sum(kw["volume"] for kw in group)


def top_clusters(clusters, limit=4):
    """Return the *limit* highest-volume ((intent, topic), keywords) pairs."""
    ranked = sorted(clusters.items(), key=lambda item: cluster_volume(item[1]), reverse=True)
    return ranked[:limit]


def build_concepts(top):
    """Describe each cluster by its five highest-volume keywords."""
    concepts = []
    for (intent, topic), group in top:
        leaders = sorted(group, key=lambda kw: -kw["volume"])[:5]
        names = ", ".join(kw["keyword"] for kw in leaders)
        concepts.append(f"{intent} about '{topic}': {names}")
    return concepts


def mavera_post(path, payload):
    """POST *payload* as JSON to the Mavera API and return the decoded reply."""
    resp = requests.post(f"{MB}{path}", headers=MH, json=payload, timeout=HTTP_TIMEOUT)
    resp.raise_for_status()
    return resp.json()


def mavera_get(path):
    """GET a Mavera API resource and return the decoded reply."""
    resp = requests.get(f"{MB}{path}", headers=MH, timeout=HTTP_TIMEOUT)
    resp.raise_for_status()
    return resp.json()


def create_personas(top):
    """Create one persona per cluster and return the new persona ids."""
    pids = []
    for (intent, topic), group in top:
        persona = mavera_post("/personas", {
            "name": f"Reader: {intent} — {topic}",
            "description": (
                f"Searches for {intent} content about '{topic}'. "
                f"{len(group)} kws, {cluster_volume(group)} vol."
            ),
        })
        pids.append(persona["id"])
        time.sleep(0.3)  # short pause between consecutive create calls
    return pids


def create_focus_group(pids, concepts):
    """Create the validation focus group and return the API's reply."""
    numbered = "\n".join(f"{i}. {c}" for i, c in enumerate(concepts, start=1))
    return mavera_post("/focus-groups", {
        "name": f"Cluster Validation: {SEED}",
        "persona_ids": pids,
        "questions": [
            "Which concept would you click first?\n" + numbered,
            "What question must a blog post answer for you to read it fully?",
            "Comprehensive guide (3000+ words) or quick checklist? Why?",
        ],
        "responses_per_persona": 2,
    })


def wait_for_focus_group(fg_id, attempts=24, interval=5):
    """Poll a focus group until its status is "completed" or attempts run out.

    Returns:
        The last payload fetched, which may still be incomplete.
    """
    data = {}
    for _ in range(attempts):
        time.sleep(interval)
        data = mavera_get(f"/focus-groups/{fg_id}")
        if data.get("status") == "completed":
            break
    return data


def format_response(r):
    """Format one response as "[persona_id] answer", answer capped at 250 chars."""
    # "or ''" also covers an explicit null answer, which .get()'s default misses.
    return f"[{r.get('persona_id', '?')}] {(r.get('answer') or '')[:250]}\n"


def main():
    """Run the pipeline: keywords -> clusters -> personas -> focus group -> print."""
    if not SR or not MV:
        raise SystemExit("Set SEMRUSH_API_KEY and MAVERA_API_KEY before running.")
    kws = fetch_related_keywords(SEED)
    top = top_clusters(cluster_keywords(kws))
    if not top:
        # Nothing to validate; do not create a focus group with no personas.
        raise SystemExit(f"No related keywords returned for {SEED!r}.")
    pids = create_personas(top)
    fg = create_focus_group(pids, build_concepts(top))
    data = wait_for_focus_group(fg["id"])
    if data.get("status") != "completed":
        print(f"Focus group not completed (status: {data.get('status')!r}); "
              "showing any responses received so far.\n")
    for r in (data.get("responses") or [])[:8]:
        print(format_response(r))


if __name__ == "__main__":
    main()