import html
import os
import re

import requests
# Runtime configuration. Both environment variables are required; a missing
# one raises KeyError at startup.
# Trailing slashes are dropped from the site URL so that the
# f"{WP}/wp-json/..." request paths built from it never contain "//".
WP = os.environ["WORDPRESS_URL"].rstrip("/")
MV = os.environ["MAVERA_API_KEY"]

# Base URL and request headers used for every Mavera API call in this script.
MB = "https://app.mavera.io/api/v1"
MH = {"Authorization": f"Bearer {MV}", "Content-Type": "application/json"}
# Compiled once at import time; strip_html() may be called for many posts.
_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")


def strip_html(h):
    """Return the plain text of an HTML fragment as a single line.

    Tags are removed, HTML entities (``&amp;``, ``&#8217;``, ``&nbsp;`` ...)
    are decoded, and every run of whitespace is collapsed to one space.

    Args:
        h: HTML markup, or ``None``/empty string for "no content".

    Returns:
        The stripped text; ``""`` when *h* is falsy.
    """
    text = _TAG_RE.sub("", h or "")
    # Decode entities only after the tags are gone, so escaped markup such as
    # "&lt;b&gt;" survives as literal text instead of being stripped as a tag.
    text = html.unescape(text)
    return _WHITESPACE_RE.sub(" ", text).strip()
# Download every published post from the WordPress REST API into ``posts``,
# one page of up to 100 posts per request.
# NOTE(review): later steps rank posts by a ``comment_count`` value; confirm
# that this site's REST API actually returns that field for posts.
posts, page = [], 1
while True:
    resp = requests.get(
        f"{WP}/wp-json/wp/v2/posts",
        params={
            "status": "publish",
            "per_page": 100,
            "page": page,
            "_fields": "id,title,content,comment_count,date,modified,link",
        },
        # Without a timeout a stalled server would block the script forever.
        timeout=30,
    )
    # A 400 on a later page is taken to mean the page number ran past the
    # last page. On the very first page it is a genuine bad request, so it is
    # left to raise_for_status() instead of being read as "zero posts".
    if resp.status_code == 400 and page > 1:
        break
    resp.raise_for_status()
    batch = resp.json()
    if not batch:
        break
    posts.extend(batch)
    # Stop once the reported page count is reached; a missing header is
    # treated as "one page only".
    if page >= int(resp.headers.get("X-WP-TotalPages", 1)):
        break
    page += 1
print(f"Fetched {len(posts)} posts")
def _comment_total(post):
    """Return the post's ``comment_count``, treating a missing or null value as 0."""
    return post.get("comment_count") or 0


# With no posts there is nothing to compare. Stop here, before any persona is
# created and before the first post of each group is indexed further down.
if not posts:
    raise SystemExit("No published posts found; nothing to analyze.")

# Rank posts from most to least commented (stable sort, in place).
posts.sort(key=_comment_total, reverse=True)

# Each group is a quarter of the posts but at least three, capped at half of
# the posts so that "high" and "low" never share a post when there are at
# least two posts to split.
cutoff = min(max(len(posts) // 4, 3), max(len(posts) // 2, 1))
high = posts[:cutoff]
tail = posts[-cutoff:]
# Prefer posts with no comments at all for the low group; if the tail has
# none, fall back to the whole tail.
low = [p for p in tail if _comment_total(p) == 0] or tail
print(f"High: {len(high)} | Low: {len(low)}")
# Create one Mavera persona per engagement group and collect the returned ids
# in ``persona_ids`` (same order as the groups below).
persona_ids = []
for label, group, desc in [
    ("Engaged Reader", high, "Readers who comment on top-performing content"),
    ("Silent Visitor", low, "Visitors who read low-engagement content but never comment"),
]:
    # Up to five post titles from the group are quoted in the description.
    titles = ", ".join(p["title"]["rendered"] for p in group[:5])
    r = requests.post(
        f"{MB}/personas",
        json={"name": f"WordPress {label}", "description": f"{desc}. Posts: {titles}."},
        headers=MH,
        # (connect, read) seconds, so a stalled API call cannot hang the
        # script. NOTE(review): tune the read limit to the API's real latency.
        timeout=(10, 60),
    )
    r.raise_for_status()
    persona = r.json()  # parse the response body once and reuse it
    persona_ids.append(persona["id"])
    print(f" Persona: {persona['name']} ({persona['id']})")
def _engagement_lines(group):
    """Format up to five posts of *group* as '- "title" (N comments)' lines."""
    return "\n".join(
        f"- \"{p['title']['rendered']}\" ({p.get('comment_count',0)} comments)"
        for p in group[:5]
    )


# Bullet lists of the top posts in each group, used as focus-group context.
high_s = _engagement_lines(high)
low_s = _engagement_lines(low)

# Run a focus group over the personas created above. The context holds both
# bullet lists plus a 400-character plain-text excerpt of the first post in
# each group.
fg = requests.post(
    f"{MB}/focus-groups",
    json={
        "name": "WordPress Content Performance Analysis",
        "persona_ids": persona_ids,
        "questions": [
            "What about high-performing content makes you engage?",
            "Why might low-performing posts fail to hold attention?",
            "What change would make you comment on a post?",
            "How important is the opening paragraph?",
        ],
        "context": f"HIGH:\n{high_s}\n\nExcerpt: {strip_html(high[0]['content']['rendered'])[:400]}\n\n"
                   f"LOW:\n{low_s}\n\nExcerpt: {strip_html(low[0]['content']['rendered'])[:400]}",
    },
    headers=MH,
    # (connect, read) seconds, so a stalled API call cannot hang the script.
    # NOTE(review): the read limit is a generous guess, since the response
    # carries every answer; tune it to the API's real latency.
    timeout=(10, 300),
)
fg.raise_for_status()
result = fg.json()
print(f"\nFocus group: {result['id']}")
# Print each persona's answer; a response without a "responses" key prints nothing.
for reply in result.get("responses", []):
    print(f"\n[{reply['persona_name']}] {reply['question']}\n → {reply['answer']}")
# NOTE: If `comment_count` is 0 for all posts, consider integrating with GA4 for pageview data (/integrations/ga4), or use the `modified` date relative to `date` as an engagement proxy — frequently updated posts indicate editorial investment.