import os, json, requests
from openai import OpenAI
# Credentials come from the environment; os.environ[...] raises KeyError if a variable is unset.
HS = os.environ["HUBSPOT_ACCESS_TOKEN"]  # used as the Bearer token on the HubSpot request
MV = os.environ["MAVERA_API_KEY"]  # used as api_key for the OpenAI client pointed at Mavera
# 1. Pull form submissions (paginated)
# Fetches submissions for FORM_ID page by page. Each response's
# paging.next.after cursor is sent back as the `after` query parameter;
# paging stops when MAX_SUBMISSIONS are collected or no cursor is returned.
# Raises requests.HTTPError on a non-2xx response (raise_for_status) and
# requests.Timeout if HubSpot does not answer within REQUEST_TIMEOUT.
FORM_ID = "your-form-guid"
MAX_SUBMISSIONS = 200  # stop paging once this many submissions are collected
PAGE_SIZE = 50  # submissions requested per call
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever
subs, after = [], ""
while len(subs) < MAX_SUBMISSIONS:
    params = {"limit": PAGE_SIZE}
    if after:
        params["after"] = after
    r = requests.get(
        f"https://api.hubapi.com/form-integrations/v1/submissions/forms/{FORM_ID}",
        headers={"Authorization": f"Bearer {HS}"},
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    r.raise_for_status()
    data = r.json()
    subs.extend(data.get("results") or [])
    # A missing or null `paging` / `next` is treated as "no further pages".
    after = ((data.get("paging") or {}).get("next") or {}).get("after") or ""
    if not after:
        break
# 2. Extract free-text
# Collect the stripped, non-empty "biggest_challenge" answer from every
# submission's "values" list, in submission order.
texts = []
for submission in subs:
    for field in submission.get("values", []):
        if field.get("name") != "biggest_challenge":
            continue
        answer = field.get("value", "").strip()
        if answer:
            texts.append(answer)
# 3. Categorize in chunks
# Sends the free-text answers to the model CHUNK_SIZE at a time and gathers
# every returned category object into all_cats.
mavera = OpenAI(api_key=MV, base_url="https://app.mavera.io/api/v1")

# Requested reply shape:
#   {"categories": [{"index", "pain_point", "urgency", "use_case"}, ...]}
# NOTE(review): OpenAI's own strict mode requires "additionalProperties": false
# on every object — confirm whether the Mavera endpoint enforces the same.
schema = {"type": "json_schema", "json_schema": {"name": "cats", "strict": True, "schema": {
    "type": "object", "required": ["categories"],
    "properties": {"categories": {"type": "array", "items": {"type": "object",
        "required": ["index", "pain_point", "urgency", "use_case"],
        "properties": {
            "index": {"type": "number"}, "pain_point": {"type": "string"},
            "urgency": {"type": "string", "enum": ["high", "medium", "low"]},
            "use_case": {"type": "string"},
        }}}}}}}

CHUNK_SIZE = 50  # answers per model call
MAX_CHARS = 200  # each answer is truncated to this many characters in the prompt
persona_id = os.environ.get("CUSTOMER_PERSONA_ID", "")  # loop-invariant, read once

all_cats = []
for i in range(0, len(texts), CHUNK_SIZE):
    chunk = texts[i:i + CHUNK_SIZE]
    # Number each answer by its 1-based position in `texts`, not within the
    # chunk, so the numbers shown to the model are unique across chunks and a
    # returned `index` is unambiguous after merging into all_cats.
    # Whitespace is collapsed so a multi-line answer stays on one numbered line.
    lines = []
    for n, t in enumerate(chunk, start=i + 1):
        one_line = " ".join(t.split())
        lines.append(f"{n}. {one_line[:MAX_CHARS]}")
    numbered = "\n".join(lines)
    result = mavera.responses.create(
        model="mavera-1",
        input=[{"role": "user", "content": f"Categorize these {len(chunk)} form responses by pain point, urgency, use case.\n\n{numbered}"}],
        extra_body={"persona_id": persona_id, "response_format": schema},
    )
    # The JSON payload is read from the first content item of the first output;
    # json.loads raises a ValueError subclass if that text is not valid JSON.
    parsed = json.loads(result.output[0].content[0].text)
    all_cats.extend(parsed.get("categories", []))
# 4. Summary
# Tally how often each pain-point label occurs across all categorized answers.
points = {}
for cat in all_cats:
    label = cat["pain_point"]
    points[label] = points.get(label, 0) + 1

total = len(all_cats)
print(f"Categorized: {total} | Unique pain points: {len(points)}")

# Ten most frequent labels; the sort is stable, so ties keep first-seen order.
ranked = sorted(points.items(), key=lambda item: item[1], reverse=True)
for label, count in ranked[:10]:
    print(f" {label}: {count}")

high_urgency = len([cat for cat in all_cats if cat["urgency"] == "high"])
print(f"High urgency: {high_urgency}/{total}")