"""Mine G2 review text for love/hate phrases and build a Mavera brand voice."""
import os
import re
import time
from collections import Counter

import requests

# Credentials are mandatory; a missing key fails fast with KeyError at startup.
G2 = os.environ["G2_API_KEY"]
MV = os.environ["MAVERA_API_KEY"]

G2_BASE = "https://data.g2.com/api/v1"
# NOTE(review): the original script used MV_BASE (for the Mavera API) without
# ever defining it, which raised NameError at runtime. Defined here from the
# environment with a guessed default -- confirm the real Mavera base URL.
MV_BASE = os.environ.get("MAVERA_API_BASE", "https://api.mavera.ai/v1")

# G2 uses token auth with a JSON:API content type; Mavera takes a Bearer token.
G2_H = {"Authorization": f"Token token={G2}", "Content-Type": "application/vnd.api+json"}
MV_H = {"Authorization": f"Bearer {MV}", "Content-Type": "application/json"}
# 1. Pull our own product reviews from G2, 50 per page, until we have ~200
# (the loop may finish with slightly more, since pages arrive whole).
reviews = []
page = 1
rate_limit_hits = 0
while len(reviews) < 200:
    r = requests.get(
        f"{G2_BASE}/survey-responses",
        headers=G2_H,
        params={"page[size]": 50, "page[number]": page},
        timeout=30,  # the original had no timeout -- a stalled request hung forever
    )
    if r.status_code == 429:
        # Rate limited: back off exponentially and give up after a bounded
        # number of retries (the original retried every second, forever).
        rate_limit_hits += 1
        if rate_limit_hits > 5:
            raise RuntimeError("G2 API rate limit: giving up after 5 retries")
        time.sleep(2 ** rate_limit_hits)
        continue
    rate_limit_hits = 0
    r.raise_for_status()
    data = r.json().get("data", [])
    if not data:
        break  # no more pages
    reviews.extend(data)
    page += 1
    time.sleep(0.1)  # be polite between pages
# 2. Bucket free-text answers into "what users love" vs. "what they dislike",
# based on substrings of the G2 question key (e.g. "love", "dislike", "cons").
LOVE_KEYS = ("love", "best", "like most")
HATE_KEYS = ("dislike", "hate", "don't like", "cons")

love_texts = []
hate_texts = []
for rev in reviews:
    attrs = rev.get("attributes", {})
    for key, val in attrs.get("comment_answers", {}).items():
        # Answers arrive either as a bare string or as {"text": ...}.
        # Anything else (None, numbers) is skipped -- the original crashed
        # with AttributeError on non-str, non-dict values.
        if isinstance(val, str):
            text = val
        elif isinstance(val, dict):
            text = val.get("text", "")
        else:
            continue
        if not text.strip():
            continue
        k = key.lower()
        if any(term in k for term in LOVE_KEYS):
            love_texts.append(text)
        elif any(term in k for term in HATE_KEYS):
            hate_texts.append(text)
# 3. Extract key phrases
def extract_phrases(texts, min_count=2):
    """Return the top-20 most frequent bigrams across *texts*.

    Args:
        texts: iterable of free-text strings.
        min_count: minimum occurrences for a bigram to qualify (default 2).

    Returns:
        List of ``(phrase, count)`` tuples, most frequent first, capped at 20.
        Bigrams whose words are ALL stopwords (e.g. "in the") are dropped;
        a bigram with at least one content word survives.
    """
    stop = {"the", "a", "an", "is", "it", "to", "and", "of", "in", "for",
            "that", "this", "with", "on", "i", "we", "our"}
    bigrams = Counter()
    for t in texts:
        # Lowercase and strip punctuation so "Support!" counts as "support".
        tokens = re.sub(r'[^\w\s]', '', t.lower()).split()
        bigrams.update(f"{a} {b}" for a, b in zip(tokens, tokens[1:]))
    # (The original also built a unigram Counter that was never read; removed.)
    phrases = [(p, c) for p, c in bigrams.items()
               if c >= min_count and not all(w in stop for w in p.split())]
    return sorted(phrases, key=lambda x: -x[1])[:20]
# Top phrases feed the brand voice: terms to lean into vs. terms to steer around.
love_phrases = extract_phrases(love_texts)
hate_phrases = extract_phrases(hate_texts)
# Keep only the phrase text (drop the counts) for the top 15 of each bucket.
preferred_terms = [term for term, _ in love_phrases[:15]]
avoid_terms = [term for term, _ in hate_phrases[:15]]
# 4. Create a Mavera Brand Voice seeded with the "love" feedback.
# NOTE(review): the original referenced MV_BASE without ever defining it
# (NameError at runtime). Defined here from the environment with a guessed
# default -- confirm the real Mavera API base URL.
MV_BASE = os.environ.get("MAVERA_API_BASE", "https://api.mavera.ai/v1")

love_samples = "\n\n---\n\n".join(love_texts[:15])
resp = requests.post(
    f"{MV_BASE}/brand-voices",
    headers=MV_H,
    json={
        "name": "G2 Customer Voice",
        "samples": [love_samples],
        "preferred_terms": preferred_terms,
        "avoid_terms": avoid_terms,
    },
    timeout=30,
)
# Fail loudly on an HTTP error instead of surfacing an opaque KeyError
# on bv["id"] below (the original called .json() with no status check).
resp.raise_for_status()
bv = resp.json()

print(f"Brand Voice: {bv['id']}")
print(f"\nPreferred terms ({len(preferred_terms)}):")
for p, c in love_phrases[:10]:
    print(f" ✓ '{p}' ({c}x)")
print(f"\nAvoid terms ({len(avoid_terms)}):")
for p, c in hate_phrases[:10]:
    print(f" ✗ '{p}' ({c}x)")
# 5. Smoke-test the new brand voice with a single generation.
from openai import OpenAI  # OpenAI-compatible client pointed at Mavera

# NOTE(review): the original referenced MV_BASE without ever defining it
# (NameError at runtime). Defined here from the environment with a guessed
# default -- confirm the real Mavera API base URL. (Re-assigning the same
# value at module level is harmless if it was already set earlier.)
MV_BASE = os.environ.get("MAVERA_API_BASE", "https://api.mavera.ai/v1")

mavera = OpenAI(api_key=MV, base_url=MV_BASE)
test = mavera.responses.create(
    model="mavera-1",
    input=[{"role": "user", "content": "Write a 100-word product description for our landing page."}],
    extra_body={"brand_voice_id": bv["id"]},  # vendor extension, passed through verbatim
)
print(f"\n--- Test Generation ---\n{test.output[0].content[0].text}")