import os, requests, time, math
from openai import OpenAI
# OpenAI client for embeddings; fails fast with a KeyError if the key is unset.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# Mavera generation-API credentials and endpoint (Bearer-token auth).
MV = os.environ["MAVERA_API_KEY"]
MV_BASE = "https://app.mavera.io/api/v1"
MV_H = {"Authorization": f"Bearer {MV}", "Content-Type": "application/json"}
# Existing published content: title + body excerpt. Embedded below to measure
# how well the current library covers each target topic.
CONTENT_LIBRARY = [
{"title": "Getting Started with Marketing Automation",
"text": "Marketing automation platforms help teams scale personalized outreach..."},
{"title": "Email Segmentation Best Practices",
"text": "Segmenting your email list by behavior and demographics drives higher open rates..."},
{"title": "A/B Testing Your Landing Pages",
"text": "Statistical significance matters. Run tests for at least two weeks..."},
{"title": "How Acme Corp Increased Pipeline 340%",
"text": "Acme Corp migrated from manual outreach to automated sequences..."},
{"title": "API Authentication Guide",
"text": "All API requests require a Bearer token in the Authorization header..."},
]
# Topics we want covered; any topic far (in embedding space) from every
# library document is treated as a content gap.
TARGET_TOPICS = [
"social media advertising strategy", "content marketing ROI measurement",
"customer retention and churn prevention", "SEO keyword research methodology",
"video marketing for B2B", "marketing attribution modeling",
]
def cosine_sim(a, b):
dot = sum(x * y for x, y in zip(a, b))
na = math.sqrt(sum(x * x for x in a))
nb = math.sqrt(sum(x * x for x in b))
return dot / (na * nb) if na and nb else 0.0
# 1. Embed the existing content library.
# Title + a 500-char excerpt gives the embedding enough context without
# paying for full-document tokens.
lib_texts = [f"{c['title']}: {c['text'][:500]}" for c in CONTENT_LIBRARY]
lib_resp = client.embeddings.create(model="text-embedding-3-large", input=lib_texts)
lib_vectors = [item.embedding for item in lib_resp.data]
print(f"Embedded {len(lib_vectors)} documents (dim={len(lib_vectors[0])})")
time.sleep(1)  # light pacing between API calls

# 2. Embed target topics and score each against its nearest library document.
topic_resp = client.embeddings.create(model="text-embedding-3-large", input=TARGET_TOPICS)
topic_vectors = [item.embedding for item in topic_resp.data]
gaps = []
for topic, tvec in zip(TARGET_TOPICS, topic_vectors):
    # Compute each similarity exactly once. The original recomputed every
    # cosine twice (once for max(), once inside the argmax lambda), doubling
    # the O(n * dim) work per topic.
    sims = [cosine_sim(tvec, lv) for lv in lib_vectors]
    closest_idx = max(range(len(sims)), key=sims.__getitem__)
    max_sim = sims[closest_idx]
    gaps.append({"topic": topic, "max_similarity": max_sim,
                 "closest": CONTENT_LIBRARY[closest_idx]["title"]})
    print(f" {topic:42s} → sim {max_sim:.3f} (nearest: {CONTENT_LIBRARY[closest_idx]['title'][:30]})")

# Weakest coverage first; anything under the threshold is a content gap.
gaps.sort(key=lambda g: g["max_similarity"])
threshold = 0.35
content_gaps = [g for g in gaps if g["max_similarity"] < threshold]
print(f"\nContent gaps (sim < {threshold}): {len(content_gaps)} topics")
time.sleep(1)
# 3. Generate content outlines for the three largest gaps via Mavera.
for gap in content_gaps[:3]:
    resp = requests.post(
        f"{MV_BASE}/generations",
        headers=MV_H,
        json={
            "prompt": f"Write a 200-word blog post outline about: {gap['topic']}. "
                      f"Our closest content is '{gap['closest']}' (similarity: {gap['max_similarity']:.2f}). "
                      "Cover angles our library doesn't address. Include: headings, key points, data, CTA.",
        },
        # The original call had no timeout: a hung connection would stall
        # the script forever. 60s allows for slow generation.
        timeout=60,
    )
    # Fail loudly on HTTP errors; previously an error body would be parsed
    # and silently reduced to empty text by the .get() fallbacks below.
    resp.raise_for_status()
    gen = resp.json()
    # Response schema varies; accept either "output" or "content".
    text = gen.get("output") or gen.get("content") or ""
    print(f"\n{'='*60}\nGAP FILL: {gap['topic']}\n{'='*60}")
    print(text[:1500])
    time.sleep(2)  # pace requests to the generation API