import os, requests, time, base64
from collections import defaultdict
# --- Configuration ---------------------------------------------------------
# Both keys are required; a missing variable raises KeyError at startup.
GH_KEY = os.environ["GREENHOUSE_API_KEY"]
MV = os.environ["MAVERA_API_KEY"]
GH_BASE = "https://harvest.greenhouse.io/v1"
# Greenhouse Harvest uses HTTP Basic auth with the API key as the username
# and an empty password, hence base64-encoding "KEY:".
gh_auth = base64.b64encode((GH_KEY + ":").encode()).decode()
GH_H = {"Authorization": "Basic " + gh_auth}
# 1. Pull recent applications with outcomes (hired vs. rejected).
def _fetch_applications(status, limit=300):
    """Page through GET /applications filtered by *status*.

    Stops once *limit* applications are collected or the API returns an
    empty page. May overshoot *limit* by up to one page (per_page=100),
    matching the original loop's behaviour.

    Returns the accumulated list of application dicts.
    """
    collected = []
    page = 1
    while len(collected) < limit:
        resp = requests.get(
            f"{GH_BASE}/applications",
            headers=GH_H,
            params={"per_page": 100, "page": page, "status": status},
            timeout=30,  # original had no timeout and could hang forever
        )
        batch = resp.json()
        if not batch:  # empty page -> no more results
            break
        collected.extend(batch)
        page += 1
        time.sleep(0.3)  # throttle to stay under Harvest rate limits

    return collected


applications = _fetch_applications("hired")
rejected_apps = _fetch_applications("rejected")
# Sets of application IDs: de-duplicates and gives O(1) membership tests.
hired_app_ids = {a["id"] for a in applications}
rejected_app_ids = {a["id"] for a in rejected_apps}
# 2. Pull scorecards for these applications and bucket attribute ratings.
def _collect_scores(app_ids, max_apps=50):
    """Fetch scorecards for up to *max_apps* applications.

    Returns a defaultdict mapping attribute name -> list of non-empty
    rating strings across all fetched scorecards. Note: *app_ids* is a
    set, so which 50 applications are sampled is not deterministic
    (matches the original `list(set)[:50]` behaviour).
    """
    scores = defaultdict(list)
    for app_id in list(app_ids)[:max_apps]:
        resp = requests.get(
            f"{GH_BASE}/applications/{app_id}/scorecards",
            headers=GH_H,
            timeout=30,  # original had no timeout and could hang forever
        )
        for sc in resp.json():
            for attr in sc.get("attributes", []):
                rating = attr.get("rating", "")
                if rating:  # skip unrated attributes
                    scores[attr.get("name", "Unknown")].append(rating)
        time.sleep(0.25)  # throttle to stay under Harvest rate limits

    return scores


hired_scores = _collect_scores(hired_app_ids)
rejected_scores = _collect_scores(rejected_app_ids)
# 3. Build analysis context: per-attribute mean rating for each outcome.
# Ratings map onto a 1-5 scale; unknown rating strings fall back to the
# midpoint (3, "mixed"). An attribute with no ratings on one side averages
# to 0 for that side, which inflates its delta — intentional per original.
RATING_MAP = {"definitely_not": 1, "no": 2, "mixed": 3, "yes": 4, "strong_yes": 5}
analysis_lines = []
for attr_name in sorted(set(hired_scores) | set(rejected_scores)):
    hired_nums = [RATING_MAP.get(r, 3) for r in hired_scores.get(attr_name, [])]
    rej_nums = [RATING_MAP.get(r, 3) for r in rejected_scores.get(attr_name, [])]
    hired_avg = sum(hired_nums) / len(hired_nums) if hired_nums else 0
    rej_avg = sum(rej_nums) / len(rej_nums) if rej_nums else 0
    analysis_lines.append(
        f"- {attr_name}: Hired avg={hired_avg:.1f} "
        f"Rejected avg={rej_avg:.1f} Delta={hired_avg - rej_avg:+.1f}"
    )
analysis_block = "\n".join(analysis_lines)
# 4. Mave synthesis
mave = requests.post("https://app.mavera.io/api/v1/mave/chat",
headers={"Authorization": f"Bearer {MV}", "Content-Type": "application/json"},
json={"message": f"""Analyze these interview scorecard attributes comparing hired vs rejected candidates.
SCORECARD DATA ({len(hired_app_ids)} hired, {len(rejected_app_ids)} rejected applications analyzed):
{analysis_block}
Produce:
1. Top 5 traits that predict successful hires (highest delta)
2. Traits that DON'T predict success (low delta — possible interviewer bias)
3. Recommended interview rubric adjustments
4. Calibration notes for interviewers
5. A "success persona" profile summarizing the ideal candidate"""}).json()
print("=== Hiring Success Persona ===")
print(mave.get("content", "")[:2000])