
# Section banner: blank line, 70-char rule, title, 70-char rule.
print("".join(["\n", "=" * 70, "\nPART 4: NDCG@10 evaluation\n", "=" * 70]))

# Evaluation queries with graded relevance judgments.
# "rels" maps a corpus index to its relevance grade; any document that is
# not listed is scored as grade 0 when NDCG is computed.
eval_set = [
    {
        "query": "Where is most ATP produced in the cell?",
        "rels": {0: 2, 2: 3, 4: 2, 6: 1, 8: 3},
    },
    {
        "query": "How do plants capture light energy?",
        "rels": {1: 3, 9: 1},
    },
    {
        "query": "How are proteins made and packaged in a cell?",
        "rels": {5: 3, 7: 2},
    },
]
def dcg(rels):
    """Return the discounted cumulative gain of a ranked list of grades.

    Uses the exponential-gain form: the item at 1-based rank ``i`` with
    grade ``rel_i`` contributes ``(2**rel_i - 1) / log2(i + 1)``.

    Args:
        rels: Relevance grades in ranked order (best-ranked item first).

    Returns:
        The summed discounted gain; ``0.0`` for an empty input.
    """
    grades = np.asarray(rels, dtype=float)
    # Exponential gain: a grade of 0 contributes nothing.
    gains = np.power(2, grades) - 1
    # Rank discounts log2(2), log2(3), ... for positions 1, 2, ...
    discounts = np.log2(np.arange(grades.size) + 2)
    return (gains / discounts).sum()
def ndcg_at_k(ranked_doc_ids, rel_map, k=10):
    """Return NDCG@k for one ranked list against graded judgments.

    The DCG of the top ``k`` ranked documents is divided by the DCG of the
    ideal ordering (the ``k`` highest grades in ``rel_map``).

    Args:
        ranked_doc_ids: Document ids in ranked order, best first.
        rel_map: Mapping from document id to relevance grade; ids that are
            missing from the mapping count as grade 0.
        k: Cut-off rank. Must be non-negative.

    Returns:
        The score as a built-in ``float``. ``0.0`` when the ideal DCG is
        zero (no positively graded documents, or ``k == 0``).

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        # A negative k would silently slice from the end of the ranking.
        raise ValueError(f"k must be non-negative, got {k}")

    seen = set()
    gains = []
    for doc_id in ranked_doc_ids[:k]:
        # Credit each document only once. A repeated id keeps its rank slot
        # but earns no gain; otherwise the score could exceed 1.0.
        gains.append(0 if doc_id in seen else rel_map.get(doc_id, 0))
        seen.add(doc_id)

    ideal = sorted(rel_map.values(), reverse=True)[:k]
    idcg = dcg(ideal)
    # Cast so both branches return the same built-in float type.
    return float(dcg(gains) / idcg) if idcg > 0 else 0.0
# Per-query NDCG@10 for the bi-encoder ranking and for the reranked ranking.
base_scores = []
rr_scores = []

for ex in eval_set:
    q = ex["query"]
    rel_map = ex["rels"]

    # Stage 1: rank the whole corpus (top_k=len(corpus)) by embedding search.
    q_emb = bi.encode(q, convert_to_tensor=True, normalize_embeddings=True)
    hits = util.semantic_search(q_emb, corpus_emb, top_k=len(corpus))[0]
    base_order = [hit["corpus_id"] for hit in hits]
    base_scores.append(ndcg_at_k(base_order, rel_map))

    # Stage 2: rerank the same documents, passed in bi-encoder order.
    rr = reranker.rank(
        q,
        [corpus[doc_idx] for doc_idx in base_order],
        convert_to_tensor=True,
    )
    # Each result's "corpus_id" is used as a position in the list handed to
    # the reranker, so it is mapped back through base_order to a corpus index.
    rr_order = [base_order[item["corpus_id"]] for item in rr]
    rr_scores.append(ndcg_at_k(rr_order, rel_map))

# Report: one row per query, then the averages and the reranking lift.
print(f"{'Query':45s} {'bi-encoder':>12s} {'+ zerank-2':>12s}")
for ex, b, r in zip(eval_set, base_scores, rr_scores):
    print(f"{ex['query'][:43]:45s} {b:12.4f} {r:12.4f}")
print("-"*72)

avg_base_ndcg = np.mean(base_scores)
avg_rr_ndcg = np.mean(rr_scores)
print(f"{'AVERAGE NDCG@10':45s} {avg_base_ndcg:12.4f} {avg_rr_ndcg:12.4f}")
print(f"\nReranking lift: {avg_rr_ndcg-avg_base_ndcg:+.4f} NDCG@10")






