Building Reflective Prompt Optimization with GEPA: Multi-Component Prompts, Structured Feedback, and Held-Out Validation


def make_problems(n: int, seed: int = 0) -> list:
    """Generate ``n`` synthetic arithmetic word problems with integer answers.

    Every problem is built from one of four templates ("discount", "travel",
    "wallet", "chain") chosen with a private ``random.Random(seed)``, so the
    same ``(n, seed)`` pair always produces the same list and the module-level
    ``random`` state is never touched.

    Args:
        n: Number of problems to generate. Passed to ``range``, so ``0`` or a
            negative value yields an empty list.
        seed: Seed for the private random generator.

    Returns:
        A list of ``{"question": str, "answer": int}`` dicts, in generation
        order.
    """
    rng = random.Random(seed)
    problems = []
    for _ in range(n):
        template = rng.choice(["discount", "travel", "wallet", "chain"])
        if template == "discount":
            unit = rng.choice([40, 60, 80, 120])
            qty = rng.choice([5, 6, 8, 10])
            disc = rng.choice([10, 20, 25, 50])
            total = unit * qty
            # Unit prices are multiples of 20 and discounts multiples of 5,
            # so total * disc is a multiple of 100 and the division is exact.
            gold = total - total * disc // 100
            question = (
                f"A shop sells notebooks at {unit} rupees each. You buy {qty} "
                f"notebooks and get a {disc}% discount on the total bill. "
                f"How many rupees do you pay in total?"
            )
        elif template == "travel":
            # Draw order (s1, h1, s2, h2) is kept so seeded output is stable.
            s1 = rng.choice([40, 50, 60])
            h1 = rng.choice([2, 3])
            s2 = rng.choice([30, 45, 70])
            h2 = rng.choice([1, 2, 3])
            gold = s1 * h1 + s2 * h2
            question = (
                f"A car drives at {s1} km/h for {h1} hours, then at {s2} km/h "
                f"for {h2} hours. What is the total distance travelled, in km?"
            )
        elif template == "wallet":
            spend_options = [50, 80, 110, 150]
            tens = rng.choice([3, 5, 7, 9])
            fifties = rng.choice([2, 4, 6])
            spent = rng.choice(spend_options)
            budget = tens * 10 + fifties * 50
            if spent > budget:
                # The smallest wallet (3 tens + 2 fifties = 130) cannot cover a
                # spend of 150, which would give a negative "rupees left"
                # answer. Fall back to the largest affordable option instead of
                # re-drawing, so the random stream (and therefore every other
                # problem for this seed) is unchanged.
                spent = max(s for s in spend_options if s <= budget)
            gold = budget - spent
            question = (
                f"You have {tens} ten-rupee notes and {fifties} fifty-rupee "
                f"notes. You spend {spent} rupees. How many rupees are left?"
            )
        else:  # "chain"
            x = rng.choice([6, 9, 12, 15])
            y = rng.choice([4, 7, 10])
            z = rng.choice([3, 8, 11])
            gold = x * 2 - y + z
            question = (
                f"Start with the number {x}. Double it, then subtract {y}, "
                f"then add {z}. What number do you end with?"
            )
        problems.append({"question": question, "answer": gold})
    return problems
# Build a fixed pool of 18 problems, then shuffle it with its own seeded
# generator so the train/validation split is reproducible across runs.
all_problems = make_problems(18, seed=42)
random.Random(1).shuffle(all_problems)

# First 12 problems are used for training, the remainder is held out.
trainset, valset = all_problems[:12], all_problems[12:]
print(f"Dataset: {len(trainset)} train / {len(valset)} val problems\n")



Source link

  • Related Posts

    Best 21 Low-Code and No-Code AI Tools in 2026

    Low-code and no-code platforms have moved from simple drag-and-drop builders to AI-native development environments. In 2026, most of them ship a built-in assistant that turns a text prompt into a…

    Meet Harness-1: A 20B Retrieval Subagent Trained With Reinforcement Learning Inside a Stateful Search Harness on gpt-oss-20b

    Most search agents are trained as policies over a growing transcript. The model decides how to search. It must also remember what it saw, which evidence matters, and which claims…

    Leave a Reply

    Your email address will not be published. Required fields are marked *