A Coding Implementation on Microsoft SkillOpt for Instrumented Prompt Optimization, Skill Evolution Analysis, and Baseline Comparison


k = RUN_KNOBS
train_out = run_cli(["python","scripts/train.py","--config",CFG,"--split_dir",SPLIT,
   "--optimizer_model",OPTIMIZER_MODEL,"--target_model",TARGET_MODEL,"--out_root",RUN,
   *COMMON,
   "train.train_size=0",
   f"train.num_epochs={k['num_epochs']}", f"train.batch_size={k['batch_size']}",
   f"gradient.minibatch_size={k['minibatch']}", f"gradient.merge_batch_size={k['merge_batch']}",
   f"gradient.analyst_workers={k['workers']}",
   f"optimizer.learning_rate={k['lr']}", f"optimizer.lr_scheduler={k['lr_sched']}",
   "optimizer.use_slow_update=true", "optimizer.use_meta_skill=true",
   f"env.workers={k['workers']}", f"env.limit={k['limit']}"],
   "TRAIN (rollout->reflect->aggregate->select->update->gate; slow-update + meta-skill)")
import pandas as pd, matplotlib.pyplot as plt
hist = json.loads(pathlib.Path(f"{RUN}/history.json").read_text())
df = pd.json_normalize(hist)
print("\nhistory.json columns:", list(df.columns))
def col(*cands):
   for c in cands:
       for actual in df.columns:
           if c in actual.lower(): return actual
   return None
c_step = col("step")
x = df[c_step] if c_step else range(len(df))
c_tr, c_va = col("train_acc","train_hard","train"), col("val_acc","val_hard","valid","val")
c_lr, c_tok = col("edit_budget","lr","learning_rate","budget"), col("token","cost")
fig, ax = plt.subplots(1, 3, figsize=(16,4))
if c_tr: ax[0].plot(x, df[c_tr], "o-", label="train acc")
if c_va: ax[0].plot(x, df[c_va], "s-", label="val acc (gate)")
if base and base["hard"] is not None: ax[0].axhline(base["hard"], ls="--", c="grey", label="baseline (seed)")
ax[0].set_title("Skill accuracy over steps"); ax[0].set_xlabel("step"); ax[0].legend(); ax[0].grid(alpha=.3)
if c_lr: ax[1].plot(x, df[c_lr], "d-", c="purple")
ax[1].set_title("Edit-budget / LR schedule (cosine)"); ax[1].set_xlabel("step"); ax[1].grid(alpha=.3)
if c_tok: ax[2].plot(x, pd.to_numeric(df[c_tok],errors="coerce").cumsum(), c="darkorange")
ax[2].set_title("Cumulative token usage"); ax[2].set_xlabel("step"); ax[2].grid(alpha=.3)
plt.tight_layout(); plt.savefig(f"{RUN}/training_dashboard.png", dpi=120); plt.show()



Source link

  • Related Posts

    Google AI Releases DiffusionGemma, a 26B MoE Open Model Using Text Diffusion for Up to 4x Faster Generation

    Google AI team including the Google DeepMind researchers have just released DiffusionGemma, an experimental open model for text generation. It uses text diffusion instead of standard autoregressive decoding. The model…

    Top AI Coding Agents and Development Platforms in 2026: Atoms, Devin, Windsurf, Cursor, Warp, and More Compared

    Development has shifted from typing code by hand to describing intent and letting agents do the work. Today’s tools plan tasks, edit across files, run tests, open pull requests, and…

    Leave a Reply

    Your email address will not be published. Required fields are marked *