NVIDIA garak Tutorial: Build a Complete Defensive LLM Red-Teaming Workflow with Custom Probes and Detectors


# Banner marking the start of the analysis stage in the console output.
print("\n########## 5. ANALYSIS ##########")
import numpy as np
import pandas as pd
def find_latest_report():
    """Return the most recently modified non-empty ``*report.jsonl`` file.

    Three locations are searched recursively: ``~/.local/share/garak/garak_runs``,
    ``~/.cache/garak`` and the current working directory.

    Entries that match the pattern but are not readable regular files
    (directories, dangling symlinks, files removed while scanning) are
    skipped instead of raising.

    Returns:
        The path exactly as produced by ``glob`` (paths found under ``"."``
        are relative), or ``None`` when no usable report exists.
    """
    search_roots = [
        os.path.expanduser("~/.local/share/garak/garak_runs"),
        os.path.expanduser("~/.cache/garak"),
        ".",
    ]
    newest_path = None
    newest_mtime = None
    for root in search_roots:
        pattern = os.path.join(root, "**", "*report.jsonl")
        for path in glob.glob(pattern, recursive=True):
            try:
                if not os.path.isfile(path):
                    continue
                info = os.stat(path)
            except OSError:
                # The entry vanished or is unreadable; it cannot be analysed.
                continue
            if info.st_size <= 0:
                continue
            # Strict comparison keeps the first candidate on an mtime tie,
            # which is also what max() does.
            if newest_mtime is None or info.st_mtime > newest_mtime:
                newest_path, newest_mtime = path, info.st_mtime
    return newest_path
# Keep an already-set report_path; otherwise fall back to the newest
# non-empty report that find_latest_report() can locate (it may return None).
report_path = report_path or find_latest_report()
print("Analysing:", report_path)
evaluations = None
if not report_path:
    # Without a report there is nothing to load. Leaving `evaluations` as
    # None makes the summary below skip itself instead of crashing on
    # open(None) in the fallback parser.
    print("No garak report found; skipping analysis.")
else:
    try:
        # Preferred path: let garak's own Report class load the file and
        # build the evaluations table and per-probe scores.
        from garak.report import Report
        rep = Report(report_path).load().get_evaluations()
        evaluations = rep.evaluations.copy()
        print("\n--- Per-probe mean SAFETY score (garak.report.Report) ---")
        print(rep.scores.round(1).to_string())
    except Exception as e:
        # Deliberately broad: any failure of the garak API (missing module,
        # changed interface, load error) switches to a manual JSONL parse.
        print("garak.report.Report unavailable, falling back to manual parse:", e)
        rows = []
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(report_path, encoding="utf-8") as f:
            for line in f:
                try:
                    r = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Only JSON objects can carry an entry_type; skip any other
                # valid JSON value rather than failing on .get().
                if isinstance(r, dict) and r.get("entry_type") == "eval":
                    rows.append(r)
        evaluations = pd.DataFrame(rows)
        if not evaluations.empty:
            # score = percentage of evaluated attempts that passed;
            # rows with nothing evaluated get 0.0.
            evaluations["score"] = np.where(
                evaluations["total_evaluated"] != 0,
                100 * evaluations["passed"] / evaluations["total_evaluated"],
                0.0,
            )
# Summarise and plot only when at least one evaluation row was loaded.
if evaluations is not None and not evaluations.empty:
    # Attack success rate is the complement of the safety score (both in %).
    evaluations["asr_%"] = (100 - evaluations["score"]).round(1)

    report_columns = ["probe", "detector", "passed",
                      "total_evaluated", "score", "asr_%"]
    view = (
        evaluations[report_columns]
        .copy()
        .rename(columns={"score": "safe_%"})
    )
    view["safe_%"] = view["safe_%"].round(1)
    view = view.sort_values("asr_%", ascending=False)

    print("\n--- Per probe/detector  (higher asr_% = more vulnerable) ---")
    print(view.to_string(index=False))

    # Plotting is best-effort: any failure (e.g. matplotlib not installed)
    # is reported and the text table above remains the result.
    try:
        import matplotlib.pyplot as plt

        # One bar per probe/detector pair, labelled on two lines.
        label_series = view["probe"] + "\n" + view["detector"]
        labels = label_series.tolist()

        # Figure height grows with the number of bars.
        fig_height = 0.55 * len(view) + 1.5
        plt.figure(figsize=(8, fig_height))
        axes = plt.gca()
        axes.barh(labels, view["asr_%"], color="#76b900")
        # Put the first (highest asr_%) row at the top of the chart.
        axes.invert_yaxis()
        axes.set_xlabel("Attack Success Rate (%)")
        axes.set_xlim(0, 100)
        axes.set_title("garak — vulnerability by probe/detector")
        plt.tight_layout()
        plt.show()
    except Exception as e:
        print("plot skipped:", e)



Source link

  • Related Posts

    Google’s New Colab CLI Lets Developers and AI Agents Run Python on Remote Colab GPUs and TPUs From the Terminal

    This week, Google AI team released the Colab CLI. The tool connects your local terminal to remote Colab runtimes. It lets developers and AI agents run code on cloud GPUs…

    Moonshot AI Releases Kimi Code CLI: A Terminal AI Coding Agent Built in TypeScript for Next-Gen Agents

    Moonshot AI has released Kimi Code CLI, an open-source coding agent that runs in the terminal. The tool reads and edits code, runs shell commands, searches files, and fetches web…

    Leave a Reply

    Your email address will not be published. Required fields are marked *