How to Build Memory-Driven AI Agents with Short-Term, Long-Term, and Episodic Memory


def openai_chat(system: str, user: str) -> str:
    """Send a system + user prompt to the OpenAI chat completions API and return the reply text.

    Relies on module-level `client` (an OpenAI client instance) and `OPENAI_MODEL`.

    Args:
        system: System-role instructions for the model.
        user: User-role content (here: memory context + question).

    Returns:
        The assistant message content from the first choice.
    """
    resp = client.chat.completions.create(
        model=OPENAI_MODEL,
        # Reconstructed: the original source was corrupted here (lines pasted
        # over from an unrelated dict literal). This is the standard
        # chat-completions message payload shape.
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        temperature=0.3
    )
    return resp.choices[0].message.content


def heuristic_responder(context: str, question: str) -> str:
    """Offline fallback responder: assemble a structured answer from the memory context.

    Parses the retrieval context for episodic lessons (`Lessons=...` lines),
    failure modes (`Avoid=...` lines), and long-term memory lines (`[LTM:...]`),
    then builds a step-wise, memory-informed reply without any LLM call.

    Args:
        context: The memory context string produced by MemoryEngine.build_context.
        question: The user question (unused by the heuristic; kept for interface
            parity with the LLM-backed path).

    Returns:
        A plain-text answer with recommended steps and pitfalls to avoid.
    """
    lessons = re.findall(r"Lessons=(.*)", context)
    avoid = re.findall(r"Avoid=(.*)", context)
    ltm_lines = [ln for ln in context.splitlines() if ln.startswith("[LTM:")]

    # Recommended steps: episodic lessons first, then any stored procedures.
    steps = []
    if lessons:
        for chunk in lessons[:2]:
            for s in [x.strip() for x in chunk.split(";") if x.strip()]:
                steps.append(s)
    for ln in ltm_lines:
        # BUG FIX: the needle must be lowercase — `ln.lower()` can never
        # contain the mixed-case "[LTM:procedure]", so this branch was dead.
        if "[ltm:procedure]" in ln.lower():
            proc = re.sub(r"^\[LTM:procedure\]\s*", "", ln, flags=re.I)
            proc = proc.split("(salience=")[0].strip()
            # Procedures are stored as pipe-separated step lists.
            for part in [p.strip() for p in proc.split("|") if p.strip()]:
                steps.append(part)

    # Cap at 8 steps; fall back to a generic plan when memory yields nothing.
    steps = steps[:8] if steps else ["Clarify the target outcome and constraints.", "Use semantic recall + episodic lessons to propose a plan.", "Execute, then store lessons learned."]

    pitfalls = []
    if avoid:
        for chunk in avoid[:2]:
            for s in [x.strip() for x in chunk.split(";") if x.strip()]:
                pitfalls.append(s)
    pitfalls = pitfalls[:6]

    # BUG FIX (same case issue as above): lowercase needles so these filters
    # actually match against the lowercased line.
    prefs = [ln for ln in ltm_lines if "[ltm:preference]" in ln.lower()]
    facts = [ln for ln in ltm_lines if "[ltm:fact]" in ln.lower() or "[ltm:constraint]" in ln.lower()]

    out = []
    out.append("Answer (memory-informed, offline fallback)\n")
    if prefs:
        out.append("Relevant preferences/constraints remembered:")
        for ln in (prefs + facts)[:6]:
            # Strip the "[LTM:kind] " prefix and the trailing "(salience=...)".
            out.append(" - " + ln.split("] ", 1)[1].split(" (salience=")[0].strip())
        out.append("")
    out.append("Recommended approach:")
    for i, s in enumerate(steps, 1):
        # Reconstructed: this line was corrupted in the original source;
        # it emits a numbered step line.
        out.append(f" {i}. {s}")
    if pitfalls:
        out.append("\nPitfalls to avoid (from episodic traces):")
        for p in pitfalls:
            out.append(" - " + p)
    out.append("\n(If you add an API key, the same memory context will feed a stronger LLM for higher-quality responses.)")
    return "\n".join(out).strip()


class MemoryAugmentedAgent:
    """Thin agent wrapper: retrieve memory context, query a responder, log both turns."""

    def __init__(self, mem: MemoryEngine):
        self.mem = mem

    def answer(self, question: str) -> Dict[str, Any]:
        """Answer `question` using retrieved memory; record the exchange in short-term memory.

        Returns a dict with the reply text, the raw retrieval pack, and the
        rendered context string.
        """
        retrieval_pack = self.mem.retrieve(question)
        memory_context = self.mem.build_context(question, retrieval_pack)

        system_prompt = (
            "You are a memory-augmented agent. Use the provided memory context.\n"
            "Prioritize:\n"
            "1) Episodic lessons (what worked before)\n"
            "2) Long-term facts/preferences/procedures\n"
            "3) Short-term conversation state\n"
            "Be concrete and stepwise. If memory conflicts, state the uncertainty."
        )

        # LLM path when a key is configured; otherwise the offline heuristic.
        if USE_OPENAI:
            user_payload = memory_context + "\n\nUser question:\n" + question
            reply = openai_chat(system=system_prompt, user=user_payload)
        else:
            reply = heuristic_responder(context=memory_context, question=question)

        # Persist both sides of the exchange into short-term memory.
        for role, text in (("user", question), ("assistant", reply)):
            self.mem.st_add(role, text, kind="message")

        return {"reply": reply, "pack": retrieval_pack, "context": memory_context}


# --- Bootstrap: build the memory engine and wrap it in an agent. ---
mem = MemoryEngine()
agent = MemoryAugmentedAgent(mem)


# Seed long-term memory with durable preferences/procedures/constraints.
# NOTE(review): `pinned=True` presumably protects items from pruning during
# consolidation — confirm against MemoryEngine's policy.
mem.ltm_add(kind="preference", text="Prefer concise, structured answers with steps and bullet points when helpful.", tags=["style"], pinned=True)
mem.ltm_add(kind="preference", text="Prefer solutions that run on Google Colab without extra setup.", tags=["environment"], pinned=True)
mem.ltm_add(kind="procedure", text="When building agent memory: embed items, store with salience/novelty policy, retrieve with hybrid semantic+episodic, and decay overuse to avoid repetition.", tags=["agent-memory"])
mem.ltm_add(kind="constraint", text="If no API key is available, provide a runnable offline fallback instead of failing.", tags=["robustness"], pinned=True)


# Seed one episodic trace: a completed task with its plan, actions, outcome,
# distilled lessons, and observed failure modes, for future recall.
mem.episode_add(
   task="Build an agent memory layer for troubleshooting Python errors in Colab",
   constraints={"offline_ok": True, "single_notebook": True},
   plan=[
       "Capture short-term chat context",
       "Store durable constraints/preferences in long-term vector memory",
       "After solving, extract lessons into episodic traces",
       "On new tasks, retrieve top episodic lessons + semantic facts"
   ],
   actions=[
       {"type":"analysis", "detail":"Identified recurring failure: missing installs and version mismatches."},
       {"type":"action", "detail":"Added pip install block + minimal fallbacks."},
       {"type":"action", "detail":"Added memory policy: pin constraints, drop low-salience items."}
   ],
   result="Notebook became robust: runs with or without external keys; troubleshooting quality improved with episodic lessons.",
   outcome_score=0.90,
   lessons=[
       "Always include a pip install cell for non-standard deps.",
       "Pin hard constraints (e.g., offline fallback) into long-term memory.",
       "Store a post-task 'lesson list' as an episodic trace for reuse."
   ],
   failure_modes=[
       "Assuming an API key exists and crashing when absent.",
       "Storing too much noise into long-term memory causing irrelevant recall context."
   ],
   tags=["colab","robustness","memory"]
)


# Sanity check: confirm the seeded memories are registered.
print("✅ Memory engine initialized.")
print(f"   LTM items: {len(mem.ltm)} | Episodes: {len(mem.episodes)} | ST items: {len(mem.short_term)}")


# Demo query 1: storage/retrieval design — should surface the seeded episode.
q1 = "I want to build memory for an agent in Colab. What should I store and how do I retrieve it?"
out1 = agent.answer(q1)
print("\n" + "="*90)
print("Q1 REPLY\n")
print(out1["reply"][:1800])


# Demo query 2: repetition avoidance — its reply feeds the episodic trace below.
q2 = "How do I avoid my agent repeating the same memory over and over?"
out2 = agent.answer(q2)
print("\n" + "="*90)
print("Q2 REPLY\n")
print(out2["reply"][:1800])


def simple_outcome_eval(text: str) -> float:
    """Score a reply in [0, 1] by the fraction of memory-policy keywords it mentions.

    A crude quality proxy: counts case-insensitive keyword hits and normalizes
    by the number of keywords (the original hard-coded `8`, which would
    silently break if the keyword list changed).

    Args:
        text: The reply text to evaluate.

    Returns:
        A float in [0.0, 1.0].
    """
    keywords = ("decay", "usage", "penalty", "novelty", "prune", "retrieve", "episodic", "semantic")
    lowered = text.lower()  # hoisted: lowercase once, not per keyword
    hits = sum(1 for kw in keywords if kw in lowered)
    return float(np.clip(hits / len(keywords), 0.0, 1.0))


# Score the Q2 reply and store the whole exchange as a new episodic trace,
# so future repetition-related questions can reuse these distilled lessons.
score2 = simple_outcome_eval(out2["reply"])
mem.episode_add(
   task="Prevent repetitive recall in a memory-augmented agent",
   constraints={"must_be_simple": True, "runs_in_colab": True},
   plan=[
       "Track usage counts per memory item",
       "Apply usage-based penalty during ranking",
       "Boost novelty during storage to reduce duplicates",
       "Optionally prune low-salience memories"
   ],
   actions=[
       {"type":"design", "detail":"Added usage-based penalty 1/(1+alpha*usage)."},
       {"type":"design", "detail":"Used novelty = 1 - max_similarity at store time."}
   ],
   result=out2["reply"][:600],
   outcome_score=score2,
   lessons=[
       "Penalize overused memories during ranking (usage decay).",
       "Enforce novelty threshold at storage time to prevent duplicates.",
       "Keep episodic lessons distilled to avoid bloated recall context."
   ],
   failure_modes=[
       "No usage tracking, causing one high-similarity memory to dominate forever.",
       "Storing raw chat logs as LTM instead of distilled summaries."
   ],
   tags=["ranking","decay","policy"]
)


# Run the engine's consolidation pass (merge/prune per its policy; the
# implementation lives in MemoryEngine.consolidate — not shown here).
cons = mem.consolidate()
print("\n" + "="*90)
print("CONSOLIDATION RESULT:", cons)


# Inspect long-term memory as a DataFrame (`display` is a Colab/Jupyter builtin).
print("\n" + "="*90)
print("LTM (top rows):")
display(mem.ltm_df().head(12))


# Inspect stored episodic traces.
print("\n" + "="*90)
print("EPISODES (top rows):")
display(mem.episodes_df().head(12))


def debug_retrieval(query: str):
    """Return (context, semantic-hits DataFrame, episodic-hits DataFrame) for `query`.

    Inspects the global `mem` engine to show exactly what retrieval would feed
    the agent: the rendered context string plus scored semantic and episodic
    hits as DataFrames for easy display.
    """
    pack = mem.retrieve(query)
    ctx = mem.build_context(query, pack)

    # Scored long-term (semantic) hits, truncated for readable display.
    semantic_rows = []
    for mem_id, score in pack["semantic_scored"]:
        item = mem.ltm[mem_id]
        semantic_rows.append({
            "mem_id": mem_id,
            "score": score,
            "kind": item.kind,
            "salience": item.salience,
            "usage": item.usage,
            "text": item.text[:160],
        })

    # Scored episodic hits with their distilled lessons.
    episodic_rows = []
    for ep_id, score in pack["episodic_scored"]:
        episode = mem.episodes[ep_id]
        episodic_rows.append({
            "ep_id": ep_id,
            "score": score,
            "outcome": episode.outcome_score,
            "task": episode.task[:140],
            "lessons": " | ".join(episode.lessons[:4]),
        })

    return ctx, pd.DataFrame(semantic_rows), pd.DataFrame(episodic_rows)


# Debug demo: dump the raw retrieval context plus scored semantic/episodic
# hits for a sample policy-design query.
print("\n" + "="*90)
ctx, sem_df, ep_df = debug_retrieval("How do I design an agent memory policy for storage and retrieval?")
print(ctx[:1600])
print("\nTop semantic hits:")
display(sem_df)
print("\nTop episodic hits:")
display(ep_df)


print("\n✅ Done. You now have working short-term, long-term vector, and episodic memory with storage/retrieval policies in one Colab snippet.")



Source link

  • Related Posts

    Meta Releases TRIBE v2: A Brain Encoding Model That Predicts fMRI Responses Across Video, Audio, and Text Stimuli

    Neuroscience has long been a field of divide and conquer. Researchers typically map specific cognitive functions to isolated brain regions—like motion to area V5 or faces to the fusiform gyrus—using…

    Google Releases Gemini 3.1 Flash Live: A Real-Time Multimodal Voice Model for Low-Latency Audio, Video, and Tool Use for AI Agents

    Google has released Gemini 3.1 Flash Live in preview for developers through the Gemini Live API in Google AI Studio. This model targets low-latency, more natural, and more reliable real-time…

    Leave a Reply

    Your email address will not be published. Required fields are marked *