How to Build Production-Ready Agentic Systems with Z.AI GLM-5 Using Thinking Mode, Tool Calling, Streaming, and Multi-Turn Workflows


# Banner introducing the multi-tool agent section of the tutorial.
# A single print with a newline separator emits exactly the same text as
# four consecutive prints.
print(
    "\n" + "=" * 70,
    "🤖 SECTION 8: Multi-Tool Agentic Loop",
    "=" * 70,
    "Build a complete agent that can use multiple tools across turns.\n",
    sep="\n",
)




class GLM5Agent:
    """Chat agent that lets the model call registered tools until it answers.

    The conversation history lives on the instance, so successive ``chat``
    calls continue the same conversation.
    """

    def __init__(self, system_prompt: str, tools: list, tool_registry: dict):
        """Create the API client and seed the conversation.

        Args:
            system_prompt: Content of the initial ``system`` message.
            tools: Tool schemas sent as ``tools=`` on every completion request.
            tool_registry: Maps a tool name to the callable implementing it.
        """
        self.client = ZaiClient(api_key=API_KEY)
        self.messages = [{"role": "system", "content": system_prompt}]
        self.tools = tools
        self.registry = tool_registry
        # Upper bound on model round-trips within a single chat() call.
        self.max_iterations = 5

    def chat(self, user_input: str) -> str:
        """Send ``user_input`` and resolve tool calls until a final answer.

        Each requested tool call is executed and its JSON-encoded result is
        appended as a ``tool`` message. Tool failures are reported back to
        the model as ``{"error": ...}`` payloads instead of raising, so every
        tool call always receives a matching tool message.

        Args:
            user_input: The user's message for this turn.

        Returns:
            The assistant's final text (empty string if the final message has
            no content), or a warning string when ``max_iterations`` is
            reached without a final answer.
        """
        self.messages.append({"role": "user", "content": user_input})

        for iteration in range(self.max_iterations):
            response = self.client.chat.completions.create(
                model="glm-5",
                messages=self.messages,
                tools=self.tools,
                tool_choice="auto",
                max_tokens=2048,
                temperature=0.6,
            )

            msg = response.choices[0].message
            self.messages.append(msg.model_dump())

            if not msg.tool_calls:
                # Honour the declared ``str`` return type even when the
                # message carries no content.
                return msg.content or ""

            for tc in msg.tool_calls:
                result = self._run_tool_call(tc, iteration)
                self.messages.append({
                    "role": "tool",
                    "content": self._encode_result(result),
                    "tool_call_id": tc.id,
                })

        return "⚠️ Agent reached maximum iterations without a final answer."

    def _run_tool_call(self, tc, iteration: int):
        """Execute one tool call and return its result or an error dict."""
        fn_name = tc.function.name
        try:
            # An empty argument string is treated as "no arguments".
            fn_args = json.loads(tc.function.arguments or "{}")
        except (json.JSONDecodeError, TypeError) as exc:
            print(f"   ⚠️ [{iteration+1}] {fn_name}: invalid arguments")
            return {"error": f"Invalid JSON arguments for {fn_name}: {exc}"}

        if not isinstance(fn_args, dict):
            # ``**fn_args`` below needs a mapping of keyword arguments.
            print(f"   ⚠️ [{iteration+1}] {fn_name}: invalid arguments")
            return {"error": f"Arguments for {fn_name} must be a JSON object"}

        print(f"   🔧 [{iteration+1}] {fn_name}({fn_args})")

        if fn_name not in self.registry:
            return {"error": f"Unknown function: {fn_name}"}

        try:
            return self.registry[fn_name](**fn_args)
        except Exception as exc:
            # Boundary between the agent loop and arbitrary tool code: report
            # the failure to the model rather than aborting the conversation.
            return {"error": f"{fn_name} failed: {type(exc).__name__}: {exc}"}

    @staticmethod
    def _encode_result(result) -> str:
        """Serialize a tool result to JSON, falling back to an error payload."""
        try:
            return json.dumps(result, ensure_ascii=False)
        except (TypeError, ValueError) as exc:
            return json.dumps(
                {"error": f"Tool result is not JSON-serializable: {exc}"},
                ensure_ascii=False,
            )




# Tool schemas offered to the agent: the previously defined `tools` followed
# by two extra function schemas (current time and unit conversion).
extended_tools = [
    *tools,
    # Takes no arguments, hence the empty properties/required lists.
    {
        "type": "function",
        "function": {
            "name": "get_current_time",
            "description": "Get the current date and time in ISO format",
            "parameters": {"type": "object", "properties": {}, "required": []},
        },
    },
    # All three arguments are mandatory.
    {
        "type": "function",
        "function": {
            "name": "unit_converter",
            "description": "Convert between units (length, weight, temperature)",
            "parameters": {
                "type": "object",
                "properties": {
                    "value": {
                        "type": "number",
                        "description": "Numeric value to convert",
                    },
                    "from_unit": {
                        "type": "string",
                        "description": "Source unit (e.g., 'km', 'miles', 'kg', 'lbs', 'celsius', 'fahrenheit')",
                    },
                    "to_unit": {
                        "type": "string",
                        "description": "Target unit",
                    },
                },
                "required": ["value", "from_unit", "to_unit"],
            },
        },
    },
]




def get_current_time() -> dict:
    """Return the current UTC time as an ISO-8601 string.

    Returns:
        A dict with ``"datetime"`` (timezone-aware ISO-8601 timestamp, so it
        carries a ``+00:00`` offset) and ``"timezone"`` (always ``"UTC"``).
    """
    # Local import keeps this function self-contained; the module-level
    # `datetime` name is the class, which does not expose `timezone`.
    from datetime import timezone

    # Request UTC explicitly: a bare datetime.now() yields naive *local* time,
    # which would contradict the "UTC" label below.
    return {"datetime": datetime.now(timezone.utc).isoformat(), "timezone": "UTC"}




def unit_converter(value: float, from_unit: str, to_unit: str) -> dict:
    """Convert ``value`` between a fixed set of length, weight and temperature units.

    Supported pairs: km/miles, kg/lbs, celsius/fahrenheit and meters/feet, in
    both directions. Unit names are matched case-insensitively and ignoring
    surrounding whitespace. Converting a supported unit to itself returns the
    value unchanged.

    Args:
        value: Quantity to convert. Numeric strings such as ``"28"`` are
            accepted and parsed with ``float``.
        from_unit: Source unit name.
        to_unit: Target unit name.

    Returns:
        ``{"value", "from", "to", "result"}`` on success, echoing the inputs as
        given, with ``result`` rounded to 4 decimal places. Otherwise
        ``{"error": ...}``; this function reports bad input via the error dict
        rather than raising.
    """
    conversions = {
        ("km", "miles"): lambda v: v * 0.621371,
        ("miles", "km"): lambda v: v * 1.60934,
        ("kg", "lbs"): lambda v: v * 2.20462,
        ("lbs", "kg"): lambda v: v * 0.453592,
        ("celsius", "fahrenheit"): lambda v: v * 9 / 5 + 32,
        ("fahrenheit", "celsius"): lambda v: (v - 32) * 5 / 9,
        ("meters", "feet"): lambda v: v * 3.28084,
        ("feet", "meters"): lambda v: v * 0.3048,
    }
    source = str(from_unit).strip().lower()
    target = str(to_unit).strip().lower()

    convert = conversions.get((source, target))
    if convert is None:
        known_units = {unit for pair in conversions for unit in pair}
        if source == target and source in known_units:
            # Same supported unit on both sides: identity conversion.
            convert = lambda v: v
        else:
            return {"error": f"Conversion {from_unit} → {to_unit} not supported"}

    try:
        # Leave real numbers untouched; parse anything else (e.g. "28").
        amount = value if isinstance(value, (int, float)) else float(value)
    except (TypeError, ValueError):
        return {"error": f"Value {value!r} is not a number"}

    result = round(convert(amount), 4)
    return {"value": value, "from": from_unit, "to": to_unit, "result": result}




# Name -> callable table for the agent: everything already in TOOL_REGISTRY
# plus the two tools defined in this section.
extended_registry = dict(
    TOOL_REGISTRY,
    get_current_time=get_current_time,
    unit_converter=unit_converter,
)


# Build the demo agent with the extended tool schemas and registry.
agent = GLM5Agent(
    system_prompt=" ".join(
        [
            "You are a helpful assistant with access to weather, math, time, and unit conversion tools.",
            "Use them whenever they can help answer the user's question accurately.",
            "Always show your work.",
        ]
    ),
    tools=extended_tools,
    tool_registry=extended_registry,
)

# Echo the question for the reader, then send it to the agent as one message.
print(
    "🧑 User: What time is it? Also, if it's 28°C in Tokyo, what's that in Fahrenheit?\n"
    "   And what's 2^16?"
)
result = agent.chat(
    "What time is it? Also, if it's 28°C in Tokyo, what's that in Fahrenheit? And what's 2^16?"
)
print("\n🤖 Agent: {}".format(result))




# Banner introducing the thinking-mode comparison section.
# One print with a newline separator produces the same output as four
# separate prints.
print(
    "\n" + "=" * 70,
    "⚖️  SECTION 9: Thinking Mode ON vs OFF Comparison",
    "=" * 70,
    "See how thinking mode improves accuracy on a tricky logic problem.\n",
    sep="\n",
)


# Prompt shared by both runs of the thinking-mode comparison.
# The setup sentence alone gives the model nothing to answer, so the prompt
# must end with the actual question and the expected form of the answer.
tricky_question = (
    "I have 12 coins. One of them is counterfeit and weighs differently than the rest. "
    "Using a balance scale, what is the minimum number of weighings needed to guarantee "
    "finding the counterfeit coin and determining whether it is heavier or lighter? "
    "Explain your strategy step by step."
)


# Run 1: the same prompt with thinking disabled, as the baseline.
print("─── WITHOUT Thinking Mode ───")
# perf_counter is monotonic, so the measured duration cannot be skewed by
# system clock adjustments.
t0 = time.perf_counter()
r_no_think = client.chat.completions.create(
    model="glm-5",
    messages=[{"role": "user", "content": tricky_question}],
    thinking={"type": "disabled"},
    max_tokens=2048,
    temperature=0.6,
)
t1 = time.perf_counter()
print(f"⏱️  Time: {t1-t0:.1f}s | Tokens: {r_no_think.usage.completion_tokens}")
# `or ''` guards the slice in case the message comes back without content.
print(f"📝 Answer (first 300 chars): {(r_no_think.choices[0].message.content or '')[:300]}...")


# Run 2: thinking enabled, with a larger token budget (4096 vs 2048).
print("\n─── WITH Thinking Mode ───")
t0 = time.perf_counter()
r_think = client.chat.completions.create(
    model="glm-5",
    messages=[{"role": "user", "content": tricky_question}],
    thinking={"type": "enabled"},
    max_tokens=4096,
    temperature=0.6,
)
t1 = time.perf_counter()
print(f"⏱️  Time: {t1-t0:.1f}s | Tokens: {r_think.usage.completion_tokens}")
# `or ''` guards the slice in case the message comes back without content.
print(f"📝 Answer (first 300 chars): {(r_think.choices[0].message.content or '')[:300]}...")



Source link

  • Related Posts

    RightNow AI Releases AutoKernel: An Open-Source Framework that Applies an Autonomous Agent Loop to GPU Kernel Optimization for Arbitrary PyTorch Models

    Writing fast GPU code is one of the most grueling specializations in machine learning engineering. Researchers from RightNow AI want to automate it entirely. The RightNow AI research team has…

    How to Build a Netflix VOID Video Object Removal and Inpainting Pipeline with CogVideoX, Custom Prompting, and End-to-End Sample Inference

    In this tutorial, we build and run an advanced pipeline for Netflix’s VOID model. We set up the environment, install all required dependencies, clone the repository, download the official base…

    Leave a Reply

    Your email address will not be published. Required fields are marked *