1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
def step(self):
    """Run one action-agent rollout iteration and report the outcome.

    The current ``self.messages`` are sent to the action agent's LLM, the
    reply is parsed, and - when parsing yields a dict - the generated code
    is executed in the environment, judged by the critic agent, and used
    to build the next pair of prompt messages. When parsing yields a
    string instead, an empty event list is recorded and the existing
    messages are left untouched so the same prompt is retried.

    Returns:
        A 4-tuple ``(messages, reward, done, info)`` where ``messages`` is
        ``self.messages``, ``reward`` is always ``0``, ``done`` is truthy
        once the critic reports success or the retry budget
        (``self.action_agent_task_max_retries``) is used up, and ``info``
        holds ``"success"`` and ``"conversations"`` plus, on success,
        ``"program_code"`` and ``"program_name"``.

    Raises:
        ValueError: If ``self.action_agent_rollout_num_iter`` is negative,
            i.e. the agent has not been reset before stepping.
    """
    if self.action_agent_rollout_num_iter < 0:
        raise ValueError("Agent must be reset before stepping")

    ai_message = self.action_agent.llm(self.messages)
    print(f"\033[34m****Action Agent ai message****\n{ai_message.content}\033[0m")
    transcript = (
        self.messages[0].content,
        self.messages[1].content,
        ai_message.content,
    )
    self.conversations.append(transcript)

    parsed_result = self.action_agent.process_ai_message(message=ai_message)
    success = False
    if not isinstance(parsed_result, dict):
        # Anything that is not a dict is expected to be a string, which is
        # printed as the reason for retrying; the prompt stays unchanged.
        assert isinstance(parsed_result, str)
        self.recorder.record([], self.task)
        print(f"\033[34m{parsed_result} Trying again!\033[0m")
    else:
        program_code = parsed_result["program_code"]
        code = program_code + "\n" + parsed_result["exec_code"]
        events = self.env.step(code, programs=self.skill_manager.programs)
        self.recorder.record(events, self.task)
        self.action_agent.update_chest_memory(events[-1][1]["nearbyChests"])

        chest_observation = self.action_agent.render_chest_observation()
        success, feedback = self.critic_agent.check_task_success(
            events=events,
            task=self.task,
            context=self.context,
            chest_observation=chest_observation,
            max_retries=5,
        )

        if self.reset_placed_if_failed and not success:
            # Collect every "<block>_placed" save event of this step so the
            # placed items can be handed back to the bot.
            placed = [
                (event["onSave"].split("_placed")[0], event["status"]["position"])
                for event_type, event in events
                if event_type == "onSave" and event["onSave"].endswith("_placed")
            ]
            blocks = [block for block, _ in placed]
            positions = [position for _, position in placed]

            blocks_json = U.json_dumps(blocks)
            positions_json = U.json_dumps(positions)
            new_events = self.env.step(
                f"await givePlacedItemBack(bot, {blocks_json}, {positions_json})",
                programs=self.skill_manager.programs,
            )
            # Carry the inventory/voxels reported after the give-back step
            # over into the last event of the original step.
            for key in ("inventory", "voxels"):
                events[-1][1][key] = new_events[-1][1][key]

        skill_query = (
            self.context + "\n\n" + self.action_agent.summarize_chatlog(events)
        )
        new_skills = self.skill_manager.retrieve_skills(query=skill_query)

        system_message = self.action_agent.render_system_message(skills=new_skills)
        human_message = self.action_agent.render_human_message(
            events=events,
            code=program_code,
            task=self.task,
            context=self.context,
            critique=feedback,
        )
        self.last_events = copy.deepcopy(events)
        self.messages = [system_message, human_message]

    assert len(self.messages) == 2
    self.action_agent_rollout_num_iter += 1
    out_of_retries = (
        self.action_agent_rollout_num_iter >= self.action_agent_task_max_retries
    )
    done = out_of_retries or success

    info = {"success": success, "conversations": self.conversations}
    if not success:
        print(
            f"\033[32m****Action Agent human message****\n{self.messages[-1].content}\033[0m"
        )
    else:
        assert (
            "program_code" in parsed_result and "program_name" in parsed_result
        ), "program and program_name must be returned when success"
        info["program_code"] = parsed_result["program_code"]
        info["program_name"] = parsed_result["program_name"]
    return self.messages, 0, done, info
|