diff --git a/README.md b/README.md index f5e174c73..4fd5de6ae 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,6 @@ Our primary goal is to provide easy-to-use and clean functions for natural language processing tasks, enabling developers to build robust and efficient applications with ease. - - - Easy-to-Use Interface: Provides intuitive and user-friendly functions for rapid prototyping and development. - Clean Functions: Offers clean and well-structured functions, promoting readability and maintainability of code. @@ -40,7 +38,7 @@ Thought: Ramon Dekkers has been involved in controversies and crimes. I need to ``` -Project Organization +## Project Organization ------------ @@ -78,7 +76,7 @@ Project Organization │ or model summaries. │ ├── notebooks <- Jupyter notebooks. Naming convention is a number - │ (for ordering), the creator's initials, and a short `-` delimited │ description, e.g. `1.0-jqp-initial-data-exploration`. + │ (for ordering), the creator's initials, and a short `-` delimited │ description, e.g. `1.0-jqp-initial-data-exploration`. │ │ ├── references <- Data dictionaries, manuals, and all other explanatory materials. @@ -91,6 +89,6 @@ Project Organization --------- -## Contribution +## Contributing If you want to contribute, please check the [contributing.md](https://github.com/alckasoc/discussion-agents/blob/main/CONTRIBUTING.md) for guidelines! diff --git a/discussion_agents/cog/agent/expel.py b/discussion_agents/cog/agent/expel.py index 0928e4de7..8e0f403f9 100644 --- a/discussion_agents/cog/agent/expel.py +++ b/discussion_agents/cog/agent/expel.py @@ -26,7 +26,7 @@ EXPEL_REFLEXION_REACT_INSTRUCTION, RULE_PREFIX, ) -from discussion_agents.cog.prompts.react import REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES +from discussion_agents.cog.prompts.react import HOTPOTQA_FEWSHOT_EXAMPLES from discussion_agents.cog.prompts.reflexion import ( REFLEXION_REACT_INSTRUCTION, REFLEXION_REACT_REFLECT_FEWSHOT_EXAMPLES, @@ -152,7 +152,7 @@ def generate( reranker_strategy=reranker_strategy, )["fewshots"] examples = ( - examples if examples else [REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES] # type: ignore + examples if examples else [HOTPOTQA_FEWSHOT_EXAMPLES] # type: ignore ) examples = "\n\n".join(examples + [END_OF_EXAMPLES_DELIMITER]) + "\n" # type: ignore @@ -195,7 +195,7 @@ def gather_experience( keys: List[str], strategy: str = "reflexion", prompt: str = REFLEXION_REACT_INSTRUCTION, - examples: str = REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + examples: str = HOTPOTQA_FEWSHOT_EXAMPLES, reflect_examples: str = REFLEXION_REACT_REFLECT_FEWSHOT_EXAMPLES, reflect_prompt: str = REFLEXION_REACT_REFLECT_INSTRUCTION, ) -> Dict[str, Any]: @@ -214,7 +214,7 @@ def gather_experience( - "last_attempt_and_reflexion": This strategy combines the 'last_attempt' and 'reflexion' strategies. prompt (str, optional): Prompt template string. Defaults to REFLEXION_REACT_INSTRUCTION. Must include examples, reflections, question, scratchpad, and max_steps. - examples (str, optional): Fewshot examples. Defaults to REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES. + examples (str, optional): Fewshot examples. Defaults to HOTPOTQA_FEWSHOT_EXAMPLES. reflect_examples (str, optional): Reflection fewshot examples. Defaults to REFLEXION_REACT_REFLECT_FEWSHOT_EXAMPLES. reflect_prompt (str, optional): Reflect prompt template string. Defaults to REFLEXION_REACT_REFLECT_INSTRUCTION. Must include examples, question, and scratchpad. diff --git a/discussion_agents/cog/agent/react.py b/discussion_agents/cog/agent/react.py index f716e2d15..21819f75b 100644 --- a/discussion_agents/cog/agent/react.py +++ b/discussion_agents/cog/agent/react.py @@ -24,8 +24,8 @@ from discussion_agents.cog.functional.react import _is_halted, _prompt_agent from discussion_agents.cog.modules.memory.react import ReActMemory from discussion_agents.cog.prompts.react import ( - REACT_INSTRUCTION, - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, + REACT_INSTRUCTION_HOTPOTQA, ) from discussion_agents.utils.parse import parse_action, remove_newline @@ -79,8 +79,8 @@ def generate( self, question: str, reset: bool = True, - examples: str = REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, - prompt: str = REACT_INSTRUCTION, + examples: str = HOTPOTQA_FEWSHOT_EXAMPLES, + prompt: str = REACT_INSTRUCTION_HOTPOTQA, ) -> List[Tuple[str, str, str]]: """Processes a given question through ReAct. @@ -90,8 +90,8 @@ def generate( Args: question (str): The question to be processed. reset (bool, optional): Whether to reset the internal state before processing. Defaults to True. - examples (str, optional): Fewshot examples. Defaults to REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES. - prompt (str, optional): Prompt template string. Defaults to REACT_INSTRUCTION. Must include question, + examples (str, optional): Fewshot examples. Defaults to HOTPOTQA_FEWSHOT_EXAMPLES. + prompt (str, optional): Prompt template string. Defaults to REACT_INSTRUCTION_HOTPOTQA. Must include question, scratchpad, examples, and max_steps. Returns: diff --git a/discussion_agents/cog/agent/reflexion.py b/discussion_agents/cog/agent/reflexion.py index 68956d85c..cc64ccdb2 100644 --- a/discussion_agents/cog/agent/reflexion.py +++ b/discussion_agents/cog/agent/reflexion.py @@ -27,7 +27,7 @@ ReflexionCoTReflector, ReflexionReActReflector, ) -from discussion_agents.cog.prompts.react import REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES +from discussion_agents.cog.prompts.react import HOTPOTQA_FEWSHOT_EXAMPLES from discussion_agents.cog.prompts.reflexion import ( REFLEXION_COT_FEWSHOT_EXAMPLES, REFLEXION_COT_FEWSHOT_EXAMPLES_NO_CONTEXT, @@ -371,7 +371,7 @@ def generate( self, question: str, key: str, - examples: str = REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + examples: str = HOTPOTQA_FEWSHOT_EXAMPLES, strategy: Optional[str] = None, reset: bool = True, prompt: str = REFLEXION_REACT_INSTRUCTION, @@ -386,7 +386,7 @@ def generate( Args: question (str): The question to be processed. key (str): The answer to the question. - examples (str, optional): Fewshot examples. Defaults to REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES. + examples (str, optional): Fewshot examples. Defaults to HOTPOTQA_FEWSHOT_EXAMPLES. strategy (Optional[str]): The reflection strategy. Can be of 3 types. Defaults to None. - "last_attempt": This strategy uses only 'question' and 'scratchpad'. The 'reflections' list is updated with the current scratchpad. - "reflexion": This strategy uses all the parameters. It adds a new reflexion generated by the language model to the 'reflections' list. diff --git a/discussion_agents/cog/functional/expel.py b/discussion_agents/cog/functional/expel.py index 135b3c0d3..ea982ccb1 100644 --- a/discussion_agents/cog/functional/expel.py +++ b/discussion_agents/cog/functional/expel.py @@ -21,7 +21,7 @@ SYSTEM_CRITIQUE_EXISTING_INSIGHTS_INSTRUCTION, SYSTEM_TEMPLATE, ) -from discussion_agents.cog.prompts.react import REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES +from discussion_agents.cog.prompts.react import HOTPOTQA_FEWSHOT_EXAMPLES from discussion_agents.cog.prompts.reflexion import ( REFLEXION_REACT_INSTRUCTION, REFLEXION_REACT_REFLECT_FEWSHOT_EXAMPLES, @@ -37,7 +37,7 @@ def gather_experience( keys: List[str], strategy: str = "reflexion", prompt: str = REFLEXION_REACT_INSTRUCTION, - examples: str = REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + examples: str = HOTPOTQA_FEWSHOT_EXAMPLES, reflect_examples: str = REFLEXION_REACT_REFLECT_FEWSHOT_EXAMPLES, reflect_prompt: str = REFLEXION_REACT_REFLECT_INSTRUCTION, ) -> Dict[str, List]: @@ -52,7 +52,7 @@ def gather_experience( strategy (str, optional): The strategy used to generate experiences. Defaults to "reflexion" if not specified. prompt (str, optional): Prompt template string. Defaults to REFLEXION_REACT_INSTRUCTION. Must include examples, reflections, question, scratchpad, and max_steps. - examples (str, optional): Fewshot examples. Defaults to REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES. + examples (str, optional): Fewshot examples. Defaults to HOTPOTQA_FEWSHOT_EXAMPLES. reflect_examples (str, optional): Reflection fewshot examples. Defaults to REFLEXION_REACT_REFLECT_FEWSHOT_EXAMPLES. reflect_prompt (str, optional): Reflect prompt template string. Defaults to REFLEXION_REACT_REFLECT_INSTRUCTION. Must include examples, question, and scratchpad. diff --git a/discussion_agents/cog/functional/generative_agents.py b/discussion_agents/cog/functional/generative_agents.py index 4afe0bfbe..a85a70154 100644 --- a/discussion_agents/cog/functional/generative_agents.py +++ b/discussion_agents/cog/functional/generative_agents.py @@ -12,7 +12,7 @@ from langchain.embeddings import HuggingFaceEmbeddings from langchain.prompts import PromptTemplate from langchain.retrievers import TimeWeightedVectorStoreRetriever -from langchain.vectorstores import FAISS +from langchain_community.vectorstores.faiss import FAISS from langchain_core.retrievers import BaseRetriever from discussion_agents.utils.fetch import fetch_memories diff --git a/discussion_agents/cog/functional/react.py b/discussion_agents/cog/functional/react.py index 3cba42f02..f780ba991 100644 --- a/discussion_agents/cog/functional/react.py +++ b/discussion_agents/cog/functional/react.py @@ -5,7 +5,7 @@ from tiktoken import Encoding from discussion_agents.cog.prompts.react import ( - REACT_INSTRUCTION, + REACT_INSTRUCTION_HOTPOTQA, ) from discussion_agents.utils.parse import remove_newline @@ -15,7 +15,7 @@ def _build_agent_prompt( scratchpad: str, examples: str, max_steps: int, - prompt: str = REACT_INSTRUCTION, + prompt: str = REACT_INSTRUCTION_HOTPOTQA, ) -> str: """Constructs a prompt template for the agent. @@ -27,7 +27,7 @@ def _build_agent_prompt( scratchpad (str): Additional scratchpad information to be included. examples (str): Fewshot examples. max_steps (int): Max number of steps. - prompt (str, optional): Prompt template string. Defaults to REACT_INSTRUCTION. Must include question, + prompt (str, optional): Prompt template string. Defaults to REACT_INSTRUCTION_HOTPOTQA. Must include question, scratchpad, examples, and max_steps. Returns: @@ -48,7 +48,7 @@ def _prompt_agent( scratchpad: str, examples: str, max_steps: int, - prompt: str = REACT_INSTRUCTION, + prompt: str = REACT_INSTRUCTION_HOTPOTQA, ) -> str: """Generates a response from the LLM based on a given question and scratchpad. @@ -61,7 +61,7 @@ def _prompt_agent( scratchpad (str): Additional context or information for the language model. examples (str): Fewshot examples. max_steps (int): Maximum number of steps. - prompt (str, optional): Prompt template string. Defaults to REACT_INSTRUCTION. Must include question, + prompt (str, optional): Prompt template string. Defaults to REACT_INSTRUCTION_HOTPOTQA. Must include question, scratchpad, examples, and max_steps. Returns: @@ -94,7 +94,7 @@ def _is_halted( max_steps: int, max_tokens: int, enc: Encoding, - prompt: str = REACT_INSTRUCTION, + prompt: str = REACT_INSTRUCTION_HOTPOTQA, ) -> bool: """Determines whether the agent's operation should be halted. @@ -111,7 +111,7 @@ def _is_halted( max_steps (int): Maximum allowed steps. max_tokens (int): Maximum allowed token count. enc (Encoding): The encoder to calculate token length. - prompt (str, optional): Prompt template string. Defaults to REACT_INSTRUCTION. Must include question, + prompt (str, optional): Prompt template string. Defaults to REACT_INSTRUCTION_HOTPOTQA. Must include question, scratchpad, examples, and max_steps. Returns: diff --git a/discussion_agents/cog/prompts/react.py b/discussion_agents/cog/prompts/react.py index 63ab9ee31..8f4345b04 100644 --- a/discussion_agents/cog/prompts/react.py +++ b/discussion_agents/cog/prompts/react.py @@ -1,7 +1,10 @@ """ReAct prompts and fewshot examples.""" -REACT_INSTRUCTION = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types: +# ======================================================================== HOTPOTQA ======================================================================== # + + +REACT_INSTRUCTION_HOTPOTQA = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types: (1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search. (2) Lookup[keyword], which returns the next sentence containing keyword in the last passage successfully found by Search. (3) Finish[answer], which returns the answer and finishes the task. @@ -13,7 +16,7 @@ Question: {question}{scratchpad}""" -REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES = """Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into? +HOTPOTQA_FEWSHOT_EXAMPLES = """Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into? Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area. Action 1: Search[Colorado orogeny] Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas. @@ -74,3 +77,45 @@ Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. Action 3: Finish[yes]""" + + +# ======================================================================== FEVER ======================================================================== # + + +REACT_INSTRUCTION_FEVER = """Determine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. and Action can be two types: +(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search. +(3) Finish[answer], which returns the answer and finishes the task. +You have a maximum of {max_steps} steps. + +Here are some examples: +{examples} +(END OF EXAMPLES) + +Question: {question}{scratchpad}""" + +FEVER_FEWSHOT_EXAMPLES = """Claim: Nikolaj Coster-Waldau worked with the Fox Broadcasting Company. +Thought 1: I need to search Nikolaj Coster-Waldau and find if he has worked with the Fox Broadcasting Company. +Action 1: Search[Nikolaj Coster-Waldau] +Observation 1: Nikolaj William Coster-Waldau (born 27 July 1970) is a Danish actor and producer. He graduated from the Danish National School of Performing Arts in Copenhagen in 1993,[1] and had his breakthrough role in Denmark with the film Nightwatch (1994). He played Jaime Lannister in the HBO fantasy drama series Game of Thrones, for which he received two Primetime Emmy Award nominations for Outstanding Supporting Actor in a Drama Series.. Coster-Waldau has appeared in numerous films in his native Denmark and Scandinavia, including Headhunters (2011) and A Thousand Times Good Night (2013). In the U.S, his debut film role was in the war film Black Hawk Down (2001), playing Medal of Honor recipient Gary Gordon.[2] He then played a detective in the short-lived Fox television series New Amsterdam (2008), and appeared in the 2009 Fox television film Virtuality, originally intended as a pilot. +Thought 2: Because he "appeared in the 2009 Fox television film Virtuality", he should have worked with the Fox Broadcasting Company. +Action 2: Finish[SUPPORTS] + +Claim: Stranger Things is set in Bloomington, Indiana. +Thought 1: I should search for Stranger Things, and see if it is set in Bloomington, Indiana. +Action 1: Search[Stranger Things] +Observation 1: Stranger Things is an American science fiction horror drama television series created by the Duffer Brothers. Set in the 1980s, primarily in the fictional town of Hawkins, Indiana, the series centers on a number of mysteries and supernatural events occurring around the town and their impact on an ensemble of child and adult characters. +Thought 2: The observation says that it is set in a "fictional town of Hawkins, Indiana", so it is not set in Bloomington. +Action 2: Finish[REFUTES] + +Claim: Beautiful reached number two on the Billboard Hot 100 in 2003.? +Thought 1: I need to search the song Beautiful and find if it reached number two on the Billboard Hot 100 in 2003. +Action 1: Search[Beautiful] +Observation 1: Could not find [Beautiful]. Similar: ['Beautiful', 'Beautiful, Beautiful', 'A Beautiful Mind (film)', 'Beautiful (Christina Aguilera song)', 'Life Is Beautiful']. +Thought 2: From suggestions, I should search "Beautiful (Christina Aguilera song)" to find the song. +Action 2: Search[Beautiful (Christina Aguilera song)] +Observation 2: "Beautiful" is a song recorded by American singer Christina Aguilera for her fourth studio album, Stripped (2002). +Thought 3: It does not mention Billboard, so I need to look up "Billboard Hot 100" to find if it reached number two on it in 2003. +Action 3: Lookup[Billboard Hot 100] +Observation 3: (Result 1 / 3) The song peaked at number two on the Billboard Hot 100 in the United States, where it was certified Gold for 500,000 units shipped. +Thought 4: It only says the song peaked at number two on the Billboard Hot 100, but not if it was in 2003. I am not sure if this claim is true or not. +Action 4: Finish[NOT ENOUGH INFO]""" diff --git a/test_environment.py b/test_environment.py index d0ac4a725..8fd77eaf4 100644 --- a/test_environment.py +++ b/test_environment.py @@ -22,4 +22,4 @@ def main(): if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/tests/cog/agent/test_react.py b/tests/cog/agent/test_react.py index 6ef7e720a..f6bd7642c 100644 --- a/tests/cog/agent/test_react.py +++ b/tests/cog/agent/test_react.py @@ -7,6 +7,10 @@ from discussion_agents.cog.agent.react import ReActAgent, ZeroShotReActAgent from discussion_agents.cog.modules.memory.react import ReActMemory +from discussion_agents.cog.prompts.react import ( + HOTPOTQA_FEWSHOT_EXAMPLES, + REACT_INSTRUCTION_HOTPOTQA, +) def test_init() -> None: @@ -31,24 +35,27 @@ def test_generate() -> None: q = 'Who was once considered the best kick boxer in the world, however he has been involved in a number of controversies relating to his "unsportsmanlike conducts" in the sport and crimes of violence outside of the ring' # Test a single max step and verify no more steps can be taken. + gt_out = "Thought: I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\nAction: Search[best kick boxer]" responses = [ ' I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\nAction: Search[best kick boxer in the world]\nObservation: (Result 1/1) Ramon Dekkers is considered by many to be the best kickboxer in the world.\nThought: It mentions "unsportsmanlike conducts" and crimes of violence. I need to find more information about Ramon Dekkers.\nAction: Lookup[crimes]\nObservation: (Result 1/1) Dekkers was involved in a number of controversies relating to his "unsportsmanlike conducts" in the sport and crimes of violence outside of the ring.\nThought: Ramon Dekkers has been involved in controversies and crimes. I need to find more information about them.\nAction: Lookup[controversies]\nObservation: (Result 1/1) Dekkers was known for his aggressive style and has been involved in a number of controversies, including a bar brawl and an altercation with a bouncer.\nThought: It mentions "unsportsmanlike conducts" and crimes of violence. I need to find more information about the controversies and crimes.\nAction: Lookup[unsportsmanlike conducts]\nObservation: (Result', ' Search[best kick boxer]\nObservation: The best kick boxer in the world is often a highly debated topic, but some notable names include Semmy Schilt, Peter Aerts, Ernesto Hoost, and Ramon Dekkers.\nThought: Since the question mentions controversies and crimes, I should focus on more recent kick boxers. I will look up the controversies and crimes of Semmy Schilt.\nAction: Lookup[controversies and crimes]\nObservation: (Result 1/1) Semmy Schilt has been involved in several controversies, including accusations of using performance-enhancing drugs and unsportsmanlike conducts such as eye-gouging and low blows.\nThought: The question mentions "unsportsmanlike conducts" specifically, so I will look up more information on those incidents.\nAction: Lookup[unsportsmanlike conducts]\nObservation: (Result 1/1) Semmy Schilt has been known for his aggressive and sometimes controversial fighting style, with incidents such as eye-gouging and low blows being reported by his opponents.\nThought: The question also mentions crimes outside of the ring, so I will search for any criminal record or charges against Semmy Schilt.\nAction: Search[Semmy Schilt criminal record]\nObservation', ] llm = FakeListChatModel(responses=responses) agent = ReActAgent(llm=llm, max_steps=1) - out = agent.generate(question=q) - - gt_out = ( - "Thought: I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\n" - "Action: Search[best kick boxer]" + out = agent.generate( + question=q, + examples=HOTPOTQA_FEWSHOT_EXAMPLES, + prompt=REACT_INSTRUCTION_HOTPOTQA, ) + assert out assert isinstance(out, list) for triplet in out: assert isinstance(triplet, tuple) - assert "\n".join(out[0][:2]) == gt_out assert agent._step_n == agent.max_steps + 1 assert not agent._finished + assert "\n".join(out[0][:2]) == gt_out + scratchpad = "\n".join(agent.retrieve()["scratchpad"].split("\n")[:-1]) + assert scratchpad.strip() == gt_out # Verify no more steps can be taken. out = agent.generate(question=q, reset=False) @@ -58,11 +65,18 @@ def test_generate() -> None: assert isinstance(triplet, tuple) assert agent._step_n == agent.max_steps + 1 assert not agent._finished - scratchpad = "\n".join(agent.retrieve()["scratchpad"].split("\n")[:-1]) assert scratchpad.strip() == gt_out # Test agent runs out of tokens (must ensure that max_steps is not reached and task is not finished). + gt_out = ( + "Thought: I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\n" + "Action: INVALID[best kick boxer]\n" + "Observation 1: Invalid Action. Valid Actions are Lookup[] Search[] and Finish[].\n" + "Thought: I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\n" + "Action: INVALID[best kick boxer]\n" + "Observation 2: Invalid Action. Valid Actions are Lookup[] Search[] and Finish[]." + ) responses = [ ' I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\nAction: Search[best kick boxer in the world]\nObservation: (Result 1/1) Ramon Dekkers is considered by many to be the best kickboxer in the world.\nThought: It mentions "unsportsmanlike conducts" and crimes of violence. I need to find more information about Ramon Dekkers.\nAction: Lookup[crimes]\nObservation: (Result 1/1) Dekkers was involved in a number of controversies relating to his "unsportsmanlike conducts" in the sport and crimes of violence outside of the ring.\nThought: Ramon Dekkers has been involved in controversies and crimes. I need to find more information about them.\nAction: Lookup[controversies]\nObservation: (Result 1/1) Dekkers was known for his aggressive style and has been involved in a number of controversies, including a bar brawl and an altercation with a bouncer.\nThought: It mentions "unsportsmanlike conducts" and crimes of violence. I need to find more information about the controversies and crimes.\nAction: Lookup[unsportsmanlike conducts]\nObservation: (Result', " INVALID[best kick boxer]\n", @@ -72,16 +86,6 @@ def test_generate() -> None: llm=llm, max_steps=3, max_tokens=1750 ) # 3 steps leads to 1774 tokens. out = agent.generate(question=q) - - gt_out = ( - "Thought: I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\n" - "Action: INVALID[best kick boxer]\n" - "Observation 1: Invalid Action. Valid Actions are Lookup[] Search[] and Finish[].\n" - "Thought: I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\n" - "Action: INVALID[best kick boxer]\n" - "Observation 2: Invalid Action. Valid Actions are Lookup[] Search[] and Finish[]." - ) - assert isinstance(out, list) for triplet in out: assert isinstance(triplet, tuple) @@ -89,6 +93,12 @@ def test_generate() -> None: assert agent.memory.load_memories()["scratchpad"].strip() == gt_out # Test full trajectoy/trial till finish. + gt_out = ( + "\n" + "Thought: I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\n" + "Action: Finish[Badr Hari]\n" + "Observation 1: Badr Hari" + ) responses = [ ' I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\nAction: Search[best kick boxer in the world]\nObservation: (Result 1/1) Ramon Dekkers is considered by many to be the best kickboxer in the world.\nThought: It mentions "unsportsmanlike conducts" and crimes of violence. I need to find more information about Ramon Dekkers.\nAction: Lookup[crimes]\nObservation: (Result 1/1) Dekkers was involved in a number of controversies relating to his "unsportsmanlike conducts" in the sport and crimes of violence outside of the ring.\nThought: Ramon Dekkers has been involved in controversies and crimes. I need to find more information about them.\nAction: Lookup[controversies]\nObservation: (Result 1/1) Dekkers was known for his aggressive style and has been involved in a number of controversies, including a bar brawl and an altercation with a bouncer.\nThought: It mentions "unsportsmanlike conducts" and crimes of violence. I need to find more information about the controversies and crimes.\nAction: Lookup[unsportsmanlike conducts]\nObservation: (Result', " Finish[Badr Hari]\n", @@ -96,12 +106,6 @@ def test_generate() -> None: llm = FakeListChatModel(responses=responses) agent = ReActAgent(llm=llm, max_steps=5) out = agent.generate(question=q) - gt_out = ( - "\n" - "Thought: I need to search for the best kick boxer in the world, and then find any controversies or crimes they have been involved in.\n" - "Action: Finish[Badr Hari]\n" - "Observation 1: Badr Hari" - ) assert isinstance(out, list) for triplet in out: assert isinstance(triplet, tuple) diff --git a/tests/cog/functional/test_react_functional.py b/tests/cog/functional/test_react_functional.py index ac7dbccf9..c4dece46a 100644 --- a/tests/cog/functional/test_react_functional.py +++ b/tests/cog/functional/test_react_functional.py @@ -8,7 +8,7 @@ _is_halted, _prompt_agent, ) -from discussion_agents.cog.prompts.react import REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES +from discussion_agents.cog.prompts.react import HOTPOTQA_FEWSHOT_EXAMPLES def test__build_agent_prompt() -> None: @@ -16,7 +16,7 @@ def test__build_agent_prompt() -> None: prompt = _build_agent_prompt( question="", scratchpad="", - examples=REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + examples=HOTPOTQA_FEWSHOT_EXAMPLES, max_steps=1, ) @@ -112,7 +112,7 @@ def test__prompt_agent() -> None: llm=FakeListChatModel(responses=["1"]), question="", scratchpad="", - examples=REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + examples=HOTPOTQA_FEWSHOT_EXAMPLES, max_steps=1, ) assert isinstance(out, str) @@ -123,7 +123,7 @@ def test__prompt_agent() -> None: llm=FakeListChatModel(responses=["1"]), question="", scratchpad="", - examples=REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + examples=HOTPOTQA_FEWSHOT_EXAMPLES, max_steps=1, prompt="{question} {scratchpad} {examples} {max_steps}", ) @@ -141,7 +141,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, 10, 100, gpt3_5_turbo_enc, @@ -153,7 +153,7 @@ def test__is_halted() -> None: 11, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, 10, 100, gpt3_5_turbo_enc, @@ -165,7 +165,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, 10, 10, gpt3_5_turbo_enc, @@ -177,7 +177,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, 10, 100000, gpt3_5_turbo_enc, @@ -189,7 +189,7 @@ def test__is_halted() -> None: 10, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, 10, 100, gpt3_5_turbo_enc, @@ -201,7 +201,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, 10, 1603, gpt3_5_turbo_enc, @@ -213,7 +213,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, 10, 1603, gpt3_5_turbo_enc, diff --git a/tests/cog/functional/test_reflexion_functional.py b/tests/cog/functional/test_reflexion_functional.py index b39c518f3..56be3fb9b 100644 --- a/tests/cog/functional/test_reflexion_functional.py +++ b/tests/cog/functional/test_reflexion_functional.py @@ -26,7 +26,7 @@ react_reflect_last_attempt_and_reflexion, react_reflect_reflexion, ) -from discussion_agents.cog.prompts.react import REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES +from discussion_agents.cog.prompts.react import HOTPOTQA_FEWSHOT_EXAMPLES from discussion_agents.cog.prompts.reflexion import ( REFLEXION_COT_FEWSHOT_EXAMPLES, REFLEXION_COT_FEWSHOT_EXAMPLES_NO_CONTEXT, @@ -507,7 +507,7 @@ def test__prompt_react_agent() -> None: gt_out = "I need to search for VIVA Media AG and find out what their new acronym stands for.Action: Search[VIVA Media AG]" out = _prompt_react_agent( llm=FakeListChatModel(responses=responses), - examples=REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + examples=HOTPOTQA_FEWSHOT_EXAMPLES, reflections="", question=q, scratchpad="\nThought:", @@ -604,7 +604,7 @@ def test__prompt_react_agent() -> None: ) out = _prompt_react_agent( llm=FakeListChatModel(responses=responses), - examples=REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + examples=HOTPOTQA_FEWSHOT_EXAMPLES, reflections=reflections, question=q, scratchpad=scratchpad, @@ -623,7 +623,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, "", 10, 100, @@ -636,7 +636,7 @@ def test__is_halted() -> None: 11, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, "", 10, 100, @@ -649,7 +649,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, "", 10, 10, @@ -662,7 +662,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, "", 10, 100000, @@ -675,7 +675,7 @@ def test__is_halted() -> None: 10, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, "", 10, 100, @@ -688,7 +688,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, "", 10, 1603, @@ -701,7 +701,7 @@ def test__is_halted() -> None: 1, "question", "scratchpad", - REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES, + HOTPOTQA_FEWSHOT_EXAMPLES, "", 10, 1603, diff --git a/tests/cog/modules/memory/test_expel_memory.py b/tests/cog/modules/memory/test_expel_memory.py index fcf49aa88..6cc4ffe58 100644 --- a/tests/cog/modules/memory/test_expel_memory.py +++ b/tests/cog/modules/memory/test_expel_memory.py @@ -12,15 +12,11 @@ ExpeLExperienceMemory, ExpeLInsightMemory, ) -from discussion_agents.cog.prompts.react import REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES +from discussion_agents.cog.prompts.react import HOTPOTQA_FEWSHOT_EXAMPLES -fewshot_questions = re.findall( - r"Question: (.+?)\n", REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES -) -fewshot_keys = re.findall( - r"Action \d+: Finish\[(.+?)\]", REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES -) -blocks = re.split(r"(?=Question: )", REACT_WEBTHINK_SIMPLE6_FEWSHOT_EXAMPLES)[ +fewshot_questions = re.findall(r"Question: (.+?)\n", HOTPOTQA_FEWSHOT_EXAMPLES) +fewshot_keys = re.findall(r"Action \d+: Finish\[(.+?)\]", HOTPOTQA_FEWSHOT_EXAMPLES) +blocks = re.split(r"(?=Question: )", HOTPOTQA_FEWSHOT_EXAMPLES)[ 1: ] # Split and ignore the first empty result