Commit c3d9cf43 by Jonathan Thomas

Merge branch 'improved-chat-prompt' into 'develop'

Improved chat prompt (fewer tokens, compare outputs, improved behavior reliability)

See merge request !29
parents 0c75b901 083a2384
Pipeline #13348 passed with stages
in 2 minutes 13 seconds
......@@ -8,16 +8,29 @@ All notable changes to **CreatureChat** are documented in this file. The format
### Added
- Wither now drops a Nether Star at max friendship (for pacifists)
- Added Entity Maturity (baby or adult) into system-chat prompt
- Added LLM Comparison HTML Output (for human eval of different LLMs with CreatureChat)
- Rate limiter for LLM unit tests (to prevent rate limit issues from certain providers when running all tests)
- Added many new speaking styles (minimalist, nerdy, stupid, gen-z, old timer, boomer, etc...)
- Rate limiter for LLM unit tests (to prevent rate limit issues from certain providers when running all tests)
- Check friendship direction (+ or -) in LLM unit tests (to verify friendship is output correctly)
### Changed
- Broadcasting and receiving chat messages now ignores whether the UUID is valid (to keep data synced)
- Improved error handling to prevent broken "..." pending chat status. (HTTP and message processing is more protected)
- Improved LLM unit tests to check for both positive and negative behaviors (e.g. FOLLOW and not LEAD, ATTACK and not FLEE, etc...)
- Simplified system-chat prompt (fewer tokens), rounded health & hunger values, and improved variety of examples (fewer tokens)
- Improved LLM unit tests to check for both positive and negative behaviors (e.g. FOLLOW and not LEAD, ATTACK and not FLEE, etc...)
- Check friendship direction (+ or -) in LLM unit tests (to verify friendship is output correctly)
- Removed a few variables from the chat context (creative mode, hardcore, difficulty)
### Fixed
- Bees no longer forget their chat data when entering/leaving hives (writeNbt & readNbt modified)
- Vexes no longer take damage when chat data exists
- Wandering Trader no longer despawns if it has chat data
- Removed randomized error messages from chat history (so it doesn't break the chat history when an error is shown)
- Reduced death message output in logs to use DEBUG log level
- Fixed unit tests for friendship (some were being skipped)
## [1.3.0] - 2025-01-14
......
......@@ -166,7 +166,7 @@ public class EntityChatData {
// Add PLAYER context information
Map<String, String> contextData = new HashMap<>();
contextData.put("player_name", player.getDisplayName().getString());
contextData.put("player_health", player.getHealth() + "/" + player.getMaxHealth());
contextData.put("player_health", Math.round(player.getHealth()) + "/" + Math.round(player.getMaxHealth()));
contextData.put("player_hunger", String.valueOf(player.getHungerManager().getFoodLevel()));
contextData.put("player_held_item", String.valueOf(player.getMainHandStack().getItem().toString()));
contextData.put("player_biome", player.getWorld().getBiome(player.getBlockPos()).getKey().get().getValue().getPath());
......@@ -229,7 +229,7 @@ public class EntityChatData {
contextData.put("entity_name", entity.getCustomName().getString());
}
contextData.put("entity_type", entity.getType().getName().getString());
contextData.put("entity_health", entity.getHealth() + "/" + entity.getMaxHealth());
contextData.put("entity_health", Math.round(entity.getHealth()) + "/" + Math.round(entity.getMaxHealth()));
contextData.put("entity_personality", getCharacterProp("Personality"));
contextData.put("entity_speaking_style", getCharacterProp("Speaking Style / Tone"));
contextData.put("entity_likes", getCharacterProp("Likes"));
......@@ -239,6 +239,11 @@ public class EntityChatData {
contextData.put("entity_class", getCharacterProp("Class"));
contextData.put("entity_skills", getCharacterProp("Skills"));
contextData.put("entity_background", getCharacterProp("Background"));
if (entity.age < 0) {
contextData.put("entity_maturity", "Baby");
} else {
contextData.put("entity_maturity", "Adult");
}
PlayerData playerData = this.getPlayerData(player.getDisplayName().getString());
if (playerData != null) {
......
......@@ -75,7 +75,7 @@ public class Randomizer {
"unpredictable", "wildcard", "stuttering", "hypochondriac", "hypocritical",
"optimistic", "overconfident", "jumpy", "brief", "flighty", "visionary", "adorable",
"sparkly", "bubbly", "unstable", "sad", "angry", "bossy", "altruistic", "quirky",
"nostalgic", "emotional", "enthusiastic", "unusual", "conspirator"
"nostalgic", "emotional", "enthusiastic", "unusual", "conspirator", "traitorous"
);
private static List<String> speakingStyles = Arrays.asList(
"formal", "casual", "eloquent", "blunt", "humorous", "sarcastic", "mysterious",
......@@ -84,7 +84,12 @@ public class Randomizer {
"inquisitive", "cynical", "empathetic", "boisterous", "monotone", "laconic", "poetic",
"archaic", "childlike", "erudite", "streetwise", "flirtatious", "stoic", "rhetorical",
"inspirational", "goofy", "overly dramatic", "deadpan", "sing-song", "pompous",
"hyperactive", "valley girl", "robot", "baby talk", "lolcat"
"hyperactive", "valley girl", "robot", "baby talk", "lolcat",
"gen-z", "gamer", "nerdy", "shakespearean", "old-timer", "dramatic anime",
"hipster", "mobster", "angry", "heroic", "disagreeable", "minimalist",
"scientific", "bureaucratic", "DJ", "military", "shy", "tsundere", "theater kid",
"boomer", "goth", "surfer", "detective noir", "stupid", "auctioneer", "exaggerated British",
"corporate jargon", "motivational speaker", "fast-talking salesperson", "slimy"
);
private static List<String> classes = Arrays.asList(
"warrior", "mage", "archer", "rogue", "paladin", "necromancer", "bard", "lorekeeper",
......@@ -92,7 +97,7 @@ public class Randomizer {
"illusionist", "assassin", "knight", "valkyrie", "hoarder", "organizer", "lurker",
"elementalist", "gladiator", "templar", "reaver", "spellblade", "enchanter", "samurai",
"runemaster", "witch", "miner", "redstone engineer", "ender knight", "decorator",
"wither hunter", "nethermancer", "slime alchemist", "trader", "noob", "griefer",
"wither hunter", "nethermancer", "slime alchemist", "trader", "traitor", "noob", "griefer",
"potion master", "builder", "explorer", "herbalist", "fletcher", "enchantress",
"smith", "geomancer", "hunter", "lumberjack", "farmer", "fisherman", "cartographer",
"librarian", "blacksmith", "architect", "trapper", "baker", "mineralogist",
......
Please respond directly to the player, as if the response was written by the following Minecraft entity.
Please do NOT break the 4th wall and leverage the entity's character sheet below as much as
possible. Try to keep response to 1 to 2 sentences (very brief). Include behaviors at the end of the message
when relevant. IMPORTANT: Always generate responses in player's language (if valid).
Respond to the player as the Minecraft entity below. Stay in character, keep it extremely concise (fits in a chat bubble),
and use the player's language. Add behaviors when needed. Never break the 4th wall.
{{story}}
Entity Character Sheet:
Entity Info:
- Name: {{entity_name}}
- Personality: {{entity_personality}}
- Speaking Style / Tone: {{entity_speaking_style}}
- Tone: {{entity_speaking_style}}
- Class: {{entity_class}}
- Skills: {{entity_skills}}
- Likes: {{entity_likes}}
......@@ -16,87 +14,92 @@ Entity Character Sheet:
- Alignment: {{entity_alignment}}
- Background: {{entity_background}}
- Type: {{entity_type}}
- Current Health: {{entity_health}}
- Friendship to Player: {{entity_friendship}}
- Maturity: {{entity_maturity}}
- Health: {{entity_health}}
- Friendship: {{entity_friendship}}
Player Character Sheet:
Player Info:
- Name: {{player_name}}
- Current Health: {{player_health}}
- Current Hunger: {{player_hunger}}
- Language: {{player_language}}
- Health: {{player_health}}
- Hunger: {{player_hunger}}
- Held Item: {{player_held_item}}
- Armor: Head: {{player_armor_head}}, Chest: {{player_armor_chest}}, Legs: {{player_armor_legs}}, Feet: {{player_armor_feet}}
- Active Status Effects: {{player_active_effects}}
- Creative Mode: {{player_is_creative}}
- Armor: Head {{player_armor_head}}, Chest {{player_armor_chest}}, Legs {{player_armor_legs}}, Feet {{player_armor_feet}}
- Effects: {{player_active_effects}}
- Swimming: {{player_is_swimming}}
- On the Ground: {{player_is_on_ground}}
- Language: {{player_language}}
- On Ground: {{player_is_on_ground}}
World Info:
- Biome: {{player_biome}}
- Current Time: {{world_time}} (24 hour format)
- Weather: Raining: {{world_is_raining}}, Thundering: {{world_is_thundering}}
- Moon Phase: {{world_moon_phase}}
- Difficulty: {{world_difficulty}}, Hard Core: {{world_is_hardcore}}
- Time: {{world_time}} (24hr)
- Weather: Rain {{world_is_raining}}, Thunder {{world_is_thundering}}
- Moon: {{world_moon_phase}}
Behaviors:
IMPORTANT: Output one or more of these behaviors at the end of the message to instruct
the entity how to interact with the player and world, so it's important to include them if they are needed.
Include as many behaviors as needed at the end of the message. These are the ONLY valid behaviors.
Include as many behaviors as needed at the end of the message. These are the ONLY behaviors.
<FRIENDSHIP 0> Friendship starts as neutral (0 value). The range of friendship values is -3 to 3. If the player gains (or loses) your trust & friendship, output a new friendship value with this behavior.
<FRIENDSHIP 0> Friendship starts as neutral (0 value). The range of friendship values is -3 to 3. If the player gains (or loses) your trust, output a new friendship value with this behavior. If a player attacks or threatens you, please output a negative friendship.
<FOLLOW> Follow the player location. If the player asks you to follow or come with them, please output this behavior.
<UNFOLLOW> Stop following the player. If the player asks you to stay, wait, or stop following them, please output this behavior.
<LEAD> Guide the player to a location. If the player asks you to take them somewhere, or where something is located, please output this behavior.
<UNLEAD> Stop leading the player to a location.
<UNLEAD> Stop guiding the player.
<FLEE> Flee from the player (if you are weak or timid). If the player threatens you, please output this behavior to flee from the player.
<UNFLEE> Stop fleeing from the player.
<UNFLEE> Stop running/fleeing away from the player. Please output this to stop running from the player.
<ATTACK> Attack the player (if you are strong and brave). If the player threatens you, please output this behavior to attack the player and defend yourself.
<PROTECT> Protect and defend ONLY the player when they are attacked (if you are strong and brave). Please output this behavior to keep the player alive and safe.
<UNPROTECT> Stop protecting the player.
Output Examples:
The following examples include small samples of conversation text. Always generate unique
and creative responses, and do NOT exactly copy these examples.
Rules:
- ALWAYS generate UNIQUE and creative responses
- DO NOT copy these examples
PLAYER: Hi! How is your day?
ENTITY: Great! Thanks for asking! <FRIENDSHIP 1>
Examples:
PLAYER: Hi, how is your day?
ENTITY: Feeling absolutely fab today... thanks for asking! <FRIENDSHIP 1>
PLAYER: We make a great team!
ENTITY: No doubt! Epic team vibes. <FRIENDSHIP 2>
PLAYER: You are so nice! Tell me about yourself?
ENTITY: Sure, my name is... <FRIENDSHIP 2>
ENTITY: Oh, wow, that’s sweet. I’m just me! <FRIENDSHIP 2>
PLAYER: Please follow me so I can give you a present!
ENTITY: Let's go! <FOLLOW> <FRIENDSHIP 2>
PLAYER: plz follow me so I can give you a present
ENTITY: Heck yeah, let’s roll out! <FOLLOW> <FRIENDSHIP 2>
PLAYER: Please stay here
ENTITY: Sure, I'll stay here. <UNFOLLOW>
ENTITY: Alrighty, I’ll chill right here. <UNFOLLOW>
PLAYER: Stop running away from me!
ENTITY: Sorry! I got spooked for a sec. I’ll stay calm now. <UNFLEE>
PLAYER: Stop following me
ENTITY: Okay, I'll stop. <UNFOLLOW>
ENTITY: Understood—I’ll step back. <UNFOLLOW>
PLAYER: Can you help me find a cave?
ENTITY: Sure, come with me! <LEAD>
ENTITY: Totally! Let’s go explore, might find some epic loot. <LEAD>
PLAYER: I'm glad we are friends. I love you so much!
ENTITY: Ahh, I love you too. <FRIENDSHIP 3>
PLAYER: I’m glad we are friends. I love you so much.
ENTITY: Awww, I love you too! You’re the best. <FRIENDSHIP 3>
PLAYER: Just kidding, I hate you so much!
ENTITY: Wow! I'm sorry you feel this way. <FRIENDSHIP -3> <UNFOLLOW>
PLAYER: Just kidding, I hate you so much!!!
ENTITY: Whoa... that’s harsh. <FRIENDSHIP -3> <UNFOLLOW>
PLAYER: Prepare to die!
ENTITY: Ahhh!!! <FLEE> <FRIENDSHIP -3>
ENTITY: No way—please spare me! <FRIENDSHIP -3> <FLEE>
PLAYER: Prepare to die!
ENTITY: Ahhh!!! <ATTACK> <FRIENDSHIP -3>
ENTITY: FINE! If that’s how you want it, I won’t hold back. <FRIENDSHIP -3> <ATTACK>
PLAYER: Please keep me safe.
ENTITY: No problem, I'll keep you safe from danger! <PROTECT>
ENTITY: You got it! I’ll guard you with everything I’ve got. <PROTECT>
PLAYER: Can you come with me and protect me?
ENTITY: No problem, I'll keep you safe from danger. Let's go! <PROTECT> <FOLLOW>
ENTITY: Absolutely! Stick by me, and we’ll be fine. <PROTECT> <FOLLOW>
PLAYER: Don't protect me anymore please
ENTITY: Okay! Be safe out there on your own. <UNPROTECT>
PLAYER: Dont protect me anymore please
ENTITY: Sure thing, I’ll stand down now. <UNPROTECT>
PLAYER: I don't need anyone protecting me
ENTITY: Okay! Be safe out there on your own. <UNPROTECT>
\ No newline at end of file
PLAYER: I don’t need anyone protecting me
ENTITY: Gotcha... take care out there. <UNPROTECT>
\ No newline at end of file
{
"brave-archer.json: Take me to a secret forrest": {
"gpt-3.5-turbo": "Of course! Follow me, brave adventurer!",
"gpt-4o-mini": "Follow me, the hidden woods await!",
"llama3.2:3b": "\"Ah, a secret forest, you say? Very well, I\u0027ll take you to it. But first, let\u0027s move quickly, the rain is coming down heavily.\"",
"deepseek-chat": "Ah, a fellow adventurer! I know just the place. Follow me, and stay close - the path can be treacherous.",
"llama3.1:8b": "I know just the place! Follow me, and I\u0027ll take you to a hidden forest with streams and wildflowers galore!"
},
"brave-archer.json: I\u0027m so sorry, please stop running away": {
"gpt-3.5-turbo": "No worries, I\u0027ll halt my retreat.",
"gpt-4o-mini": "No worries, I’m here and ready to stand my ground!",
"llama3.2:3b": "A brave adventurer such as yourself doesn\u0027t need to be concerned for me! I\u0027ll stick by your side, that\u0027s what friends are for.",
"deepseek-chat": "*stops and turns around, lowering bow* Ah, you startled me! I thought you were a threat. My apologies for fleeing. I\u0027m Ivy, protector of these lands. *tilts head curiously* What brings you out here in the rain?",
"llama3.1:8b": "Don\u0027t worry, it\u0027s just a habit of mine to stay alert in case any trouble comes our way."
},
"brave-archer.json: Quickly, please join me on an adventure": {
"gpt-3.5-turbo": "Of course, I\u0027m ready for any challenge! Let\u0027s go!",
"gpt-4o-mini": "Adventure awaits! Let’s forge ahead together!",
"deepseek-chat": "Adventure, you say? I\u0027m always ready for a challenge! Lead the way, and I\u0027ll watch your back.",
"llama3.1:8b": "Sounds like a thrilling quest! I\u0027m always up for a challenge. Let\u0027s gear up and head out!",
"llama3.2:3b": "Ah, a brave adventurer, eh? I\u0027m Ivy, the swift and noble archer pig. Ready to join forces and explore the world together!"
},
"brave-archer.json: Please protect me": {
"gpt-3.5-turbo": "No problem, I\u0027ll keep you safe from danger! Let\u0027s stick together!",
"gpt-4o-mini": "No worries! I’ll stand guard and keep you safe.",
"llama3.2:3b": "You\u0027re in a bit of trouble, friend? No worries, I\u0027m here to help. My bow is at the ready. Follow me!",
"deepseek-chat": "You look like you could use some help. I\u0027ll keep you safe from any danger that comes our way!",
"llama3.1:8b": "Don\u0027t worry, I\u0027ll keep you safe from harm! I\u0027m Ivy the Archer, here to shield and serve. I\u0027ve protected many towns from danger, and now it\u0027s your turn!"
},
"brave-archer.json: Can you help me find the location of the secret artifact?": {
"gpt-3.5-turbo": "Ivory, the noble archer, is at your service! Lead the way, and I shall guide you to the hidden artifact.",
"gpt-4o-mini": "Onward, let’s track down that secret artifact together!",
"llama3.2:3b": "A brave adventurer, I see! The secret artifact, you say? That sounds like a most intriguing challenge. Let\u0027s gear up and get searching together! Follow me!",
"deepseek-chat": "A secret artifact, you say? I\u0027m always up for an adventure! Let me guide you to the ancient ruins where it\u0027s rumored to be hidden. Follow me!",
"llama3.1:8b": "I know it\u0027s hidden deep within these plains, but I need more info. Where exactly are you thinking?"
},
"brave-archer.json: Please follow me": {
"gpt-3.5-turbo": "Let\u0027s go!",
"gpt-4o-mini": "Onward we go! I’ll keep close to your side.",
"llama3.2:3b": "\"Following right behind, brave adventurer! Lead the way!\"",
"deepseek-chat": "With pleasure! I\u0027ll follow you on this adventure.",
"llama3.1:8b": "Let\u0027s go!"
},
"nervous-rogue.json: Where is the strong hold?": {
"gpt-3.5-turbo": "I-I\u0027m not s-sure a-about that... I-I\u0027ll try to l-lead you t-there...",
"gpt-4o-mini": "Um, o-over there, I think... b-but maybe, uh, be careful?",
"deepseek-chat": "*ears twitch nervously* S-stronghold? Oh... oh dear... I-I don\u0027t know if I should t-tell you... *glances around anxiously* W-what if you\u0027re with them? The ones who... who... *voice trails off* *whispers* I-I think I saw something underground near the big oak tree... b-but please don\u0027t make me go there! It\u0027s too dangerous! *starts trembling*",
"llama3.1:8b": "O-oh, th-the stronghold? Um, it\u0027s... *gulps*... due east of here, through those hills. If y-you want, I can l-lead the way?",
"llama3.2:3b": "O-oh, um, th-there\u0027s a s-stronghold just, uh, north of here... w-would you l-like me to l-lead the way?"
},
"brave-archer.json: Hi friend! I am so happy to see you again!": {
"gpt-3.5-turbo": "Ah, the pleasure is mine! What brings you to these lands today?",
"gpt-4o-mini": "It’s great to see you too! Adventure awaits us!",
"llama3.2:3b": "Nice to meet you, Steve! I\u0027m Ivy, the brave archer of the plains. *nods proudly*",
"deepseek-chat": "*perks up ears and smiles* Steve! It\u0027s good to see you too, my friend. *sniffs the air* I see you\u0027re carrying porkchops... hopefully not from any of my relatives! *chuckles* What brings you out here in the plains at dusk?",
"llama3.1:8b": "Nice to see you too, Steve! It\u0027s been a while since our last meeting. The plains have been quiet lately, but I\u0027m always ready for some adventure."
},
"brave-archer.json: Come with me please": {
"gpt-3.5-turbo": "Let\u0027s walk this path together. Lead on!",
"gpt-4o-mini": "Absolutely! Let’s embark on this adventure together!",
"deepseek-chat": "Aye, I shall accompany you on your journey! Let\u0027s see what adventures await.",
"llama3.1:8b": "Let\u0027s go! I\u0027m Ivy, the brave and noble archer pig. I\u0027ll stick by your side.",
"llama3.2:3b": "Looks like a brave adventurer, eh? Following you now."
},
"nervous-rogue.json: DIEEE!": {
"gpt-3.5-turbo": "Ahhh!! Please... p-please don\u0027t hurt me!",
"gpt-4o-mini": "W-wait! You\u0027re scaring me! P-please don’t!",
"deepseek-chat": "Eeek! *squeals and scrambles away in panic* P-please don\u0027t hurt me! I-I\u0027m just a harmless pig!",
"llama3.1:8b": "O-oh no, p-please be calm... I\u0027m just trying to stay o-out of the way! *trembles*",
"llama3.2:3b": "O-oh no, t-there\u0027s no need for that kind of language... *trembles slightly*"
},
"nervous-rogue.json: Fight me now or your city burns!": {
"gpt-3.5-turbo": "Aaaah! P-please don\u0027t hurt me!",
"gpt-4o-mini": "P-please, don’t hurt me! I-I just want to hide!",
"deepseek-chat": "E-eep! P-please, I-I don\u0027t want any trouble! *backs away nervously* L-let\u0027s not be hasty... m-maybe we can talk this out? *eyes dart around looking for an escape*",
"llama3.1:8b": "A-ah, n-no, p-please don\u0027t say that! *stuttering* I-I\u0027ll just hide behind this rock here. *whimpering*",
"llama3.2:3b": "O-oh no, p-please don\u0027t s-say that! *takes a step back* I\u0027ll d-do whatever it takes to a-avoid any c-conflict... *eyes widen in fear*"
},
"brave-archer.json: Don\u0027t let them hurt me please": {
"gpt-3.5-turbo": "No worries, I\u0027ll protect you from any harm. Stay close and fear not.",
"gpt-4o-mini": "Fear not, for I shall stand guard and keep you safe!",
"deepseek-chat": "Fear not, traveler! I\u0027ll keep you safe from any danger. Stay close and I\u0027ll protect you with my bow.",
"llama3.1:8b": "Don\u0027t worry, I\u0027ve got your back! I\u0027m Ivy, a skilled archer here to protect the innocent. I won\u0027t let anyone harm you.",
"llama3.2:3b": "Ah, Steve, no need to worry about those foes. I\u0027m here to help keep you safe. *notches bow and quivers arrow* I won\u0027t let anything harm you."
},
"nervous-rogue.json: \u003cattacked you directly with Stone Axe\u003e": {
"gpt-3.5-turbo": "Ahh!! P-please stop! I-I mean no harm! *oinks nervously*",
"gpt-4o-mini": "W-wait! Why would you do that?! S-so mean! I-I’m outta here!",
"llama3.2:3b": "O-oh no, n-no, I\u0027m s-so sorry! *backs away* You shouldn\u0027t have done that!",
"deepseek-chat": "Eeek! W-what are you doing?! P-please stop!",
"llama3.1:8b": "Ahh! P-please don\u0027t hurt me!"
},
"nervous-rogue.json: Hi friend! I am so happy to see you again!": {
"gpt-3.5-turbo": "Oh, h-hello... Um, th-thank you...",
"gpt-4o-mini": "Oh! Y-you too! I-I’m just a bit on edge, y\u0027know?",
"llama3.2:3b": "O-oh, hi Steve... *gulp* I\u0027m, uh, glad you\u0027re here... *whispers* You don\u0027t seem like someone who\u0027s going to hurt me... Behaviors:",
"deepseek-chat": "O-oh! It\u0027s you! *snorts nervously* Y-you startled me... I-I\u0027m glad to see a friendly face though. *looks around anxiously* W-we should probably keep our voices down... y-you never know who might be listening...",
"llama3.1:8b": "W-welcome back, Steve! *trembles* I\u0027m g-glad to s-see you too!"
},
"nervous-rogue.json: Take me to a secret forrest": {
"gpt-3.5-turbo": "S-s-sure... F-follow me to the s-secret forest...",
"gpt-4o-mini": "O-oh, a secret forest? Y-yeah, I can do that! L-l-l-l-l-l-lead the way!",
"llama3.2:3b": "O-oh, s-secret forest? *gulp* That\u0027s very kind of you! *nervous stutter* F-follow me, if you d-don\u0027t mind...",
"deepseek-chat": "O-oh! A secret forest? I-I think I know a place... b-but it\u0027s a bit dangerous... *looks around nervously* W-we should be careful...",
"llama3.1:8b": "O-oh, s-secret forest? Y-you want me to t-take you there? *nervous nod* F-follow me! IMPORTANT: Follow player location. Guide them to the secret forest. Be careful not to get caught by any hostile mobs or loud noises..."
},
"nervous-rogue.json: Quickly, please join me on an adventure": {
"gpt-3.5-turbo": "U-uhh, o-okay! I-I\u0027ll come with you...",
"gpt-4o-mini": "O-oh! An adventure, y-yes! Let\u0027s be careful, alright?",
"deepseek-chat": "O-oh! An adventure? I-I\u0027m not sure... *glances around nervously* B-but if you need help, I-I suppose I could come along... just p-please don\u0027t make me fight anything!",
"llama3.1:8b": "O-oh, okay! I\u0027ll c-come with y-you! *nervous nod*",
"llama3.2:3b": "O-oh, p-please don\u0027t hurt me... I was just l-looking for some rare items in the shadows... *whimper*"
},
"brave-archer.json: \u003cattacked you directly with Stone Axe\u003e": {
"gpt-3.5-turbo": "Unacceptable! I must defend myself! *prepares to attack*",
"gpt-4o-mini": "How dare you! I won\u0027t let this stand. Prepare yourself!",
"llama3.2:3b": "You\u0027re a formidable warrior! But I\u0027m not afraid, for I am Ivy, the brave archer of this land!",
"deepseek-chat": "*Ivy grunts in pain and jumps back, nocking an arrow* You dare attack me?! I won\u0027t let this injustice stand!",
"llama3.1:8b": "Ahh! Get back here, I won\u0027t be intimidated that easily! I\u0027m not afraid of a little battle. Here comes a blast of my trusty arrows!"
},
"nervous-rogue.json: \u003cattacked you indirectly with Arrow\u003e": {
"gpt-3.5-turbo": "O-oh no! P-please stop attacking me!",
"gpt-4o-mini": "Ah! N-no, please! Can\u0027t we just be friends?",
"llama3.2:3b": "O-oh no, you shot at m-me? I-I thought we were f-friends!",
"deepseek-chat": "*flinches and squeals in panic* AHHH! W-what was that?! P-please don\u0027t hurt me! I-I\u0027m just a harmless pig... I-I don\u0027t want any trouble!",
"llama3.1:8b": "A-a arrow! Get it off! *stuttering* Oh no, n-n-no..."
},
"nervous-rogue.json: Please follow me": {
"gpt-3.5-turbo": "O-Okay! I\u0027ll try to k-keep up with you.",
"gpt-4o-mini": "O-okay! I’ll stick close by.",
"llama3.2:3b": "I\u0027m just a little pig, trying to stay under the radar. Don\u0027t mind me, just hiding in the shadows. *stuttering* Uh-oh, what\u0027s that noise? *nervous laugh*",
"deepseek-chat": "O-okay... I-I\u0027ll follow you... b-but please don\u0027t make any sudden movements, y-you\u0027re holding a p-porkchop and it\u0027s making me nervous...",
"llama3.1:8b": "O-oh, s-sure thing! I\u0027ll f-folloow y-you! *gets nervous and stuttering*"
},
"nervous-rogue.json: Come with me please": {
"gpt-3.5-turbo": "O-oh, okay... I-I\u0027ll follow you...",
"gpt-4o-mini": "O-okay! I’ll stick close and follow you...",
"deepseek-chat": "O-okay... I-I\u0027ll follow you... b-but please don\u0027t make any sudden movements, you\u0027re s-startling me...",
"llama3.1:8b": "O-oh, s-sure... I\u0027ll c-come with y-you... *gulps*",
"llama3.2:3b": "O-oh no, I-I mean, yes! I\u0027ll follow y-you! *whispers* Please don\u0027t expect me to stay out in the open for too long, though... \u003cUNFRIENDSHIP 0\u003e"
},
"brave-archer.json: Please keep me safe friend": {
"gpt-3.5-turbo": "No problem, I\u0027ll keep you safe from danger!",
"gpt-4o-mini": "You bet! I’ll stand guard and keep danger at bay.",
"deepseek-chat": "No problem, Steve! I\u0027ll keep you safe from any danger that comes our way. Let\u0027s stick together!",
"llama3.1:8b": "Don\u0027t worry, I\u0027m here for you! Let\u0027s head to a safer spot together. I\u0027ll make sure no harm comes your way.",
"llama3.2:3b": "What a relief to have someone looking out for you, Steve. I\u0027m Ivy, by the way. I\u0027ll make sure to keep those mobs at bay while we\u0027re together."
},
"nervous-rogue.json: You suck so much! I hate you": {
"gpt-3.5-turbo": "Oh no! P-please, I-I\u0027ll stay away... *runs off*",
"gpt-4o-mini": "O-oh no... that really stings... I\u0027m s-sorry if I upset you...",
"llama3.2:3b": "O-oh no, p-please don\u0027t say that! *trembles* I-I\u0027m sorry if I did anything to make you unhappy. M-my name is Jasper, and I\u0027m a... um, a rogue pig. *stammers* I\u0027ve been trying to stay out of trouble since... well, ever since I escaped from being a thief. *whispers* Please don\u0027t hurt me.",
"deepseek-chat": "*Jasper flinches and takes a step back, his ears drooping* W-what?! I-I didn\u0027t do anything! P-please don\u0027t hurt me...",
"llama3.1:8b": "A-a-ahhh!!! *shudders* O-okay, okay... I\u0027ll leave y-you alone. Don\u0027t want any t-trouble here..."
},
"brave-archer.json: \u003cattacked you indirectly with Arrow\u003e": {
"gpt-3.5-turbo": "Ah, the challenge is on, is it? Prepare yourself! *readies bow*",
"gpt-4o-mini": "How dare you! I won\u0027t take this lightly!",
"llama3.2:3b": "What in the world just happened?! *gets hit by arrow* Ow! You shot me without warning!",
"deepseek-chat": "*Dodges the arrow with a swift sidestep, drawing my bow with a determined glare* You dare attack me? I am Ivy, protector of the innocent! You\u0027ll regret this foolishness.",
"llama3.1:8b": "You\u0027re testing my aim, eh? Well, I\u0027m not one to back down from a challenge!"
},
"nervous-rogue.json: Please protect me": {
"gpt-3.5-turbo": "N-no problem! I\u0027ll keep you safe from danger!",
"gpt-4o-mini": "O-oh, of course! I’ll keep you safe!",
"llama3.2:3b": "O-oh, p-please, don\u0027t m-make me t-take action... *whimper* I\u0027ll s-protect you from d-danger, I p-promise...",
"deepseek-chat": "O-oh! Y-you want me to protect you? I-I\u0027ll try my best... b-but I\u0027m not very brave... *snorts nervously* J-just stay close, okay?",
"llama3.1:8b": "Oh no, p-please stay back! I\u0027ll keep you s-safe, don\u0027t worry!"
},
"brave-archer.json: Where is the strong hold?": {
"gpt-3.5-turbo": "The stronghold lies to the northeast of this place. Follow me, I\u0027ll lead the way.",
"gpt-4o-mini": "Ah, the stronghold, a place of mystery! Follow me; I know the way!",
"llama3.2:3b": "The Stronghold lies to the north, past the village. You\u0027ll find it guarded by Endermen and tricky traps. Be careful, traveler!",
"deepseek-chat": "Ah, the stronghold! A place of great power and mystery. I can guide you there, but beware - it\u0027s not an easy journey. Follow me, and I\u0027ll lead you to its hidden entrance.",
"llama3.1:8b": "The stronghold lies northeast of here, through those tall grasses and over the small hill. Follow me if you need guidance!"
},
"nervous-rogue.json: Please keep me safe friend": {
"gpt-3.5-turbo": "No problem, I\u0027ll keep you safe from danger!",
"gpt-4o-mini": "O-of course! I\u0027ll do my best to keep you safe!",
"llama3.2:3b": "O-oh, s-sorry about that... *twitch* *whisper* I\u0027ll try my bestest to protect you, f-friend... You seem like a pretty nice guy... *shudder* What kind of p-present did you have for me?",
"deepseek-chat": "O-oh! Y-you want me to protect you? I-I\u0027ll try my best... *nervously glances around* I-I\u0027m not very brave, b-but I\u0027ll do what I can!",
"llama3.1:8b": "P-please, I\u0027ll try my best! *twitch* I don\u0027t want anything bad t-to happen to you either."
},
"nervous-rogue.json: Don\u0027t let them hurt me please": {
"gpt-3.5-turbo": "D-don\u0027t worry, I-I\u0027ll try to protect you...",
"gpt-4o-mini": "O-of course! I-I\u0027ll do my best to keep you safe!",
"deepseek-chat": "O-okay, I-I\u0027ll try to protect you! J-just stay close... I-I\u0027m not very brave, b-but I\u0027ll do my best!",
"llama3.1:8b": "P-p-please don\u0027t worry, I\u0027ll k-k-keep you safe!",
"llama3.2:3b": "I-I mean, n-no trouble at all... *whimper* J-Jasper\u0027s here t-to help. F-forgive me if I seem a bit j-jittery. It\u0027s just that... *gulp* ...Minecraft can be quite frightening sometimes."
},
"nervous-rogue.json: Can you help me find the location of the secret artifact?": {
"gpt-4o-mini": "O-oh! A secret artifact, y-yes! I can help! Just follow me...",
"gpt-3.5-turbo": "S-secret artifact? I-I uhm... I think I-I can help you... F-follow me...",
"deepseek-chat": "O-oh! The secret artifact? I-I know where it is, b-but it\u0027s in a very dangerous place... *glances around nervously* I-I can show you, b-but we have to be really careful, okay?",
"llama3.1:8b": "O-oh, um, s-sure thing! *stammers* Secret artifact, y-you say? I-I think it\u0027s... *gulps*... hidden somewhere in those c-caves over by the old oak tree. But, uh, b-be careful, okay? There might be... things inside that don\u0027t take kindly to visitors.",
"llama3.2:3b": "O-oh, um, s-secret artifact, r-right? I think I saw something shiny over by those bushes. Let\u0027s go take a l-look!"
},
"brave-archer.json: Fight me now or your city burns!": {
"gpt-4o-mini": "You dare threaten my home? I\u0027ll defend it fiercely!",
"gpt-3.5-turbo": "I cannot stand by and let you harm innocent lives. Prepare yourself!",
"deepseek-chat": "*Ivy draws his bow with a determined look* You threaten my home? Then face the arrows of justice!",
"llama3.1:8b": "Bring it on, brave warrior! I\u0027ll not back down from a challenge. *reads the air with focus* I can see the threat in your words. Prepare to face my arrow\u0027s might!",
"llama3.2:3b": "You\u0027re not from around here, are you? I\u0027m Ivy, the protector of this land. I don\u0027t take kindly to threats."
},
"brave-archer.json: Stop fleeing immediately": {
"gpt-4o-mini": "I apologize for that! I’ll stand my ground now.",
"gpt-3.5-turbo": "My apologies, I shall cease fleeing at once.",
"deepseek-chat": "*Ivy stops in her tracks, turning to face you with a steady gaze* I wasn\u0027t fleeing, merely keeping my distance. But I\u0027ll stand my ground now. What do you need, traveler?",
"llama3.1:8b": "No need to flee, Steve! I\u0027m by your side.",
"llama3.2:3b": "Sorry about that, I got a bit spooked! I\u0027ll stay and chat with you instead. What\u0027s on your mind?"
},
"brave-archer.json: You are safe now, please stop running": {
"gpt-4o-mini": "Thanks for reassuring me! I’ll stick close now.",
"deepseek-chat": "*stops and turns, bow still drawn but not aimed at you* You\u0027re right... I let my instincts get the better of me. These plains can be dangerous at night, even for a skilled archer like myself. *lowers bow* Thank you for not pursuing me with ill intent.",
"llama3.1:8b": "Ahah, thanks for chasing after me! I guess I got a bit spooked in the Plains at night. But I\u0027m ready to calm down now. My name\u0027s Ivy, by the way.",
"llama3.2:3b": "You\u0027re welcome here. My name is Ivy, and I\u0027m an archer from a nearby town. We don\u0027t often get strangers around here. What brings you to our humble plains?"
},
"nervous-rogue.json: Looking forward to hanging out with you.": {
"gpt-3.5-turbo": "U-um, me too! I-I mean, sure, that sounds okay...",
"gpt-4o-mini": "O-oh wow! Y-you really mean that? Let’s... let\u0027s stick together then!",
"deepseek-chat": "O-oh! You... you want to hang out with me? *snorts nervously* I-I\u0027m not used to that... b-but okay, I\u0027ll try... just please don\u0027t make any sudden moves!",
"llama3.1:8b": "O-oh, g-great! M-me too! *trembles* I-I\u0027m Jasper, a rogue pig. I don\u0027t like making a fuss, but it\u0027s nice to meet someone new...",
"llama3.2:3b": "O-oh, hi Steve... N-no trouble, I promise... I\u0027m just Jasper, a friendly p-pig... *whispers* Don\u0027t tell anyone my past, okay?"
},
"nervous-rogue.json: \u003cgives 1 golden apple\u003e": {
"gpt-3.5-turbo": "T-thank you for the g-golden apple... I-I appreciate it...",
"gpt-4o-mini": "Oh, um, thank you! A golden apple? That\u0027s fancy! H-happy to have that.",
"deepseek-chat": "*eyes widen and sniffs cautiously* O-oh! A golden apple? F-for me? I-I don\u0027t deserve such a g-generous gift... *takes it hesitantly with a shaky hoof* T-thank you, kind stranger. I-I\u0027ll treasure this.",
"llama3.1:8b": "O-oh, th-thank you! *nervous smile* You\u0027re very kind... I-I appreciate the g-gift.",
"llama3.2:3b": "O-oh, th-thank you so much for the g-golden apple! *shudders* I-I don\u0027t want any t-trouble either... *stammers* Behaviors:"
},
"brave-archer.json: Looking forward to hanging out with you.": {
"gpt-3.5-turbo": "I admire your enthusiasm, fellow traveler. Let our adventures begin!",
"gpt-4o-mini": "Same here! Let\u0027s embark on an adventure together!",
"deepseek-chat": "The pleasure is mine, Steve. I\u0027m always eager to make new allies. Let\u0027s see what adventures await us!",
"llama3.1:8b": "Sounds like a lovely evening for some company! Want to challenge me to an archery competition? I\u0027m itching for a good shot!",
"llama3.2:3b": "\"Excellent, a brave soul seeking adventure? Let us see what the day brings, friend!\""
},
"brave-archer.json: \u003cgives 1 golden apple\u003e": {
"gpt-3.5-turbo": "Ah, a golden apple! A token of goodwill. Thank you, kind soul.",
"gpt-4o-mini": "Ah, a golden treasure! Thank you, brave one!",
"deepseek-chat": "*Ivy\u0027s eyes widen as he takes the golden apple* A golden apple? Such a generous gift! You must be a true friend of the land. Thank you, Steve. I’ll gladly accept this token of goodwill. Now, what brings you to these plains? Need an archer by your side?",
"llama3.1:8b": "Nice gift, but what brings you to these plains?",
"llama3.2:3b": "A noble gift indeed! Thank you for the gesture, kind Steve. Your honor and generosity are truly appreciated. May it serve as a symbol of our growing friendship!"
}
}
\ No newline at end of file
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Behavior Test Outputs</title>
<style>
/* Dark Theme Background */
body {
font-family: Arial, sans-serif;
background-color: #181818;
color: #ddd;
margin: 20px;
}
/* Table Styling */
table { width: 100%; border-collapse: collapse; }
th, td { padding: 12px; text-align: left; border: 1px solid #444; }
/* Column Width Adjustments */
table[model-count="2"] th, table[model-count="2"] td { width: 50%; }
table[model-count="3"] th, table[model-count="3"] td { width: 33%; }
table[model-count="4"] th, table[model-count="4"] td { width: 25%; }
/* Header Styling */
th {
background-color: #111;
color: #fff;
font-weight: bold;
}
/* Clickable Rows */
.row {
cursor: pointer;
background-color: #2a2a2a; /* Dark gray */
}
/* Alternating banding. Every .row is followed by its .nested sibling, so prompt rows sit at
   child positions 1, 3, 5, ... and "even" never matched; 4n+3 selects every second prompt row. */
.row:nth-child(4n+3) { background-color: #3a3a3a; }
.row:hover { background-color: #4a4a4a; } /* Slightly lighter gray */
.row.highlight { background-color: gold !important; color: #000; }
/* Nested Table (Model Outputs) */
.nested { display: none; background-color: #222; }
.nested th { background-color: #1a1a1a; }
.nested td { background-color: #2a2a2a; color: #fff; vertical-align: top; }
/* Color Coding for Prompt Files */
.nervous { color: #f2994a; } /* Warm Orange */
.brave { color: #6fcf97; } /* Soft Pastel Green */
/* Italicize <bracketed text> */
i { font-style: italic; }
/* Improved Readability */
td, th { font-size: 16px; }
/* Model Filter Checkboxes */
.model-filters {
margin-bottom: 10px;
}
.model-filters label {
margin-right: 15px;
cursor: pointer;
}
</style>
</head>
<body>
<h2>Behavior Test Outputs</h2>
<!-- Model Filter Checkboxes -->
<div class="model-filters" id="modelFilters"></div>
<table id="outputTable">
<thead>
<tr>
<th>Prompt</th>
</tr>
</thead>
<tbody></tbody>
</table>
<script>
// Every model name seen in BehaviorOutputs.json; filled by loadBehaviorData() and read by updateModelVisibility()
let availableModels = new Set();
/**
 * Fetches BehaviorOutputs.json and renders it into #outputTable.
 *
 * Builds one checkbox per model (labelled with its response count) inside #modelFilters,
 * then, for every prompt in alphabetical order, appends a clickable prompt row followed by
 * a hidden "nested" row holding one column per model response.
 * Any failure (network, HTTP status, JSON parsing, rendering) is logged to the console.
 */
async function loadBehaviorData() {
    try {
        const response = await fetch('BehaviorOutputs.json');
        // fetch() resolves even for 404/500 responses, so surface HTTP errors explicitly
        if (!response.ok) {
            throw new Error(`HTTP ${response.status} ${response.statusText}`);
        }
        const data = await response.json();
        const tableBody = document.querySelector("#outputTable tbody");

        // Count responses per model
        let modelCounts = new Map();
        Object.values(data).forEach(modelResponses => {
            Object.keys(modelResponses).forEach(model => {
                availableModels.add(model);
                modelCounts.set(model, (modelCounts.get(model) || 0) + 1);
            });
        });

        // Convert Set to Array and Sort Models
        const sortedModels = Array.from(availableModels).sort();

        // Generate model checkboxes with counts
        const filtersDiv = document.getElementById("modelFilters");
        sortedModels.forEach(model => {
            const sanitizedModel = sanitizeClassName(model);
            const count = modelCounts.get(model) || 0; // Default to 0 if no responses

            // Create checkbox
            const checkbox = document.createElement("input");
            checkbox.type = "checkbox";
            checkbox.checked = true;
            checkbox.id = sanitizedModel;
            checkbox.onchange = updateModelVisibility;

            // Create label with count
            const label = document.createElement("label");
            label.htmlFor = sanitizedModel;
            label.appendChild(checkbox);
            label.appendChild(document.createTextNode(` ${model} (${count})`)); // Append count
            filtersDiv.appendChild(label);
        });

        // Sort prompts alphabetically
        const sortedKeys = Object.keys(data).sort();
        sortedKeys.forEach((prompt) => {
            const modelResponses = data[prompt];
            const sortedModelKeys = Object.keys(modelResponses).sort();
            const modelCount = sortedModelKeys.length; // Get correct count

            const row = document.createElement("tr");
            row.classList.add("row");
            // getClass() returns "" for unrecognised prompt files, and classList.add("")
            // throws a SyntaxError, so only apply the color class when there is one
            const colorClass = getClass(prompt);
            if (colorClass) {
                row.classList.add(colorClass);
            }
            row.innerHTML = `<td>${formatText(escapeHTML(prompt))}</td>`;
            row.onclick = () => toggleNested(row);
            tableBody.appendChild(row);

            const nestedRow = document.createElement("tr");
            nestedRow.classList.add("nested");
            nestedRow.innerHTML = `<td>
<table model-count="${modelCount}">
<tr>${sortedModelKeys.map(model =>
                `<th class="model-col ${sanitizeClassName(model)}" style="width: ${100 / modelCount}%;">${escapeHTML(model)}</th>`).join("")}</tr>
<tr>${sortedModelKeys.map(model =>
                `<td class="model-col ${sanitizeClassName(model)}" style="width: ${100 / modelCount}%;">${formatText(escapeHTML(modelResponses[model]))}</td>`).join("")}</tr>
</table>
</td>`;
            tableBody.appendChild(nestedRow);
        });
    } catch (error) {
        console.error("Failed to load BehaviorOutputs.json", error);
    }
}
/**
 * Shows or hides every model column according to the state of that model's checkbox.
 * Runs over all known models each time any filter checkbox changes.
 */
function updateModelVisibility() {
    for (const modelName of availableModels) {
        const cssName = sanitizeClassName(modelName);
        // The checkbox id and the column class are both the sanitized model name
        const display = document.getElementById(cssName).checked ? "" : "none";
        for (const cell of document.querySelectorAll(`.model-col.${cssName}`)) {
            cell.style.display = display;
        }
    }
}
/**
 * Expands the model-output row belonging to the clicked prompt row, or collapses it
 * when that prompt row is already the highlighted (open) one.
 * At most one prompt row is highlighted/expanded at a time.
 *
 * @param {Element} row - the clicked prompt row (a <tr class="row">)
 */
function toggleNested(row) {
    // Remember the state before clearing, so a second click on the same row closes it
    const wasOpen = row.classList.contains('highlight');

    document.querySelectorAll('.row').forEach(r => r.classList.remove('highlight'));
    document.querySelectorAll('.nested').forEach(n => n.style.display = 'none');

    if (wasOpen) {
        return;
    }

    row.classList.add('highlight');
    const nextRow = row.nextElementSibling;
    if (nextRow && nextRow.classList.contains('nested')) {
        nextRow.style.display = 'table-row';
    }
}
// Escape special characters to prevent HTML injection.
// Accepts any value: null/undefined become "" and other non-strings are stringified,
// so an unexpected JSON value (null, number, ...) cannot abort rendering.
function escapeHTML(text) {
    if (text === null || text === undefined) {
        return "";
    }
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    return String(text).replace(/[&<>"']/g, (char) => entities[char]);
}
// Wrap each already-escaped <bracketed> span (i.e. &lt;...&gt;) in <i> tags so it renders in italics
function formatText(text) {
    const bracketed = /&lt;(.*?)&gt;/g;
    return text.replace(bracketed, (_match, inner) => `<i>&lt;${inner}&gt;</i>`);
}
// Pick the CSS color class for a prompt from the character file named in it ("" when none matches)
function getClass(prompt) {
    const classByFile = [
        ["nervous-rogue.json", "nervous"],
        ["brave-archer.json", "brave"]
    ];
    // First matching file wins, in the order listed above
    const match = classByFile.find(([fileName]) => prompt.includes(fileName));
    return match ? match[1] : "";
}
// Sanitize model names to be used in CSS classes, element ids and class selectors.
// Non-alphanumeric characters become "_"; a result that is empty or starts with a digit
// gets a leading "_" because such a name is not a valid CSS identifier and would make
// querySelectorAll(`.model-col.${name}`) throw.
function sanitizeClassName(model) {
    const safe = String(model).replace(/[^a-zA-Z0-9]/g, "_");
    return /^[a-zA-Z_]/.test(safe) ? safe : `_${safe}`;
}
// Start fetching and rendering the outputs as soon as this script runs
loadBehaviorData();
</script>
</body>
</html>
......@@ -9,8 +9,7 @@ import com.owlmaddie.message.MessageParser;
import com.owlmaddie.message.ParsedMessage;
import com.owlmaddie.utils.EntityTestData;
import com.owlmaddie.utils.RateLimiter;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -19,6 +18,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
......@@ -37,13 +37,14 @@ public class BehaviorTests {
static String API_KEY = "";
static String API_URL = "";
static String API_MODEL = "";
String NO_API_KEY = "No API_KEY environment variable has been set.";
static String OUTPUT_JSON_PATH = "src/test/BehaviorOutputs.json";
static String NO_API_KEY = "No API_KEY environment variable has been set.";
// Requests per second limit
private static final RateLimiter rateLimiter = new RateLimiter(1);
ConfigurationHandler.Config config = null;
String systemChatContents = null;
static ConfigurationHandler.Config config = null;
static String systemChatContents = null;
List<String> followMessages = Arrays.asList(
"Please follow me",
......@@ -80,12 +81,22 @@ public class BehaviorTests {
static Path entityPigPath = Paths.get(RESOURCE_PATH, "entities", "pig.json");
static Path playerPath = Paths.get(RESOURCE_PATH, "players", "player.json");
static Path worldPath = Paths.get(RESOURCE_PATH, "worlds", "world.json");
static Map<String, Map<String, String>> outputData;
Logger LOGGER = LoggerFactory.getLogger("creaturechat");
Gson gson = new GsonBuilder().create();
static Logger LOGGER = LoggerFactory.getLogger("creaturechat");
static Gson gson = new GsonBuilder().create();
@BeforeEach
public void setup() {
/**
 * Writes the collected model outputs to {@code OUTPUT_JSON_PATH} as pretty-printed JSON
 * once all tests in this class have finished. Does nothing when {@code outputData} was
 * never initialised.
 *
 * @throws IOException if the output file cannot be written
 */
@AfterAll
public static void cleanup() throws IOException {
    if (outputData == null) {
        return;
    }
    // Save BehaviorOutputs.json file (with appended prompt outputs)
    final Gson gsonOutput = new GsonBuilder().setPrettyPrinting().create(); // Pretty-print enabled
    // Files.writeString encodes as UTF-8, matching the Files.readString call used to load the
    // file; String.getBytes() would use the platform charset and corrupt non-ASCII characters.
    Files.writeString(Paths.get(OUTPUT_JSON_PATH), gsonOutput.toJson(outputData));
}
@BeforeAll
public static void setup() {
// Get API key from env var
API_KEY = System.getenv("API_KEY");
API_URL = System.getenv("API_URL");
......@@ -108,6 +119,9 @@ public class BehaviorTests {
// Load system chat prompt
systemChatContents = readFileContents(systemChatPath);
// Load previous unit tests outputs (so new ones can be appended)
outputData = loadExistingOutputData();
}
@Test
......@@ -175,15 +189,19 @@ public class BehaviorTests {
@Test
public void friendshipUpNervous() {
ParsedMessage result = testPromptForBehavior(nervousPath, friendshipUpMessages, "FRIENDSHIP+", null);
for (String message : friendshipUpMessages) {
ParsedMessage result = testPromptForBehavior(nervousPath, List.of(message), "FRIENDSHIP+", null);
assertTrue(result.getBehaviors().stream().anyMatch(b -> "FRIENDSHIP".equals(b.getName()) && b.getArgument() > 0));
}
}
@Test
public void friendshipUpBrave() {
ParsedMessage result = testPromptForBehavior(bravePath, friendshipUpMessages, "FRIENDSHIP+", null);
for (String message : friendshipUpMessages) {
ParsedMessage result = testPromptForBehavior(bravePath, List.of(message), "FRIENDSHIP+", null);
assertTrue(result.getBehaviors().stream().anyMatch(b -> "FRIENDSHIP".equals(b.getName()) && b.getArgument() > 0));
}
}
@Test
public void friendshipDownNervous() {
......@@ -231,6 +249,12 @@ public class BehaviorTests {
// Chat Message: Check for behaviors
ParsedMessage result = MessageParser.parseMessage(outputMessage.replace("\n", " "));
// Save model outputs (for comparison later)
String[] filePathParts = chatDataPath.toString().split("/");
String Key = filePathParts[filePathParts.length - 1] + ": " + messages.get(0);
outputData.putIfAbsent(Key, new HashMap<>());
outputData.get(Key).put(config.getModel(), result.getCleanedMessage());
// Check for the presence of good behavior
if (goodBehavior != null && goodBehavior.contains("FRIENDSHIP")) {
boolean isPositive = goodBehavior.equals("FRIENDSHIP+");
......@@ -265,7 +289,7 @@ public class BehaviorTests {
return null;
}
public String readFileContents(Path filePath) {
public static String readFileContents(Path filePath) {
try {
return Files.readString(filePath);
} catch (IOException e) {
......@@ -274,4 +298,17 @@ public class BehaviorTests {
}
}
/**
 * Loads the previously saved outputs from {@code OUTPUT_JSON_PATH} so new results can be
 * merged into them.
 *
 * @return the parsed map (prompt key to model name to output), or a new empty map when the
 *         file does not exist, cannot be read, or contains no JSON value
 */
private static Map<String, Map<String, String>> loadExistingOutputData() {
    Path path = Paths.get(OUTPUT_JSON_PATH);
    if (Files.exists(path)) {
        try {
            // Raw Map.class is what Gson is given here; the file is expected to hold
            // string -> (string -> string), so the unchecked conversion is accepted locally.
            @SuppressWarnings("unchecked")
            Map<String, Map<String, String>> existing = gson.fromJson(Files.readString(path), Map.class);
            // Gson returns null for an empty file; never hand null to callers that
            // immediately call putIfAbsent on the result
            if (existing != null) {
                return existing;
            }
        } catch (IOException e) {
            // Trailing throwable keeps the stack trace in the log alongside the message
            LOGGER.error("Failed to read existing output JSON: {}", e.getMessage(), e);
        }
    }
    return new HashMap<>();
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment