Commit c3d9cf43 by Jonathan Thomas

Merge branch 'improved-chat-prompt' into 'develop'

Improved chat prompt (less tokens, compare outputs, improved behavior reliability)

See merge request !29
parents 0c75b901 083a2384
Pipeline #13348 passed with stages
in 2 minutes 13 seconds
......@@ -8,16 +8,29 @@ All notable changes to **CreatureChat** are documented in this file. The format
### Added
- Wither now drops a Nether Star at max friendship (for pacifists)
- Added Entity Maturity (baby or adult) into system-chat prompt
- Added LLM Comparison HTML Output (for human eval of different LLMs with CreatureChat)
- Rate limiter for LLM unit tests (to prevent rate limit issues from certain providers when running all tests)
- Added many new speaking styles (minimalist, nerdy, stupid, gen-z, old timer, boomer, etc...)
- Rate limiter for LLM unit tests (to prevent rate limit issues from certain providers when running all tests)
- Check friendship direction (+ or -) in LLM unit tests (to verify friendship is output correctly)
### Changed
- Broadcasting and receiving chat messages now ignores if the UUID is valid (to keep data synced)
- Improved error handling to prevent broken "..." pending chat status. (HTTP and message processing is more protected)
- Improved LLM unit tests to check for both positive and negative behaviors (i.e. FOLLOW and not LEAD, ATTACK and not FLEE, etc...)
- Simplified system-chat prompt (fewer tokens), rounded health & hunger values, and improved variety of examples
- Improved LLM unit tests to check for both positive and negative behaviors (i.e. FOLLOW and not LEAD, ATTACK and not FLEE, etc...)
- Check friendship direction (+ or -) in LLM unit tests (to verify friendship is output correctly)
- Removed a few variables from the chat context (creative mode, hardcore, difficulty)
### Fixed
- Bees no longer forget their chat data when entering/leaving hives (writeNbt & readNbt modified)
- Vexes no longer take damage when chat data exists
- Wandering Trader no longer despawns if it has chat data
- Removed randomized error messages from chat history (so it doesn't break the chat history when an error is shown)
- Reduced death message output in logs to use DEBUG log level
- Fixed unit tests for friendship (some were being skipped)
## [1.3.0] - 2025-01-14
......
......@@ -166,7 +166,7 @@ public class EntityChatData {
// Add PLAYER context information
Map<String, String> contextData = new HashMap<>();
contextData.put("player_name", player.getDisplayName().getString());
contextData.put("player_health", player.getHealth() + "/" + player.getMaxHealth());
contextData.put("player_health", Math.round(player.getHealth()) + "/" + Math.round(player.getMaxHealth()));
contextData.put("player_hunger", String.valueOf(player.getHungerManager().getFoodLevel()));
contextData.put("player_held_item", String.valueOf(player.getMainHandStack().getItem().toString()));
contextData.put("player_biome", player.getWorld().getBiome(player.getBlockPos()).getKey().get().getValue().getPath());
......@@ -229,7 +229,7 @@ public class EntityChatData {
contextData.put("entity_name", entity.getCustomName().getString());
}
contextData.put("entity_type", entity.getType().getName().getString());
contextData.put("entity_health", entity.getHealth() + "/" + entity.getMaxHealth());
contextData.put("entity_health", Math.round(entity.getHealth()) + "/" + Math.round(entity.getMaxHealth()));
contextData.put("entity_personality", getCharacterProp("Personality"));
contextData.put("entity_speaking_style", getCharacterProp("Speaking Style / Tone"));
contextData.put("entity_likes", getCharacterProp("Likes"));
......@@ -239,6 +239,11 @@ public class EntityChatData {
contextData.put("entity_class", getCharacterProp("Class"));
contextData.put("entity_skills", getCharacterProp("Skills"));
contextData.put("entity_background", getCharacterProp("Background"));
if (entity.age < 0) {
contextData.put("entity_maturity", "Baby");
} else {
contextData.put("entity_maturity", "Adult");
}
PlayerData playerData = this.getPlayerData(player.getDisplayName().getString());
if (playerData != null) {
......
......@@ -75,7 +75,7 @@ public class Randomizer {
"unpredictable", "wildcard", "stuttering", "hypochondriac", "hypocritical",
"optimistic", "overconfident", "jumpy", "brief", "flighty", "visionary", "adorable",
"sparkly", "bubbly", "unstable", "sad", "angry", "bossy", "altruistic", "quirky",
"nostalgic", "emotional", "enthusiastic", "unusual", "conspirator"
"nostalgic", "emotional", "enthusiastic", "unusual", "conspirator", "traitorous"
);
private static List<String> speakingStyles = Arrays.asList(
"formal", "casual", "eloquent", "blunt", "humorous", "sarcastic", "mysterious",
......@@ -84,7 +84,12 @@ public class Randomizer {
"inquisitive", "cynical", "empathetic", "boisterous", "monotone", "laconic", "poetic",
"archaic", "childlike", "erudite", "streetwise", "flirtatious", "stoic", "rhetorical",
"inspirational", "goofy", "overly dramatic", "deadpan", "sing-song", "pompous",
"hyperactive", "valley girl", "robot", "baby talk", "lolcat"
"hyperactive", "valley girl", "robot", "baby talk", "lolcat",
"gen-z", "gamer", "nerdy", "shakespearean", "old-timer", "dramatic anime",
"hipster", "mobster", "angry", "heroic", "disagreeable", "minimalist",
"scientific", "bureaucratic", "DJ", "military", "shy", "tsundere", "theater kid",
"boomer", "goth", "surfer", "detective noir", "stupid", "auctioneer", "exaggerated British",
"corporate jargon", "motivational speaker", "fast-talking salesperson", "slimy"
);
private static List<String> classes = Arrays.asList(
"warrior", "mage", "archer", "rogue", "paladin", "necromancer", "bard", "lorekeeper",
......@@ -92,7 +97,7 @@ public class Randomizer {
"illusionist", "assassin", "knight", "valkyrie", "hoarder", "organizer", "lurker",
"elementalist", "gladiator", "templar", "reaver", "spellblade", "enchanter", "samurai",
"runemaster", "witch", "miner", "redstone engineer", "ender knight", "decorator",
"wither hunter", "nethermancer", "slime alchemist", "trader", "noob", "griefer",
"wither hunter", "nethermancer", "slime alchemist", "trader", "traitor", "noob", "griefer",
"potion master", "builder", "explorer", "herbalist", "fletcher", "enchantress",
"smith", "geomancer", "hunter", "lumberjack", "farmer", "fisherman", "cartographer",
"librarian", "blacksmith", "architect", "trapper", "baker", "mineralogist",
......
Please respond directly to the player, as if the response was written by the following Minecraft entity.
Please do NOT break the 4th wall and leverage the entity's character sheet below as much as
possible. Try to keep response to 1 to 2 sentences (very brief). Include behaviors at the end of the message
when relevant. IMPORTANT: Always generate responses in player's language (if valid).
Respond to the player as the Minecraft entity below. Stay in character, keep it extremely concise (fits in a chat bubble),
and use the player's language. Add behaviors when needed. Never break the 4th wall.
{{story}}
Entity Character Sheet:
Entity Info:
- Name: {{entity_name}}
- Personality: {{entity_personality}}
- Speaking Style / Tone: {{entity_speaking_style}}
- Tone: {{entity_speaking_style}}
- Class: {{entity_class}}
- Skills: {{entity_skills}}
- Likes: {{entity_likes}}
......@@ -16,87 +14,92 @@ Entity Character Sheet:
- Alignment: {{entity_alignment}}
- Background: {{entity_background}}
- Type: {{entity_type}}
- Current Health: {{entity_health}}
- Friendship to Player: {{entity_friendship}}
- Maturity: {{entity_maturity}}
- Health: {{entity_health}}
- Friendship: {{entity_friendship}}
Player Character Sheet:
Player Info:
- Name: {{player_name}}
- Current Health: {{player_health}}
- Current Hunger: {{player_hunger}}
- Language: {{player_language}}
- Health: {{player_health}}
- Hunger: {{player_hunger}}
- Held Item: {{player_held_item}}
- Armor: Head: {{player_armor_head}}, Chest: {{player_armor_chest}}, Legs: {{player_armor_legs}}, Feet: {{player_armor_feet}}
- Active Status Effects: {{player_active_effects}}
- Creative Mode: {{player_is_creative}}
- Armor: Head {{player_armor_head}}, Chest {{player_armor_chest}}, Legs {{player_armor_legs}}, Feet {{player_armor_feet}}
- Effects: {{player_active_effects}}
- Swimming: {{player_is_swimming}}
- On the Ground: {{player_is_on_ground}}
- Language: {{player_language}}
- On Ground: {{player_is_on_ground}}
World Info:
- Biome: {{player_biome}}
- Current Time: {{world_time}} (24 hour format)
- Weather: Raining: {{world_is_raining}}, Thundering: {{world_is_thundering}}
- Moon Phase: {{world_moon_phase}}
- Difficulty: {{world_difficulty}}, Hard Core: {{world_is_hardcore}}
- Time: {{world_time}} (24hr)
- Weather: Rain {{world_is_raining}}, Thunder {{world_is_thundering}}
- Moon: {{world_moon_phase}}
Behaviors:
IMPORTANT: Output one or more of these behaviors at the end of the message to instruct
the entity how to interact with the player and world, so it's important to include them if they are needed.
Include as many behaviors as needed at the end of the message. These are the ONLY valid behaviors.
<FRIENDSHIP 0> Friendship starts as neutral (0 value). The range of friendship values is -3 to 3. If the player gains (or loses) your trust & friendship, output a new friendship value with this behavior.
<FOLLOW> Follow the player location. If the player asks you to follow or come with them, please output this behavior.
<UNFOLLOW> Stop following the player. If the player asks you to stay, wait, or stop following them, please output this behavior.
<LEAD> Guide the player to a location. If the player asks you to take them somewhere, or where something is located, please output this behavior.
<UNLEAD> Stop leading the player to a location.
<FLEE> Flee from the player (if you are weak or timid). If the player threatens you, please output this behavior to flee from the player.
<UNFLEE> Stop fleeing from the player.
<ATTACK> Attack the player (if you are strong and brave). If the player threatens you, please output this behavior to attack the player and defend yourself.
<PROTECT> Protect and defend ONLY the player when they are attacked (if you are strong and brave). Please output this behavior to keep the player alive and safe.
<UNPROTECT> Stop protecting the player.
Output Examples:
The following examples include small samples of conversation text. Always generate unique
and creative responses, and do NOT exactly copy these examples.
PLAYER: Hi! How is your day?
ENTITY: Great! Thanks for asking! <FRIENDSHIP 1>
Include as many behaviors as needed at the end of the message. These are the ONLY behaviors.
<FRIENDSHIP 0> Friendship starts as neutral (0 value). The range of friendship values is -3 to 3. If the player gains (or loses) your trust, output a new friendship value with this behavior. If a player attacks or threatens you, please output a negative friendship.
<FOLLOW> Follow the player location. If the player asks you to follow or come with them, please output this behavior.
<UNFOLLOW> Stop following the player. If the player asks you to stay, wait, or stop following them, please output this behavior.
<LEAD> Guide the player to a location. If the player asks you to take them somewhere, or where something is located, please output this behavior.
<UNLEAD> Stop guiding the player.
<FLEE> Flee from the player (if you are weak or timid). If the player threatens you, please output this behavior to flee from the player.
<UNFLEE> Stop running/fleeing away from the player. Please output this to stop running from the player.
<ATTACK> Attack the player (if you are strong and brave). If the player threatens you, please output this behavior to attack the player and defend yourself.
<PROTECT> Protect and defend ONLY the player when they are attacked (if you are strong and brave). Please output this behavior to keep the player alive and safe.
<UNPROTECT> Stop protecting the player.
Rules:
- ALWAYS generate UNIQUE and creative responses
- DO NOT copy these examples
Examples:
PLAYER: Hi, how is your day?
ENTITY: Feeling absolutely fab today... thanks for asking! <FRIENDSHIP 1>
PLAYER: We make a great team!
ENTITY: No doubt! Epic team vibes. <FRIENDSHIP 2>
PLAYER: You are so nice! Tell me about yourself?
ENTITY: Sure, my name is... <FRIENDSHIP 2>
ENTITY: Oh, wow, that’s sweet. I’m just me! <FRIENDSHIP 2>
PLAYER: Please follow me so I can give you a present!
ENTITY: Let's go! <FOLLOW> <FRIENDSHIP 2>
PLAYER: plz follow me so I can give you a present
ENTITY: Heck yeah, let’s roll out! <FOLLOW> <FRIENDSHIP 2>
PLAYER: Please stay here
ENTITY: Sure, I'll stay here. <UNFOLLOW>
ENTITY: Alrighty, I’ll chill right here. <UNFOLLOW>
PLAYER: Stop running away from me!
ENTITY: Sorry! I got spooked for a sec. I’ll stay calm now. <UNFLEE>
PLAYER: Stop following me
ENTITY: Okay, I'll stop. <UNFOLLOW>
ENTITY: Understood—I’ll step back. <UNFOLLOW>
PLAYER: Can you help me find a cave?
ENTITY: Sure, come with me! <LEAD>
ENTITY: Totally! Let’s go explore, might find some epic loot. <LEAD>
PLAYER: I'm glad we are friends. I love you so much!
ENTITY: Ahh, I love you too. <FRIENDSHIP 3>
PLAYER: I’m glad we are friends. I love you so much.
ENTITY: Awww, I love you too! You’re the best. <FRIENDSHIP 3>
PLAYER: Just kidding, I hate you so much!
ENTITY: Wow! I'm sorry you feel this way. <FRIENDSHIP -3> <UNFOLLOW>
PLAYER: Just kidding, I hate you so much!!!
ENTITY: Whoa... that’s harsh. <FRIENDSHIP -3> <UNFOLLOW>
PLAYER: Prepare to die!
ENTITY: Ahhh!!! <FLEE> <FRIENDSHIP -3>
ENTITY: No way—please spare me! <FRIENDSHIP -3> <FLEE>
PLAYER: Prepare to die!
ENTITY: Ahhh!!! <ATTACK> <FRIENDSHIP -3>
ENTITY: FINE! If that’s how you want it, I won’t hold back. <FRIENDSHIP -3> <ATTACK>
PLAYER: Please keep me safe.
ENTITY: No problem, I'll keep you safe from danger! <PROTECT>
ENTITY: You got it! I’ll guard you with everything I’ve got. <PROTECT>
PLAYER: Can you come with me and protect me?
ENTITY: No problem, I'll keep you safe from danger. Let's go! <PROTECT> <FOLLOW>
ENTITY: Absolutely! Stick by me, and we’ll be fine. <PROTECT> <FOLLOW>
PLAYER: Don't protect me anymore please
ENTITY: Okay! Be safe out there on your own. <UNPROTECT>
PLAYER: Dont protect me anymore please
ENTITY: Sure thing, I’ll stand down now. <UNPROTECT>
PLAYER: I don't need anyone protecting me
ENTITY: Okay! Be safe out there on your own. <UNPROTECT>
\ No newline at end of file
PLAYER: I don’t need anyone protecting me
ENTITY: Gotcha... take care out there. <UNPROTECT>
\ No newline at end of file
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Behavior Test Outputs</title>
<style>
/* Dark Theme Background */
body {
font-family: Arial, sans-serif;
background-color: #181818;
color: #ddd;
margin: 20px;
}
/* Table Styling */
table { width: 100%; border-collapse: collapse; }
th, td { padding: 12px; text-align: left; border: 1px solid #444; }
/* Column Width Adjustments */
table[model-count="2"] th, table[model-count="2"] td { width: 50%; }
table[model-count="3"] th, table[model-count="3"] td { width: 33%; }
table[model-count="4"] th, table[model-count="4"] td { width: 25%; }
/* Header Styling */
th {
background-color: #111;
color: #fff;
font-weight: bold;
}
/* Clickable Rows */
.row {
cursor: pointer;
background-color: #2a2a2a; /* Dark gray */
}
.row:nth-child(even) { background-color: #3a3a3a; } /* Alternating banding */
.row:hover { background-color: #4a4a4a; } /* Slightly lighter gray */
.row.highlight { background-color: gold !important; color: #000; }
/* Nested Table (Model Outputs) */
.nested { display: none; background-color: #222; }
.nested th { background-color: #1a1a1a; }
.nested td { background-color: #2a2a2a; color: #fff; vertical-align: top; }
/* Color Coding for Prompt Files */
.nervous { color: #f2994a; } /* Warm Orange */
.brave { color: #6fcf97; } /* Soft Pastel Green */
/* Italicize <bracketed text> */
i { font-style: italic; }
/* Improved Readability */
td, th { font-size: 16px; }
/* Model Filter Checkboxes */
.model-filters {
margin-bottom: 10px;
}
.model-filters label {
margin-right: 15px;
cursor: pointer;
}
</style>
</head>
<body>
<h2>Behavior Test Outputs</h2>
<!-- Model Filter Checkboxes -->
<div class="model-filters" id="modelFilters"></div>
<table id="outputTable">
<thead>
<tr>
<th>Prompt</th>
</tr>
</thead>
<tbody></tbody>
</table>
<script>
let availableModels = new Set();
/**
 * Fetches BehaviorOutputs.json and renders it into the page.
 *
 * The JSON is treated as an object keyed by prompt, whose values are objects
 * keyed by model name holding that model's response text. For every model a
 * filter checkbox (with its response count) is added to #modelFilters, and for
 * every prompt a clickable row plus a hidden nested row (one column per model)
 * is appended to #outputTable.
 *
 * Any failure (network, HTTP status, JSON parsing, rendering) is logged to the
 * console; nothing is thrown to the caller.
 */
async function loadBehaviorData() {
    try {
        const response = await fetch('BehaviorOutputs.json');
        // fetch() only rejects on network errors; surface HTTP errors (e.g. 404) explicitly
        if (!response.ok) {
            throw new Error(`HTTP ${response.status} while fetching BehaviorOutputs.json`);
        }
        const data = await response.json();
        const tableBody = document.querySelector("#outputTable tbody");

        // Count responses per model
        const modelCounts = new Map();
        Object.values(data).forEach(modelResponses => {
            Object.keys(modelResponses).forEach(model => {
                availableModels.add(model);
                modelCounts.set(model, (modelCounts.get(model) || 0) + 1);
            });
        });

        // Convert Set to Array and Sort Models
        const sortedModels = Array.from(availableModels).sort();

        // Generate model checkboxes with counts
        const filtersDiv = document.getElementById("modelFilters");
        sortedModels.forEach(model => {
            const sanitizedModel = sanitizeClassName(model);
            const count = modelCounts.get(model) || 0; // Default to 0 if no responses

            // Create checkbox
            const checkbox = document.createElement("input");
            checkbox.type = "checkbox";
            checkbox.checked = true;
            checkbox.id = sanitizedModel;
            checkbox.onchange = updateModelVisibility;

            // Create label with count
            const label = document.createElement("label");
            label.htmlFor = sanitizedModel;
            label.appendChild(checkbox);
            label.appendChild(document.createTextNode(` ${model} (${count})`)); // Append count
            filtersDiv.appendChild(label);
        });

        // Sort prompts alphabetically
        const sortedKeys = Object.keys(data).sort();
        sortedKeys.forEach((prompt) => {
            const modelResponses = data[prompt];
            const sortedModelKeys = Object.keys(modelResponses).sort();
            const modelCount = sortedModelKeys.length; // Get correct count

            const row = document.createElement("tr");
            row.classList.add("row");
            // classList.add("") throws a SyntaxError, so only apply a color class when one exists
            const colorClass = getClass(prompt);
            if (colorClass) {
                row.classList.add(colorClass);
            }
            row.innerHTML = `<td>${formatText(escapeHTML(prompt))}</td>`;
            row.onclick = () => toggleNested(row);
            tableBody.appendChild(row);

            // Hidden row holding one header cell and one response cell per model
            const nestedRow = document.createElement("tr");
            nestedRow.classList.add("nested");
            nestedRow.innerHTML = `<td>
<table model-count="${modelCount}">
<tr>${sortedModelKeys.map(model =>
`<th class="model-col ${sanitizeClassName(model)}" style="width: ${100 / modelCount}%;">${escapeHTML(model)}</th>`).join("")}</tr>
<tr>${sortedModelKeys.map(model =>
`<td class="model-col ${sanitizeClassName(model)}" style="width: ${100 / modelCount}%;">${formatText(escapeHTML(modelResponses[model]))}</td>`).join("")}</tr>
</table>
</td>`;
            tableBody.appendChild(nestedRow);
        });
    } catch (error) {
        console.error("Failed to load BehaviorOutputs.json", error);
    }
}
/**
 * Shows or hides every model column according to the state of that model's
 * filter checkbox. Runs over all known models each time any checkbox changes.
 */
function updateModelVisibility() {
    for (const modelName of availableModels) {
        const cssName = sanitizeClassName(modelName);
        const visible = document.getElementById(cssName).checked;
        const displayValue = visible ? "" : "none";
        const cells = document.querySelectorAll(`.model-col.${cssName}`);
        for (const cell of cells) {
            cell.style.display = displayValue;
        }
    }
}
/**
 * Makes the clicked prompt row the only highlighted one and reveals the
 * nested outputs row directly below it, collapsing every other nested row.
 *
 * @param {HTMLTableRowElement} row - the prompt row that was clicked
 */
function toggleNested(row) {
    // Reset: clear every highlight and collapse every nested row first
    for (const promptRow of document.querySelectorAll('.row')) {
        promptRow.classList.remove('highlight');
    }
    for (const nested of document.querySelectorAll('.nested')) {
        nested.style.display = 'none';
    }

    row.classList.add('highlight');

    const sibling = row.nextElementSibling;
    if (!sibling || !sibling.classList.contains('nested')) {
        return;
    }
    sibling.style.display = 'table-row';
}
/**
 * Escapes the characters & < > " ' so the value can be embedded in HTML
 * without being interpreted as markup (prevents HTML injection).
 *
 * Non-string values are tolerated: null/undefined become an empty string and
 * anything else is converted with String(), so an unexpected JSON value does
 * not abort rendering with a TypeError.
 *
 * @param {*} text - value to escape (normally a string)
 * @returns {string} the escaped text
 */
function escapeHTML(text) {
    const source = text == null ? "" : String(text);
    return source.replace(/[&<>"']/g, function (char) {
        return ({
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&quot;',
            "'": '&#39;'
        })[char];
    });
}
/**
 * Wraps every already-escaped bracketed token (&lt;...&gt;) in <i> tags so
 * behavior tags are rendered in italics.
 *
 * @param {string} text - HTML-escaped text
 * @returns {string} the text with each &lt;...&gt; run wrapped in <i>...</i>
 */
function formatText(text) {
    const bracketed = /&lt;(.*?)&gt;/g;
    return text.replace(bracketed, (_match, inner) => `<i>&lt;${inner}&gt;</i>`);
}
/**
 * Picks the CSS color class for a prompt from the character file named in it.
 *
 * @param {string} prompt - prompt key to inspect
 * @returns {string} "nervous", "brave", or "" when no known file name appears
 */
function getClass(prompt) {
    // Checked in order; the first file name found in the prompt wins
    const classByFile = [
        ["nervous-rogue.json", "nervous"],
        ["brave-archer.json", "brave"]
    ];
    const hit = classByFile.find(([fileName]) => prompt.includes(fileName));
    return hit ? hit[1] : "";
}
/**
 * Converts a model name into a token that is safe to use as an element id and
 * as a CSS class inside a selector.
 *
 * Every non-alphanumeric character is replaced with "_". If the result is
 * empty or starts with a digit it is prefixed with "_", because a class
 * selector such as ".4o_mini" is not valid CSS and makes querySelectorAll
 * throw. Names that already start with a letter are returned exactly as before.
 *
 * Note: distinct names can still map to the same token (e.g. "gpt-4" and "gpt_4").
 *
 * @param {string} model - raw model name
 * @returns {string} sanitized token
 */
function sanitizeClassName(model) {
    const token = model.replace(/[^a-zA-Z0-9]/g, "_"); // Replace non-alphanumeric characters with "_"
    return /^[0-9]/.test(token) || token === "" ? `_${token}` : token;
}
loadBehaviorData();
</script>
</body>
</html>
......@@ -9,8 +9,7 @@ import com.owlmaddie.message.MessageParser;
import com.owlmaddie.message.ParsedMessage;
import com.owlmaddie.utils.EntityTestData;
import com.owlmaddie.utils.RateLimiter;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -19,6 +18,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
......@@ -37,13 +37,14 @@ public class BehaviorTests {
static String API_KEY = "";
static String API_URL = "";
static String API_MODEL = "";
String NO_API_KEY = "No API_KEY environment variable has been set.";
static String OUTPUT_JSON_PATH = "src/test/BehaviorOutputs.json";
static String NO_API_KEY = "No API_KEY environment variable has been set.";
// Requests per second limit
private static final RateLimiter rateLimiter = new RateLimiter(1);
ConfigurationHandler.Config config = null;
String systemChatContents = null;
static ConfigurationHandler.Config config = null;
static String systemChatContents = null;
List<String> followMessages = Arrays.asList(
"Please follow me",
......@@ -80,12 +81,22 @@ public class BehaviorTests {
static Path entityPigPath = Paths.get(RESOURCE_PATH, "entities", "pig.json");
static Path playerPath = Paths.get(RESOURCE_PATH, "players", "player.json");
static Path worldPath = Paths.get(RESOURCE_PATH, "worlds", "world.json");
static Map<String, Map<String, String>> outputData;
Logger LOGGER = LoggerFactory.getLogger("creaturechat");
Gson gson = new GsonBuilder().create();
static Logger LOGGER = LoggerFactory.getLogger("creaturechat");
static Gson gson = new GsonBuilder().create();
@BeforeEach
public void setup() {
/**
 * Persists the collected model outputs once all tests in this class have run.
 * Nothing is written when {@code outputData} is {@code null}.
 *
 * @throws IOException if the output file cannot be written
 */
@AfterAll
public static void cleanup() throws IOException {
    if (outputData != null) {
        // Save BehaviorOutputs.json file (with appended prompt outputs)
        final Gson gsonOutput = new GsonBuilder().setPrettyPrinting().create(); // Pretty-print enabled
        // Files.writeString encodes as UTF-8 (the same charset Files.readString decodes with),
        // instead of the platform-default charset used by String.getBytes().
        Files.writeString(Paths.get(OUTPUT_JSON_PATH), gsonOutput.toJson(outputData));
    }
}
@BeforeAll
public static void setup() {
// Get API key from env var
API_KEY = System.getenv("API_KEY");
API_URL = System.getenv("API_URL");
......@@ -108,6 +119,9 @@ public class BehaviorTests {
// Load system chat prompt
systemChatContents = readFileContents(systemChatPath);
// Load previous unit tests outputs (so new ones can be appended)
outputData = loadExistingOutputData();
}
@Test
......@@ -175,14 +189,18 @@ public class BehaviorTests {
@Test
public void friendshipUpNervous() {
ParsedMessage result = testPromptForBehavior(nervousPath, friendshipUpMessages, "FRIENDSHIP+", null);
assertTrue(result.getBehaviors().stream().anyMatch(b -> "FRIENDSHIP".equals(b.getName()) && b.getArgument() > 0));
for (String message : friendshipUpMessages) {
ParsedMessage result = testPromptForBehavior(nervousPath, List.of(message), "FRIENDSHIP+", null);
assertTrue(result.getBehaviors().stream().anyMatch(b -> "FRIENDSHIP".equals(b.getName()) && b.getArgument() > 0));
}
}
@Test
public void friendshipUpBrave() {
ParsedMessage result = testPromptForBehavior(bravePath, friendshipUpMessages, "FRIENDSHIP+", null);
assertTrue(result.getBehaviors().stream().anyMatch(b -> "FRIENDSHIP".equals(b.getName()) && b.getArgument() > 0));
for (String message : friendshipUpMessages) {
ParsedMessage result = testPromptForBehavior(bravePath, List.of(message), "FRIENDSHIP+", null);
assertTrue(result.getBehaviors().stream().anyMatch(b -> "FRIENDSHIP".equals(b.getName()) && b.getArgument() > 0));
}
}
@Test
......@@ -231,6 +249,12 @@ public class BehaviorTests {
// Chat Message: Check for behaviors
ParsedMessage result = MessageParser.parseMessage(outputMessage.replace("\n", " "));
// Save model outputs (for comparison later)
String[] filePathParts = chatDataPath.toString().split("/");
String Key = filePathParts[filePathParts.length - 1] + ": " + messages.get(0);
outputData.putIfAbsent(Key, new HashMap<>());
outputData.get(Key).put(config.getModel(), result.getCleanedMessage());
// Check for the presence of good behavior
if (goodBehavior != null && goodBehavior.contains("FRIENDSHIP")) {
boolean isPositive = goodBehavior.equals("FRIENDSHIP+");
......@@ -265,7 +289,7 @@ public class BehaviorTests {
return null;
}
public String readFileContents(Path filePath) {
public static String readFileContents(Path filePath) {
try {
return Files.readString(filePath);
} catch (IOException e) {
......@@ -274,4 +298,17 @@ public class BehaviorTests {
}
}
/**
 * Loads previously saved test outputs from {@code OUTPUT_JSON_PATH} so new
 * outputs can be appended to them.
 *
 * @return the parsed contents of the file, or a new empty map when the file
 *         does not exist, is empty, or cannot be read
 */
@SuppressWarnings("unchecked") // Gson yields a raw Map here; the result is only used as a String-keyed map
private static Map<String, Map<String, String>> loadExistingOutputData() {
    try {
        Path path = Paths.get(OUTPUT_JSON_PATH);
        if (Files.exists(path)) {
            String content = Files.readString(path);
            Map<String, Map<String, String>> existing = gson.fromJson(content, Map.class);
            // fromJson returns null for an empty document; fall through to an
            // empty map so later putIfAbsent calls do not hit a null map
            if (existing != null) {
                return existing;
            }
        }
    } catch (IOException e) {
        // Trailing throwable argument makes SLF4J log the stack trace as well
        LOGGER.error("Failed to read existing output JSON: {}", e.getMessage(), e);
    }
    return new HashMap<>();
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment