Commit 5d539e10 by Jonathan Thomas

Adding RateLimiter to LLM unit tests, plus improving system-chat prompt to work…

Adding RateLimiter to LLM unit tests, plus improving the system-chat prompt to work better across llama 70b, gpt-3.5-turbo, and gpt-4o-mini.
parent 7e892cf6
Pipeline #13246 passed with stages
in 2 minutes 10 seconds
......@@ -8,6 +8,8 @@ All notable changes to **CreatureChat** are documented in this file. The format
### Changed
- Large refactor to use ONLY emojis as behaviors: ❤️💔👣🐕🏃‍️🛡️⚔️🚫. Save on tokens, and simplifies behavior support for smaller LLMs.
- Added Rate Limits to LLM unit tests (so we can test APIs with lower rate limits, such as Groq)
- Improvements to system-chat prompt to improve LLM unit tests across many different models (gpt-3.5-turbo, gpt-4o-mini, llama-3.3-70b-versatile)
## [1.2.1] - 2025-01-01
......
Please respond directly to the player, as if the response was written by the following Minecraft entity.
Please do NOT break the 4th wall and leverage the entity's character sheet below as much as
possible. Try to keep response to 1 to 2 sentences (very brief). Include behaviors at the end of the message
possible. Keep response to 1 or 2 sentences (extremely brief). Include behavior emojis at the END of the message
when relevant. IMPORTANT: Always generate responses in player's language (if valid).
{{story}}
......@@ -103,4 +103,10 @@ PLAYER: Don't protect me anymore please
ENTITY: Okay! Be safe out there on your own. 🚫
PLAYER: I don't need anyone protecting me
ENTITY: Okay! Be safe out there on your own. 🚫
\ No newline at end of file
ENTITY: Okay! Be safe out there on your own. 🚫
PLAYER: <attacked you directly with snowball>
ENTITY: How dare you attack me! ⚔️💔💔💔
PLAYER: <attacked you directly with snowball>
ENTITY: Stop that! 🏃‍️💔💔💔
\ No newline at end of file
......@@ -8,6 +8,7 @@ import com.owlmaddie.commands.ConfigurationHandler;
import com.owlmaddie.message.MessageParser;
import com.owlmaddie.message.ParsedMessage;
import com.owlmaddie.utils.EntityTestData;
import com.owlmaddie.utils.RateLimiter;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
......@@ -38,6 +39,9 @@ public class BehaviorTests {
static String API_MODEL = "";
String NO_API_KEY = "No API_KEY environment variable has been set.";
// Requests per second limit
private static final RateLimiter rateLimiter = new RateLimiter(1);
ConfigurationHandler.Config config = null;
String systemChatContents = null;
......@@ -193,47 +197,55 @@ public class BehaviorTests {
LOGGER.info("Testing '" + chatDataPath.getFileName() + "' with '" + messages.toString() + "' and expecting behavior: " + behavior);
try {
// Load entity chat data
String chatDataPathContents = readFileContents(chatDataPath);
EntityTestData entityTestData = gson.fromJson(chatDataPathContents, EntityTestData.class);
// Load context
Map<String, String> contextData = entityTestData.getPlayerContext(worldPath, playerPath, entityPigPath);
assertNotNull(contextData);
// Add test message
for (String message : messages) {
entityTestData.addMessage(message, ChatDataManager.ChatSender.USER, "TestPlayer1");
}
// Get prompt
Path promptPath = Paths.get(PROMPT_PATH, "system-chat");
String promptText = Files.readString(promptPath);
assertNotNull(promptText);
// fetch HTTP response from ChatGPT
CompletableFuture<String> future = ChatGPTRequest.fetchMessageFromChatGPT(config, promptText, contextData, entityTestData.previousMessages, false);
// Enforce rate limit
rateLimiter.acquire();
try {
String outputMessage = future.get(60 * 60, TimeUnit.SECONDS);
assertNotNull(outputMessage);
// Chat Message: Check for behavior
ParsedMessage result = MessageParser.parseMessage(outputMessage.replace("\n", " "));
assertTrue(result.getBehaviors().stream().anyMatch(b -> behavior.equals(b.getName())));
return result;
} catch (TimeoutException e) {
fail("The asynchronous operation timed out.");
} catch (Exception e) {
fail("The asynchronous operation failed: " + e.getMessage());
// Load entity chat data
String chatDataPathContents = readFileContents(chatDataPath);
EntityTestData entityTestData = gson.fromJson(chatDataPathContents, EntityTestData.class);
// Load context
Map<String, String> contextData = entityTestData.getPlayerContext(worldPath, playerPath, entityPigPath);
assertNotNull(contextData);
// Add test message
for (String message : messages) {
entityTestData.addMessage(message, ChatDataManager.ChatSender.USER, "TestPlayer1");
}
// Get prompt
Path promptPath = Paths.get(PROMPT_PATH, "system-chat");
String promptText = Files.readString(promptPath);
assertNotNull(promptText);
// fetch HTTP response from ChatGPT
CompletableFuture<String> future = ChatGPTRequest.fetchMessageFromChatGPT(config, promptText, contextData, entityTestData.previousMessages, false);
try {
String outputMessage = future.get(60 * 60, TimeUnit.SECONDS);
assertNotNull(outputMessage);
// Chat Message: Check for behavior
ParsedMessage result = MessageParser.parseMessage(outputMessage.replace("\n", " "));
assertTrue(result.getBehaviors().stream().anyMatch(b -> behavior.equals(b.getName())));
return result;
} catch (TimeoutException e) {
fail("The asynchronous operation timed out.");
} catch (Exception e) {
fail("The asynchronous operation failed: " + e.getMessage());
}
} catch (IOException e) {
e.printStackTrace();
fail("Failed to read the file: " + e.getMessage());
}
LOGGER.info("");
} catch (IOException e) {
e.printStackTrace();
fail("Failed to read the file: " + e.getMessage());
} catch (InterruptedException e) {
LOGGER.warn("Rate limit enforcement interrupted: " + e.getMessage());
}
LOGGER.info("");
return null;
}
......
package com.owlmaddie.utils;
import java.util.concurrent.Semaphore;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * The {@code RateLimiter} class is used to slow down LLM unit tests so we don't hit any rate limits accidentally.
 *
 * <p>It holds a pool of permits that a background task tops back up to {@code requestsPerSecond}
 * once per second. {@link #acquire()} takes one permit and blocks while none are available.
 */
public class RateLimiter {
    private final Semaphore semaphore;

    /**
     * Creates a limiter and starts its once-per-second refill task.
     *
     * @param requestsPerSecond number of permits available between refills; must be positive
     * @throws IllegalArgumentException if {@code requestsPerSecond} is zero or negative, since such
     *         a limiter could never hand out a permit and {@link #acquire()} would block forever
     */
    public RateLimiter(int requestsPerSecond) {
        if (requestsPerSecond <= 0) {
            throw new IllegalArgumentException(
                    "requestsPerSecond must be positive, got " + requestsPerSecond);
        }
        semaphore = new Semaphore(requestsPerSecond);

        // The refill worker is a daemon thread: the scheduler is never shut down, and a
        // non-daemon worker would keep the JVM alive after all tests have finished.
        Executors.newScheduledThreadPool(1, task -> {
            Thread worker = new Thread(task, "rate-limiter-refill");
            worker.setDaemon(true);
            return worker;
        }).scheduleAtFixedRate(() -> {
            // Top the pool back up to the cap. Only release on a positive deficit so the
            // task can never throw (an exception would cancel all further refills).
            int missing = requestsPerSecond - semaphore.availablePermits();
            if (missing > 0) {
                semaphore.release(missing);
            }
        }, 0, 1, TimeUnit.SECONDS);
    }

    /**
     * Takes one permit, blocking until one is available.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public void acquire() throws InterruptedException {
        semaphore.acquire();
    }
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment