Commit 1ee2a1ed by Jonathan Thomas

Parse behaviors out of messages anywhere the emojis are found. Also, normalize…

Parse behaviors out of messages anywhere the emojis are found. Also, normalize and only compare the base emojis when creating behaviors.
parent 5688c66a
...@@ -17,47 +17,45 @@ import java.util.regex.Pattern; ...@@ -17,47 +17,45 @@ import java.util.regex.Pattern;
public class MessageParser { public class MessageParser {
private static final Logger LOGGER = LoggerFactory.getLogger("creaturechat"); private static final Logger LOGGER = LoggerFactory.getLogger("creaturechat");
// Regex capturing all text in (group1), and trailing emojis in (group2). // We only match our "recognized" emojis anywhere in the text:
// Only these exact emojis are recognized, optionally with spaces before/after. // (Short versions shown here, but adjust to whatever codepoints you actually use.)
private static final Pattern TRAILING_BEHAVIORS = Pattern.compile("^(.*?)((?:\\s*(?:🚫|👣|🏃|🛡️|⚔️|🐕|❤️|💔))+)(.*)$");
// Regex to find each recognized emoji in the trailing chunk.
private static final Pattern RECOGNIZED_EMOJI = Pattern.compile("🚫|👣|🏃(?:‍[♂♀️])?|🛡(?:️)?|⚔(?:️)?|🐕|❤(?:️)?|💔"); private static final Pattern RECOGNIZED_EMOJI = Pattern.compile("🚫|👣|🏃(?:‍[♂♀️])?|🛡(?:️)?|⚔(?:️)?|🐕|❤(?:️)?|💔");
private static final Pattern ZWJ_OR_VARIATION = Pattern.compile("[\u200D\uFE0E\uFE0F]");
private static String normalizeEmoji(String raw) {
// remove zero-width joiners, variation selectors, etc.
return ZWJ_OR_VARIATION.matcher(raw).replaceAll("");
}
public static ParsedMessage parseMessage(String input) { public static ParsedMessage parseMessage(String input) {
LOGGER.debug("Parsing message: {}", input); LOGGER.debug("Parsing message: {}", input);
String updated = parseLegacyBehaviors(input); String updated = parseLegacyBehaviors(input);
// Separate trailing emojis from main text // List of extracted behaviors
Matcher m = TRAILING_BEHAVIORS.matcher(updated.stripTrailing());
String mainText = updated;
String trailing = "";
if (m.matches()) {
mainText = m.group(1).stripTrailing();
trailing = m.group(2).strip();
}
LOGGER.debug("Emoji sequence found: {}", trailing);
List<Behavior> behaviors = new ArrayList<>(); List<Behavior> behaviors = new ArrayList<>();
AtomicInteger friendshipScore = new AtomicInteger(0); AtomicInteger friendshipScore = new AtomicInteger(0);
AtomicBoolean hasFriendship = new AtomicBoolean(false); AtomicBoolean hasFriendship = new AtomicBoolean(false);
// Find all individual emojis from the trailing chunk // We'll remove recognized emojis from the entire message,
Matcher emojiMatcher = RECOGNIZED_EMOJI.matcher(trailing); // building a 'cleaned' version as we go
Matcher emojiMatcher = RECOGNIZED_EMOJI.matcher(updated);
StringBuffer cleanedBuffer = new StringBuffer();
while (emojiMatcher.find()) { while (emojiMatcher.find()) {
String emoji = emojiMatcher.group(); String rawEmoji = emojiMatcher.group();
LOGGER.debug("Processing emoji: {}", emoji); String normalized = normalizeEmoji(rawEmoji); // remove extra codepoints
LOGGER.debug("Processing raw emoji: {}, normalized: {}", rawEmoji, normalized);
switch (emoji) { // Handle only the base "normalized" emoji
case "🚫" -> behaviors.add(new Behavior("STOP", null)); switch (normalized) {
case "👣" -> behaviors.add(new Behavior("FOLLOW", null));
case "🏃" -> behaviors.add(new Behavior("FLEE", null)); case "🏃" -> behaviors.add(new Behavior("FLEE", null));
case "🛡️" -> behaviors.add(new Behavior("PROTECT", null)); case "👣" -> behaviors.add(new Behavior("FOLLOW", null));
case "⚔️" -> behaviors.add(new Behavior("ATTACK", null)); case "🛡" -> behaviors.add(new Behavior("PROTECT", null));
case "⚔" -> behaviors.add(new Behavior("ATTACK", null));
case "🐕" -> behaviors.add(new Behavior("LEAD", null)); case "🐕" -> behaviors.add(new Behavior("LEAD", null));
case "❤️" -> { case "🚫" -> behaviors.add(new Behavior("STOP", null));
case "❤" -> {
friendshipScore.incrementAndGet(); friendshipScore.incrementAndGet();
hasFriendship.set(true); hasFriendship.set(true);
} }
...@@ -66,16 +64,26 @@ public class MessageParser { ...@@ -66,16 +64,26 @@ public class MessageParser {
hasFriendship.set(true); hasFriendship.set(true);
} }
} }
// Replace recognized emoji with nothing in the cleaned text
emojiMatcher.appendReplacement(cleanedBuffer, "");
} }
// Append any remaining text after the last match
emojiMatcher.appendTail(cleanedBuffer);
// If friendship changed, add that as a separate behavior
if (hasFriendship.get()) { if (hasFriendship.get()) {
behaviors.add(new Behavior("FRIENDSHIP", friendshipScore.get())); behaviors.add(new Behavior("FRIENDSHIP", friendshipScore.get()));
} }
LOGGER.debug("Cleaned message: {}", mainText); // Now 'cleanedBuffer' has the text with recognized emojis removed
String cleanedMessage = cleanedBuffer.toString().stripTrailing();
LOGGER.debug("Cleaned message: {}", cleanedMessage);
LOGGER.debug("Extracted behaviors: {}", behaviors); LOGGER.debug("Extracted behaviors: {}", behaviors);
return new ParsedMessage(mainText, updated.trim(), behaviors); // Return the result
return new ParsedMessage(cleanedMessage, updated.trim(), behaviors);
} }
private static String parseLegacyBehaviors(String input) { private static String parseLegacyBehaviors(String input) {
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment