Commit 845b7535 by Jonathan Thomas

Added a filter toggle for comparing LLM outputs. Added DeepSeek results for comparison.

parent dbbd2610
Pipeline #13330 passed with stages
in 1 minute 53 seconds
...@@ -53,10 +53,22 @@ ...@@ -53,10 +53,22 @@
/* Improved Readability */ /* Improved Readability */
td, th { font-size: 16px; } td, th { font-size: 16px; }
/* Model Filter Checkboxes */
.model-filters {
margin-bottom: 10px;
}
.model-filters label {
margin-right: 15px;
cursor: pointer;
}
</style> </style>
</head> </head>
<body> <body>
<h2>Behavior Test Outputs</h2> <h2>Behavior Test Outputs</h2>
<!-- Model Filter Checkboxes -->
<div class="model-filters" id="modelFilters"></div>
<table id="outputTable"> <table id="outputTable">
<thead> <thead>
<tr> <tr>
...@@ -67,12 +79,38 @@ ...@@ -67,12 +79,38 @@
</table> </table>
<script> <script>
let availableModels = new Set();
async function loadBehaviorData() { async function loadBehaviorData() {
try { try {
const response = await fetch('BehaviorOutputs.json'); const response = await fetch('BehaviorOutputs.json');
const data = await response.json(); const data = await response.json();
const tableBody = document.querySelector("#outputTable tbody"); const tableBody = document.querySelector("#outputTable tbody");
// Collect unique model names
Object.values(data).forEach(modelResponses => {
Object.keys(modelResponses).forEach(model => availableModels.add(model));
});
// Generate model checkboxes
const filtersDiv = document.getElementById("modelFilters");
availableModels.forEach(model => {
const sanitizedModel = sanitizeClassName(model);
const checkbox = document.createElement("input");
checkbox.type = "checkbox";
checkbox.checked = true;
checkbox.id = sanitizedModel;
checkbox.onchange = updateModelVisibility;
const label = document.createElement("label");
label.htmlFor = sanitizedModel;
label.appendChild(checkbox);
label.appendChild(document.createTextNode(" " + model));
filtersDiv.appendChild(label);
});
// Sort prompts alphabetically // Sort prompts alphabetically
const sortedKeys = Object.keys(data).sort(); const sortedKeys = Object.keys(data).sort();
...@@ -91,8 +129,10 @@ ...@@ -91,8 +129,10 @@
nestedRow.classList.add("nested"); nestedRow.classList.add("nested");
nestedRow.innerHTML = `<td> nestedRow.innerHTML = `<td>
<table model-count="${modelCount}"> <table model-count="${modelCount}">
<tr>${Object.keys(modelResponses).map(model => `<th>${escapeHTML(model)}</th>`).join("")}</tr> <tr>${Object.keys(modelResponses).map(model =>
<tr>${Object.values(modelResponses).map(output => `<td>${formatText(escapeHTML(output))}</td>`).join("")}</tr> `<th class="model-col ${sanitizeClassName(model)}">${escapeHTML(model)}</th>`).join("")}</tr>
<tr>${Object.keys(modelResponses).map(model =>
`<td class="model-col ${sanitizeClassName(model)}">${formatText(escapeHTML(modelResponses[model]))}</td>`).join("")}</tr>
</table> </table>
</td>`; </td>`;
tableBody.appendChild(nestedRow); tableBody.appendChild(nestedRow);
...@@ -102,6 +142,16 @@ ...@@ -102,6 +142,16 @@
} }
} }
/**
 * Show or hide every model column according to its filter checkbox.
 *
 * For each name in `availableModels`, looks up the checkbox whose id is the
 * sanitized model name and applies its checked state to all elements carrying
 * both the `model-col` class and the sanitized model class: checked columns
 * get their inline `display` cleared, unchecked columns get `display: none`.
 */
function updateModelVisibility() {
  for (const modelName of availableModels) {
    const className = sanitizeClassName(modelName);
    const checkbox = document.getElementById(className);
    // An empty string removes the inline override so the stylesheet value applies again.
    const displayValue = checkbox.checked ? "" : "none";
    const cells = document.querySelectorAll(`.model-col.${className}`);
    for (const cell of cells) {
      cell.style.display = displayValue;
    }
  }
}
function toggleNested(row) { function toggleNested(row) {
document.querySelectorAll('.row').forEach(r => r.classList.remove('highlight')); document.querySelectorAll('.row').forEach(r => r.classList.remove('highlight'));
document.querySelectorAll('.nested').forEach(n => n.style.display = 'none'); document.querySelectorAll('.nested').forEach(n => n.style.display = 'none');
...@@ -138,6 +188,11 @@ ...@@ -138,6 +188,11 @@
return ""; return "";
} }
/**
 * Convert a model name into a token that can be used as a CSS class name
 * and as an element id.
 *
 * Every character outside [a-zA-Z0-9] is replaced with "_". If the result
 * is empty or begins with a digit it is prefixed with "_", because a CSS
 * identifier may not start with a digit and a selector such as
 * `.model-col.4o_mini` would make `querySelectorAll` throw.
 *
 * Note: distinct model names can still map to the same token
 * (e.g. "gpt-4" and "gpt_4" both become "gpt_4").
 *
 * @param {string} model - Raw model name.
 * @returns {string} Non-empty token made only of letters, digits and "_",
 *     starting with a letter or "_".
 */
function sanitizeClassName(model) {
  const token = model.replace(/[^a-zA-Z0-9]/g, "_");
  // After the replacement the token holds only [a-zA-Z0-9_], so the only
  // invalid starts are a digit or nothing at all.
  return /^[a-zA-Z_]/.test(token) ? token : `_${token}`;
}
loadBehaviorData(); loadBehaviorData();
</script> </script>
</body> </body>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment