feat: update HuggingFace API endpoint and request format

Switch to new router endpoint and adapt to chat completions format.

Co-authored-by: awa <212803252+aguitauwu@users.noreply.github.com>
This commit is contained in:
v0
2026-02-10 21:31:55 +00:00
parent 43a0866b19
commit 9efa16a774

View File

@@ -1,12 +1,9 @@
import { NextRequest, NextResponse } from "next/server"; import { NextRequest, NextResponse } from "next/server";
// Maps the short model aliases accepted by this API route to their
// HuggingFace repository IDs. After this commit the values are bare repo
// IDs (interpolated into the router.huggingface.co chat-completions URL),
// not full endpoint URLs as before.
const HF_MODELS: Record<string, string> = {
  "yuuki-v0.1": "YuuKi-OS/Yuuki-v0.1",
  "yuuki-3.7": "YuuKi-OS/Yuuki-3.7",
  "yuuki-best": "YuuKi-OS/Yuuki-best", // default fallback — see callHuggingFace
};
const YUUKI_API_MODELS: Record<string, string> = { const YUUKI_API_MODELS: Record<string, string> = {
@@ -54,40 +51,29 @@ async function callYuukiApi(
} }
/** /**
* Calls HuggingFace Inference API directly with an hf_ token. * Calls HuggingFace Inference API via the new router.huggingface.co endpoint.
* Uses the OpenAI-compatible chat completions format.
*/ */
async function callHuggingFace( async function callHuggingFace(
token: string, token: string,
model: string, model: string,
messages: { role: string; content: string }[] messages: { role: string; content: string }[]
) { ) {
const modelUrl = HF_MODELS[model] || HF_MODELS["yuuki-best"]; const modelId = HF_MODELS[model] || HF_MODELS["yuuki-best"];
const url = `https://router.huggingface.co/hf-inference/models/${modelId}/v1/chat/completions`;
const prompt = const response = await fetch(url, {
messages
.map((m) => {
if (m.role === "system") return `System: ${m.content}`;
if (m.role === "user") return `User: ${m.content}`;
if (m.role === "assistant") return `Assistant: ${m.content}`;
return m.content;
})
.join("\n") + "\nAssistant:";
const response = await fetch(modelUrl, {
method: "POST", method: "POST",
headers: { headers: {
Authorization: `Bearer ${token}`, Authorization: `Bearer ${token}`,
"Content-Type": "application/json", "Content-Type": "application/json",
}, },
body: JSON.stringify({ body: JSON.stringify({
inputs: prompt, model: modelId,
parameters: { messages,
max_new_tokens: 1024, max_tokens: 1024,
temperature: 0.7, temperature: 0.7,
top_p: 0.9, top_p: 0.9,
repetition_penalty: 1.1,
return_full_text: false,
},
}), }),
}); });
@@ -99,30 +85,12 @@ async function callHuggingFace(
} }
const data = await response.json(); const data = await response.json();
let generatedText = ""; const content =
data.choices?.[0]?.message?.content?.trim() || "No response generated.";
if (Array.isArray(data) && data[0]?.generated_text) {
generatedText = data[0].generated_text.trim();
} else if (typeof data === "string") {
generatedText = data.trim();
} else if (data?.generated_text) {
generatedText = data.generated_text.trim();
} else {
generatedText = JSON.stringify(data);
}
// Clean up artifacts
const cutoffs = ["User:", "System:", "\nUser", "\nSystem"];
for (const cutoff of cutoffs) {
const idx = generatedText.indexOf(cutoff);
if (idx > 0) generatedText = generatedText.substring(0, idx).trim();
}
return { return {
content: content,
generatedText || id: data.id || `chatcmpl-${Date.now()}`,
"I received your message but couldn't generate a response. Please try again.",
id: `chatcmpl-${Date.now()}`,
model, model,
}; };
} }