From 2456ba554b8cc2e5afa1fca3c7a445aa8d1c7c06 Mon Sep 17 00:00:00 2001
From: aguitauwu
Date: Mon, 16 Feb 2026 11:11:27 -0600
Subject: [PATCH] pos nomas

---
 yuy/README.md           | 207 ++++++++++++++++++++++++++++++++++++++++
 yuy/src/commands/run.rs |  78 ++++++++++++---
 yuy/src/config.rs       |   3 +
 3 files changed, 273 insertions(+), 15 deletions(-)
 create mode 100644 yuy/README.md

diff --git a/yuy/README.md b/yuy/README.md
new file mode 100644
index 0000000..8c6b194
--- /dev/null
+++ b/yuy/README.md
@@ -0,0 +1,207 @@
+# Yuy - Official Yuuki CLI
+
+```
+$$\     $$\
+\$$\   $$  |
+ \$$\ $$  /$$\   $$\ $$\   $$\
+  \$$$$  / $$ |  $$ |$$ |  $$ |
+   \$$  /  $$ |  $$ |$$ |  $$ |
+    $$ |   $$ |  $$ |$$ |  $$ |
+    $$ |   \$$$$$$  |\$$$$$$$ |
+    \__|    \______/  \____$$ |
+                      $$\   $$ |
+                      \$$$$$$  |
+                       \______/
+```
+
+Official CLI for Yuuki - AI model management and inference
+
+## Features
+
+✨ **Download Models** - Get Yuuki models from Hugging Face
+🚀 **Run Models** - Execute with llama.cpp or ollama
+📋 **Manage Models** - List, info, and remove local models
+🔧 **Runtime Management** - Install and check llama.cpp/ollama
+🏥 **System Doctor** - Diagnose your setup
+
+## Installation
+
+### From Source
+
+```bash
+# Clone the repo
+git clone https://github.com/YuuKi-OS/yuy
+cd yuy
+
+# Build
+cargo build --release
+
+# Install (optional)
+cargo install --path .
+```
+
+### Termux
+
+```bash
+pkg install rust
+git clone https://github.com/YuuKi-OS/yuy
+cd yuy
+cargo build --release
+```
+
+## Quick Start
+
+```bash
+# Initial setup
+yuy setup
+
+# Download a model
+yuy download Yuuki-best
+
+# Run the model
+yuy run Yuuki-best
+
+# Check system health
+yuy doctor
+```
+
+## Commands
+
+### Download Models
+
+```bash
+# Download with auto-selected quantization
+yuy download Yuuki-best
+
+# Download specific quantization
+yuy download Yuuki-3.7 --quant q8_0
+
+# Available quantizations: q4_0, q4_k_m, q5_k_m, q8_0, f32
+```
+
+### Run Models
+
+```bash
+# Run with default settings
+yuy run Yuuki-best
+
+# Specify runtime (llama-cpp, llama-hf, or ollama)
+yuy run Yuuki-best --runtime llama-cpp
+
+# Use preset configuration
+yuy run Yuuki-best --preset creative  # creative, precise, balanced
+```
+
+### List Models
+
+```bash
+# List local models
+yuy list models
+
+# List available models on Hugging Face
+yuy list models --remote
+```
+
+### Model Information
+
+```bash
+# Show model info
+yuy info Yuuki-best
+
+# Show available variants
+yuy info Yuuki-best --variants
+```
+
+### Remove Models
+
+```bash
+yuy remove Yuuki-v0.1
+```
+
+### Runtime Management
+
+```bash
+# Check installed runtimes
+yuy runtime check
+
+# Install a runtime (interactive)
+yuy runtime install
+
+# Install specific runtime
+yuy runtime install llama-cpp
+
+# List available runtimes
+yuy runtime list
+```
+
+### System Diagnostics
+
+```bash
+yuy doctor
+```
+
+## Directory Structure
+
+```
+~/.yuuki/
+├── models/        # Downloaded models
+│   ├── Yuuki-best/
+│   ├── Yuuki-3.7/
+│   └── Yuuki-v0.1/
+└── config.toml    # Configuration
+```
+
+## Model Quantizations
+
+| Quantization | Size | Quality | Use Case |
+|-------------|------|---------|----------|
+| `q4_0` | Smallest | Good | Mobile, Termux |
+| `q5_k_m` | Medium | Better | Balanced |
+| `q8_0` | Large | Best | Desktop |
+| `f32` | Largest | Perfect | Full precision |
+
+## Runtimes
+
+### llama.cpp
+- Lightweight and fast
+- Best for Termux and low-end devices
+- Direct CLI usage
+
+### ollama
+- User-friendly
+- Server-based
+- API access
+
+## Development
+
+```bash
+# Run in development
+cargo run -- download Yuuki-best
+
+# Run tests
+cargo test
+
+# Build release
+cargo build --release
+```
+
+## Platform Support
+
+- ✅ Termux (Android)
+- ✅ Linux
+- ✅ macOS
+- ✅ Windows
+
+## Resources
+
+- **Models**: https://huggingface.co/OpceanAI
+- **Training Code**: https://github.com/YuuKi-OS/yuuki-training
+- **Issues**: https://github.com/YuuKi-OS/yuy/issues
+
+## License
+
+MIT
+
+---
+
+Made with 🌸 by the Yuuki team
diff --git a/yuy/src/commands/run.rs b/yuy/src/commands/run.rs
index 250a229..bf84823 100644
--- a/yuy/src/commands/run.rs
+++ b/yuy/src/commands/run.rs
@@ -1,7 +1,7 @@
 use anyhow::{Context, Result};
 use colored::Colorize;
 use std::process::Command;
-use crate::config::{get_models_dir, YUUKI_MODELS};
+use crate::config::{get_models_dir, YUUKI_MODELS, OLLAMA_ORG, HF_ORG};
 use crate::utils::command_exists;
 
 pub async fn execute(
@@ -62,8 +62,9 @@
 
     match runtime_name.as_str() {
         "llama-cpp" => run_with_llama_cpp(&model_path, preset).await,
-        "ollama" => run_with_ollama(model, &model_path).await,
-        _ => anyhow::bail!("Unknown runtime: {}. Use 'llama-cpp' or 'ollama'", runtime_name),
+        "llama-hf" => run_with_llama_hf(model, &quant_str, preset).await,
+        "ollama" => run_with_ollama(model, &quant_str).await,
+        _ => anyhow::bail!("Unknown runtime: {}. Use 'llama-cpp', 'llama-hf', or 'ollama'", runtime_name),
     }
 }
 
@@ -118,7 +119,7 @@ async fn run_with_llama_cpp(model_path: &std::path::Path, preset: Option
     Ok(())
 }
 
-async fn run_with_ollama(model: &str, _model_path: &std::path::Path) -> Result<()> {
+async fn run_with_ollama(model: &str, quant: &str) -> Result<()> {
     if !command_exists("ollama") {
         println!("{} ollama not found!", "✗".bright_red());
         println!("\n{} Install it first:", "→".bright_blue());
@@ -127,25 +128,72 @@ async fn run_with_ollama(model: &str, _model_path: &std::path::Path) -> Result<(
         return Ok(());
     }
 
-    println!(
-        "{} Note: Ollama integration is experimental.",
-        "⚠".bright_yellow()
-    );
-    println!("{} You'll need to import the model to Ollama first.", "→".bright_blue());
-    println!();
+    // Construct ollama model name: aguitachan3/yuuki-best:f32
+    let ollama_model = format!("{}/{}:{}", OLLAMA_ORG, model.to_lowercase(), quant);
 
-    // Check if ollama serve is running
-    println!("{} Checking Ollama service...", "→".bright_blue());
+    println!(
+        "{} Starting Ollama with {}...",
+        "▶".bright_green(),
+        ollama_model.bright_yellow()
+    );
+    println!();
 
-    // Try to run with ollama (this is simplified, real impl would be more complex)
     let status = Command::new("ollama")
         .arg("run")
-        .arg(model)
+        .arg(&ollama_model)
         .status()
         .context("Failed to execute ollama")?;
 
     if !status.success() {
-        println!("\n{} If the model is not in Ollama, you need to import it first.", "ℹ".bright_blue());
+        println!("\n{} Model not found in Ollama.", "ℹ".bright_blue());
+        println!("{} Pull it first: {}", "→".bright_blue(), format!("ollama pull {}", ollama_model).bright_green());
+    }
+
+    Ok(())
+}
+
+async fn run_with_llama_hf(model: &str, quant: &str, preset: Option<String>) -> Result<()> {
+    if !command_exists("llama-cli") {
+        println!("{} llama-cli not found!", "✗".bright_red());
+        println!("\n{} Install llama.cpp first:", "→".bright_blue());
+        println!("  Termux: {}", "pkg install llama-cpp".bright_green());
+        return Ok(());
+    }
+
+    // HuggingFace format: OpceanAI/Yuuki-best:F32
+    let hf_model = format!("{}/{}:{}", HF_ORG, model, quant.to_uppercase());
+
+    println!(
+        "{} Running directly from HuggingFace: {}",
+        "▶".bright_green(),
+        hf_model.bright_yellow()
+    );
+    println!("{} No download needed - streaming from HF", "ℹ".bright_blue());
+    println!();
+
+    // Configure parameters based on preset
+    let (temp, top_p) = match preset.as_deref() {
+        Some("creative") => (0.8, 0.9),
+        Some("precise") => (0.3, 0.5),
+        Some("balanced") | None => (0.6, 0.7),
+        _ => (0.6, 0.7),
+    };
+
+    // Run llama-cli with -hf flag
+    let status = Command::new("llama-cli")
+        .arg("-hf")
+        .arg(&hf_model)
+        .arg("--temp")
+        .arg(temp.to_string())
+        .arg("--top-p")
+        .arg(top_p.to_string())
+        .arg("-c")
+        .arg("4096")
+        .status()
+        .context("Failed to execute llama-cli with HuggingFace")?;
+
+    if !status.success() {
+        anyhow::bail!("llama-cli exited with error");
+    }
     }
 
     Ok(())
 }
diff --git a/yuy/src/config.rs b/yuy/src/config.rs
index 90be8d7..bddde6d 100644
--- a/yuy/src/config.rs
+++ b/yuy/src/config.rs
@@ -70,3 +70,6 @@ pub fn save_config(config: &Config) -> Result<()> {
 
 pub const YUUKI_MODELS: &[&str] = &["Yuuki-best", "Yuuki-3.7", "Yuuki-v0.1"];
 pub const HF_ORG: &str = "OpceanAI";
+pub const AVAILABLE_QUANTS: &[&str] = &["q4_0", "q4_k_m", "q5_k_m", "q8_0", "f32"];
+pub const OLLAMA_ORG: &str = "aguitachan3";
+pub const YUUKI_API: &str = "https://huggingface.co/spaces/OpceanAI/Yuuki-api";
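For reference, here is roughly what the new `llama-hf` path runs under the hood. This is an illustrative sketch only: the repo name, quant tag, sampling values, and context size below are taken from the constants, comments, and the `creative` preset in this patch, not captured from an actual run.

```bash
# Approximate equivalent of: yuy run Yuuki-best --runtime llama-hf --preset creative
# HF_ORG/<model>:<QUANT> as assembled by run_with_llama_hf(); llama-cli fetches the GGUF itself
llama-cli -hf OpceanAI/Yuuki-best:F32 --temp 0.8 --top-p 0.9 -c 4096
```

The ollama path is analogous, using the lower-cased name under `OLLAMA_ORG`: `ollama run aguitachan3/yuuki-best:f32`.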