Mirror of https://github.com/YuuKi-OS/yuy.git (synced 2026-02-18 22:01:09 +00:00)
pos nomas ("just because")
yuy/README.md (new file, 207 lines)
@@ -0,0 +1,207 @@
# Yuy - Official Yuuki CLI

```
$$\ $$\
\$$\ $$ |
\$$\ $$ /$$\ $$\ $$\ $$\
\$$$$ / $$ | $$ |$$ | $$ |
\$$ / $$ | $$ |$$ | $$ |
$$ | $$ | $$ |$$ | $$ |
$$ | \$$$$$$ |\$$$$$$$ |
\__| \______/ \____$$ |
$$\ $$ |
\$$$$$$ |
\______/
```

Official CLI for Yuuki - AI model management and inference

## Features

✨ **Download Models** - Get Yuuki models from Hugging Face
🚀 **Run Models** - Execute with llama.cpp or ollama
📋 **Manage Models** - List, info, and remove local models
🔧 **Runtime Management** - Install and check llama.cpp/ollama
🏥 **System Doctor** - Diagnose your setup

## Installation

### From Source

```bash
# Clone the repo
git clone https://github.com/YuuKi-OS/yuy
cd yuy

# Build
cargo build --release

# Install (optional)
cargo install --path .
```

### Termux

```bash
pkg install rust
git clone https://github.com/YuuKi-OS/yuy
cd yuy
cargo build --release
```
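
After either install path, it is worth confirming the binary actually built before moving on. A minimal sketch: the `target/release/yuy` path follows from `cargo build --release`, and `--help` is assumed to print the CLI's usage.

```bash
# Run the freshly built binary without installing it
./target/release/yuy --help

# Or, after `cargo install --path .`, call it from anywhere on PATH
yuy --help
```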
## Quick Start

```bash
# Initial setup
yuy setup

# Download a model
yuy download Yuuki-best

# Run the model
yuy run Yuuki-best

# Check system health
yuy doctor
```

## Commands

### Download Models

```bash
# Download with auto-selected quantization
yuy download Yuuki-best

# Download specific quantization
yuy download Yuuki-3.7 --quant q8_0

# Available quantizations: q4_0, q5_k_m, q8_0, f32
```

### Run Models

```bash
# Run with default settings
yuy run Yuuki-best

# Specify runtime
yuy run Yuuki-best --runtime llama-cpp

# Use preset configuration
yuy run Yuuki-best --preset creative   # creative, precise, balanced
```

### List Models

```bash
# List local models
yuy list models

# List available models on Hugging Face
yuy list models --remote
```

### Model Information

```bash
# Show model info
yuy info Yuuki-best

# Show available variants
yuy info Yuuki-best --variants
```
### Remove Models

```bash
yuy remove Yuuki-v0.1
```

### Runtime Management

```bash
# Check installed runtimes
yuy runtime check

# Install a runtime (interactive)
yuy runtime install

# Install specific runtime
yuy runtime install llama-cpp

# List available runtimes
yuy runtime list
```

### System Diagnostics

```bash
yuy doctor
```

## Directory Structure

```
~/.yuuki/
├── models/          # Downloaded models
│   ├── Yuuki-best/
│   ├── Yuuki-3.7/
│   └── Yuuki-v0.1/
└── config.toml      # Configuration
```

## Model Quantizations

| Quantization | Size     | Quality | Use Case       |
|--------------|----------|---------|----------------|
| `q4_0`       | Smallest | Good    | Mobile, Termux |
| `q5_k_m`     | Medium   | Better  | Balanced       |
| `q8_0`       | Large    | Best    | Desktop        |
| `f32`        | Largest  | Perfect | Full precision |
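
The table is a straight size/quality trade-off: smaller quantizations fit constrained devices, while `f32` keeps full precision at the largest download. A sketch of how that maps onto the `--quant` flag (the model name is only an example):

```bash
# Constrained device (Termux / older phone): smallest download
yuy download Yuuki-best --quant q4_0

# Desktop with disk and RAM to spare: best quality short of full precision
yuy download Yuuki-best --quant q8_0
```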
## Runtimes

### llama.cpp
- Lightweight and fast
- Best for Termux and low-end devices
- Direct CLI usage (see the sketch below)
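
As a sketch of that direct CLI usage: once a model is downloaded, llama.cpp's `llama-cli` can be pointed straight at the GGUF file under `~/.yuuki/models/`. The file name shown here is hypothetical; substitute whatever yuy actually downloaded.

```bash
# Hypothetical GGUF path - replace with the file yuy placed in the model directory
llama-cli -m ~/.yuuki/models/Yuuki-best/yuuki-best-q4_0.gguf -p "Hello"
```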
### ollama
- User-friendly
- Server-based
- API access (example below)
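
The API access comes from Ollama itself rather than from yuy: once `ollama serve` is running and the model has been pulled, it can be queried over Ollama's local HTTP API. A minimal sketch, assuming the default port and the model tag used elsewhere in this commit (`aguitachan3/yuuki-best:f32`):

```bash
curl http://localhost:11434/api/generate -d '{
  "model": "aguitachan3/yuuki-best:f32",
  "prompt": "Hello, Yuuki!"
}'
```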
## Development

```bash
# Run in development
cargo run -- download Yuuki-best

# Run tests
cargo test

# Build release
cargo build --release
```

## Platform Support

- ✅ Termux (Android)
- ✅ Linux
- ✅ macOS
- ✅ Windows

## Resources

- **Models**: https://huggingface.co/OpceanAI
- **Training Code**: https://github.com/YuuKi-OS/yuuki-training
- **Issues**: https://github.com/YuuKi-OS/yuy/issues

## License

MIT

---

Made with 🌸 by the Yuuki team

@@ -1,7 +1,7 @@
 use anyhow::{Context, Result};
 use colored::Colorize;
 use std::process::Command;
-use crate::config::{get_models_dir, YUUKI_MODELS};
+use crate::config::{get_models_dir, YUUKI_MODELS, OLLAMA_ORG, HF_ORG};
 use crate::utils::command_exists;
 
 pub async fn execute(
@@ -62,8 +62,9 @@ pub async fn execute(
 
     match runtime_name.as_str() {
         "llama-cpp" => run_with_llama_cpp(&model_path, preset).await,
-        "ollama" => run_with_ollama(model, &model_path).await,
-        _ => anyhow::bail!("Unknown runtime: {}. Use 'llama-cpp' or 'ollama'", runtime_name),
+        "llama-hf" => run_with_llama_hf(model, &quant_str, preset).await,
+        "ollama" => run_with_ollama(model, &quant_str).await,
+        _ => anyhow::bail!("Unknown runtime: {}. Use 'llama-cpp', 'llama-hf', or 'ollama'", runtime_name),
     }
 }
 
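In user-facing terms, this hunk adds a third accepted value for the `--runtime` flag documented in the README. A sketch of the three invocations after this change (flag name from the README, runtime names from the new match arms):

```bash
# Run a locally downloaded model through llama.cpp
yuy run Yuuki-best --runtime llama-cpp

# Run through a local Ollama server
yuy run Yuuki-best --runtime ollama

# New: stream the model straight from Hugging Face via llama-cli's -hf flag
yuy run Yuuki-best --runtime llama-hf
```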
@@ -118,7 +119,7 @@ async fn run_with_llama_cpp(model_path: &std::path::Path, preset: Option<String>
     Ok(())
 }
 
-async fn run_with_ollama(model: &str, _model_path: &std::path::Path) -> Result<()> {
+async fn run_with_ollama(model: &str, quant: &str) -> Result<()> {
     if !command_exists("ollama") {
         println!("{} ollama not found!", "✗".bright_red());
         println!("\n{} Install it first:", "→".bright_blue());
@@ -127,25 +128,72 @@ async fn run_with_ollama(model: &str, _model_path: &std::path::Path) -> Result<(
         return Ok(());
     }
 
-    println!(
-        "{} Note: Ollama integration is experimental.",
-        "⚠".bright_yellow()
-    );
-    println!("{} You'll need to import the model to Ollama first.", "→".bright_blue());
-    println!();
-
-    // Check if ollama serve is running
-    println!("{} Checking Ollama service...", "→".bright_blue());
-
-    // Try to run with ollama (this is simplified, real impl would be more complex)
+    // Construct ollama model name: aguitachan3/yuuki-best:f32
+    let ollama_model = format!("{}/{}:{}", OLLAMA_ORG, model.to_lowercase(), quant);
+
+    println!(
+        "{} Starting Ollama with {}...",
+        "▶".bright_green(),
+        ollama_model.bright_yellow()
+    );
+    println!();
+
     let status = Command::new("ollama")
         .arg("run")
-        .arg(model)
+        .arg(&ollama_model)
         .status()
         .context("Failed to execute ollama")?;
 
     if !status.success() {
-        println!("\n{} If the model is not in Ollama, you need to import it first.", "ℹ".bright_blue());
+        println!("\n{} Model not found in Ollama.", "ℹ".bright_blue());
+        println!("{} Pull it first: {}", "→".bright_blue(), format!("ollama pull {}", ollama_model).bright_green());
+    }
+
+    Ok(())
+}
+
+async fn run_with_llama_hf(model: &str, quant: &str, preset: Option<String>) -> Result<()> {
+    if !command_exists("llama-cli") {
+        println!("{} llama-cli not found!", "✗".bright_red());
+        println!("\n{} Install llama.cpp first:", "→".bright_blue());
+        println!("  Termux: {}", "pkg install llama-cpp".bright_green());
+        return Ok(());
+    }
+
+    // HuggingFace format: OpceanAI/Yuuki-best:F32
+    let hf_model = format!("{}/{}:{}", HF_ORG, model, quant.to_uppercase());
+
+    println!(
+        "{} Running directly from HuggingFace: {}",
+        "▶".bright_green(),
+        hf_model.bright_yellow()
+    );
+    println!("{} No download needed - streaming from HF", "ℹ".bright_blue());
+    println!();
+
+    // Configure parameters based on preset
+    let (temp, top_p) = match preset.as_deref() {
+        Some("creative") => (0.8, 0.9),
+        Some("precise") => (0.3, 0.5),
+        Some("balanced") | None => (0.6, 0.7),
+        _ => (0.6, 0.7),
+    };
+
+    // Run llama-cli with -hf flag
+    let status = Command::new("llama-cli")
+        .arg("-hf")
+        .arg(&hf_model)
+        .arg("--temp")
+        .arg(temp.to_string())
+        .arg("--top-p")
+        .arg(top_p.to_string())
+        .arg("-c")
+        .arg("4096")
+        .status()
+        .context("Failed to execute llama-cli with HuggingFace")?;
+
+    if !status.success() {
+        anyhow::bail!("llama-cli exited with error");
     }
 
     Ok(())
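Taken together, the two new code paths boil down to shelling out to commands the user could also run by hand. A rough sketch of those equivalents, using the org names, preset defaults, and context size from this diff (model tags are illustrative; `-hf` is the llama.cpp flag the new code passes to fetch a model from Hugging Face):

```bash
# Roughly what `yuy run Yuuki-best --runtime llama-hf` executes
# (temp/top-p shown for the default "balanced" preset)
llama-cli -hf OpceanAI/Yuuki-best:F32 --temp 0.6 --top-p 0.7 -c 4096

# What the ollama path runs, plus the pull it suggests on failure
ollama run aguitachan3/yuuki-best:f32
ollama pull aguitachan3/yuuki-best:f32
```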
@@ -70,3 +70,6 @@ pub fn save_config(config: &Config) -> Result<()> {
 
 pub const YUUKI_MODELS: &[&str] = &["Yuuki-best", "Yuuki-3.7", "Yuuki-v0.1"];
 pub const HF_ORG: &str = "OpceanAI";
+pub const AVAILABLE_QUANTS: &[&str] = &["q4_0", "q4_k_m", "q5_k_m", "q8_0", "f32"];
+pub const OLLAMA_ORG: &str = "aguitachan3";
+pub const YUUKI_API: &str = "https://huggingface.co/spaces/OpceanAI/Yuuki-api";