init commit

2026-02-05 21:57:23 -06:00
parent 85a99a332f
commit 8ca411ec5b
17 changed files with 2315 additions and 112 deletions

160
LICENSE

@@ -1,7 +1,9 @@
Apache License
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
Copyright 2026 OpceanAI
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
@@ -63,130 +65,64 @@
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
2. Grant of Copyright License.
Subject to the terms and conditions of this License, each Contributor
hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable copyright license to reproduce, prepare
Derivative Works of, publicly display, publicly perform, sublicense,
and distribute the Work and such Derivative Works in Source or Object
form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
3. Grant of Patent License.
Subject to the terms and conditions of this License, each Contributor
hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable (except as stated in this section) patent
license to make, have made, use, offer to sell, sell, import, and
otherwise transfer the Work, where such license applies only to those
patent claims licensable by such Contributor that are necessarily
infringed by their Contribution(s) alone or by combination of their
Contribution(s) with the Work to which such Contribution(s) was
submitted. If You institute patent litigation against any entity
alleging that the Work or a Contribution constitutes patent
infringement, then any patent licenses granted under this License
shall terminate as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
4. Redistribution.
You may reproduce and distribute copies of the Work or Derivative
Works thereof in any medium, with or without modifications, provided
that You meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(a) You must give recipients a copy of this License; and
(b) You must cause modified files to carry prominent notices stating
that You changed the files; and
(c) You must retain all copyright, patent, trademark, and attribution
notices; and
(d) Any NOTICE file must be included if present.
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
5. Submission of Contributions.
Unless You explicitly state otherwise, any Contribution submitted
shall be under the terms of this License.
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
6. Trademarks.
This License does not grant permission to use the trade names,
trademarks, or service marks of the Licensor.
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
7. Disclaimer of Warranty.
The Work is provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
8. Limitation of Liability.
In no event shall any Contributor be liable for damages arising from
the use of the Work.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
9. Accepting Warranty or Additional Liability.
You may offer support or warranty only on Your own behalf.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Copyright 2026 OpceanAI
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

838
README.md Normal file

@@ -0,0 +1,838 @@
<p align="center">
```
$$\ $$\
\$$\ $$ |
\$$\ $$ /$$\ $$\ $$\ $$\
\$$$$ / $$ | $$ |$$ | $$ |
\$$ / $$ | $$ |$$ | $$ |
$$ | $$ | $$ |$$ | $$ |
$$ | \$$$$$$ |\$$$$$$$ |
\__| \______/ \____$$ |
$$\ $$ |
\$$$$$$ |
\______/
```
**The official CLI for the Yuuki project.**
Download, manage, and run Yuuki models locally.
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
[![Rust](https://img.shields.io/badge/rust-1.75%2B-orange.svg)](https://www.rust-lang.org/)
[![Platform](https://img.shields.io/badge/platform-Termux%20%7C%20Linux%20%7C%20macOS%20%7C%20Windows-green.svg)](#platform-support)
[![HuggingFace](https://img.shields.io/badge/models-HuggingFace-yellow.svg)](https://huggingface.co/OpceanAI)
</p>
---
## Table of Contents
- [About](#about)
- [Features](#features)
- [Installation](#installation)
- [Quick Start](#quick-start)
- [Commands](#commands)
- [download](#download)
- [run](#run)
- [list](#list)
- [info](#info)
- [remove](#remove)
- [runtime](#runtime)
- [doctor](#doctor)
- [setup](#setup)
- [Model Quantizations](#model-quantizations)
- [Runtimes](#runtimes)
- [Configuration](#configuration)
- [Architecture](#architecture)
- [Platform Support](#platform-support)
- [Platform-Specific Optimizations](#platform-specific-optimizations)
- [Design Decisions](#design-decisions)
- [Performance](#performance)
- [Security](#security)
- [Roadmap](#roadmap)
- [Contributing](#contributing)
- [About Yuuki](#about-yuuki)
- [Links](#links)
- [License](#license)
---
## About
**Yuy** is the official command-line interface for the [Yuuki project](https://huggingface.co/OpceanAI) — an LLM trained entirely on a smartphone. Yuy provides a complete toolkit for downloading, managing, and running Yuuki models on local hardware, with first-class support for mobile devices running Termux.
Yuy wraps proven inference engines (llama.cpp, ollama) and provides an opinionated, streamlined experience on top of them. It handles model discovery, quantization selection, runtime management, and system diagnostics so you can go from zero to inference in three commands.
```
yuy setup
yuy download Yuuki-best
yuy run Yuuki-best
```
---
## Features
- **Download models** from Hugging Face with streaming progress bars and auto-selected quantization
- **Run models** locally using llama.cpp or ollama with preset configurations
- **Manage models** — list, inspect, and remove local models
- **Runtime management** — detect, install, and configure inference runtimes
- **System diagnostics** — check hardware, dependencies, and configuration health
- **Cross-platform** — Termux (Android), Linux, macOS, and Windows
- **Mobile-first** — optimized defaults for constrained hardware
- **Zero configuration** — smart defaults that work out of the box
---
## Installation
### Prerequisites
- [Rust](https://www.rust-lang.org/tools/install) 1.75 or later
- An inference runtime: [llama.cpp](https://github.com/ggerganov/llama.cpp) or [ollama](https://ollama.ai) (Yuy can install these for you)
### From Source
```bash
git clone https://github.com/YuuKi-OS/yuy
cd yuy
cargo build --release
```
The binary will be at `target/release/yuy`. Optionally install it system-wide:
```bash
cargo install --path .
```
### Termux (Android)
```bash
pkg install rust git
git clone https://github.com/YuuKi-OS/yuy
cd yuy
cargo build --release
```
> **Note:** First compilation on Termux takes longer due to ARM CPU constraints. Subsequent builds use cache and are significantly faster.
### Verify Installation
```bash
yuy --version
yuy doctor
```
---
## Quick Start
```bash
# 1. Initial setup — creates directories, detects hardware, offers runtime install
yuy setup
# 2. Download a model — auto-selects best quantization for your hardware
yuy download Yuuki-best
# 3. Run the model — interactive chat session
yuy run Yuuki-best
```
That's it. Yuy handles quantization selection, runtime detection, and parameter configuration automatically.
---
## Commands
### download
Download Yuuki models from Hugging Face.
```bash
# Auto-select best quantization for your hardware
yuy download Yuuki-best
# Specify a quantization
yuy download Yuuki-best --quant q8_0
# Download a different model
yuy download Yuuki-3.7 --quant q4_0
```
**What happens:**
1. Validates the model name against the known model registry
2. Detects your platform and available RAM
3. Recommends the best quantization (or uses your override)
4. Constructs the Hugging Face download URL
5. Streams the file with a progress bar showing speed and ETA
6. Saves to `~/.yuuki/models/<model-name>/`
**Available quantizations:** `q4_0`, `q5_k_m`, `q8_0`, `f32`
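The URL built in step 4 follows a fixed pattern. A minimal sketch of that construction (mirroring `src/commands/download.rs` in this repo; the `<model>-<quant>.gguf` file naming is the convention used on the Hugging Face repos):
```rust
/// Build the Hugging Face download URL for a model + quantization pair.
/// Mirrors src/commands/download.rs; HF_ORG is the OpceanAI organization.
fn hf_download_url(model: &str, quant: &str) -> String {
    const HF_ORG: &str = "OpceanAI";
    let filename = format!("{}-{}.gguf", model.to_lowercase(), quant);
    format!("https://huggingface.co/{}/{}/resolve/main/{}", HF_ORG, model, filename)
}

// hf_download_url("Yuuki-best", "q4_0")
// -> "https://huggingface.co/OpceanAI/Yuuki-best/resolve/main/yuuki-best-q4_0.gguf"
```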
---
### run
Run a downloaded model with an inference runtime.
```bash
# Run with defaults
yuy run Yuuki-best
# Specify a runtime
yuy run Yuuki-best --runtime llama-cpp
# Use a preset
yuy run Yuuki-best --preset creative
```
**Presets:**
| Preset | Temperature | Top P | Use Case |
|--------|-------------|-------|----------|
| `balanced` | 0.6 | 0.7 | General use (default) |
| `creative` | 0.8 | 0.9 | Creative writing, exploration |
| `precise` | 0.3 | 0.5 | Factual, deterministic output |
Yuy detects the available runtime automatically. If both llama.cpp and ollama are installed, it defaults to llama.cpp (or your configured preference).
**Runtime detection order for llama.cpp:**
```
llama-cli → llama → main
```
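A minimal sketch of that fallback (the same order `src/commands/run.rs` uses; `command_exists` shells out to `which`, as in `src/utils.rs`):
```rust
use std::process::Command;

/// Return the first llama.cpp binary name found on PATH, in the order above.
fn find_llama_binary() -> Option<&'static str> {
    ["llama-cli", "llama", "main"]
        .iter()
        .copied()
        .find(|&cmd| command_exists(cmd))
}

/// True if `which` can resolve the command.
fn command_exists(cmd: &str) -> bool {
    Command::new("which")
        .arg(cmd)
        .output()
        .map(|out| out.status.success())
        .unwrap_or(false)
}
```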
---
### list
List models locally or remotely.
```bash
# List downloaded models with sizes
yuy list models
# List all available models on Hugging Face
yuy list models --remote
```
**Example output:**
```
Local Models:
Yuuki-best q4_0 2.3 GB
Yuuki-3.7 q5_k_m 3.1 GB
Total: 5.4 GB
```
---
### info
Display detailed information about a model.
```bash
# Show model info
yuy info Yuuki-best
# Show available variants/quantizations
yuy info Yuuki-best --variants
```
Shows download status, file sizes, available quantizations, and the path on disk.
---
### remove
Remove a downloaded model.
```bash
yuy remove Yuuki-v0.1
```
Calculates the disk space to be freed and asks for confirmation before deletion.
---
### runtime
Manage inference runtimes.
```bash
# Check what's installed
yuy runtime check
# Install a runtime (interactive selection)
yuy runtime install
# Install a specific runtime
yuy runtime install llama-cpp
# List supported runtimes
yuy runtime list
```
**Installation methods by platform:**
| Platform | llama.cpp | ollama |
|----------|-----------|--------|
| Termux | `pkg install llama-cpp` | `pkg install ollama` |
| macOS | `brew install llama.cpp` | `brew install ollama` |
| Linux | Binary from GitHub Releases | Official installer |
| Windows | Chocolatey or manual download | Official installer |
---
### doctor
Run a full system diagnostic.
```bash
yuy doctor
```
**Example output:**
```
System Information:
Platform: Termux
OS: linux
Arch: aarch64
RAM: ~6 GB
Recommended quantization: q4_0
Yuuki Configuration:
Config dir: /data/data/com.termux/files/home/.yuuki
Models dir: /data/data/com.termux/files/home/.yuuki/models
Models downloaded: 2
Total size: 3.7 GB
Runtime Status:
[ok] llama.cpp installed (v3.1.0)
[--] ollama not installed
System Dependencies:
[ok] curl available
[ok] wget available
[--] git not found
Health Summary:
System is ready to use Yuuki!
```
---
### setup
First-time setup wizard.
```bash
yuy setup
```
Creates the `~/.yuuki/` directory structure, detects your platform and hardware, checks for runtimes, and offers to install one if none are found. Run this once after installation.
---
## Model Quantizations
Quantization reduces model size at the cost of some precision. Yuy automatically recommends the best option for your hardware.
| Quantization | Relative Size | Quality | Recommended For |
|-------------|---------------|---------|-----------------|
| `q4_0` | Smallest | Good | Termux, low-RAM devices (<8 GB) |
| `q5_k_m` | Medium | Better | Desktop with 8-16 GB RAM |
| `q8_0` | Large | Best | Desktop with 16+ GB RAM |
| `f32` | Largest | Full precision | Research, analysis |
**Auto-selection logic:**
```
Termux (any RAM) → q4_0
Linux/macOS (<8 GB) → q4_0
Linux/macOS (<16 GB) → q5_k_m (default)
Linux/macOS (16+ GB) → q8_0
```
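The same decision table expressed in code, a sketch equivalent to `recommend_quantization` in `src/utils.rs` (the `Platform` enum is reproduced so the snippet stands alone):
```rust
enum Platform { Termux, Linux, MacOS, Windows, Unknown }

/// Map platform and available RAM to a default quantization.
fn recommend_quantization(platform: Platform, ram_gb: usize) -> &'static str {
    match (platform, ram_gb) {
        (Platform::Termux, _) => "q4_0",   // always minimize footprint on-device
        (_, ram) if ram < 8 => "q4_0",     // low-RAM desktops
        (_, ram) if ram < 16 => "q5_k_m",  // default desktop choice
        _ => "q8_0",                       // 16+ GB: best quality
    }
}
```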
---
## Runtimes
Yuy delegates inference to external engines. It currently supports two runtimes:
### llama.cpp
The default and recommended runtime. Lightweight, portable, and highly optimized.
- Single binary, no dependencies
- CPU-optimized with SIMD (NEON on ARM, AVX on x86)
- Optional GPU acceleration (CUDA, Metal, Vulkan)
- Low memory footprint
- Ideal for Termux
**How Yuy invokes llama.cpp:**
```bash
llama-cli \
-m ~/.yuuki/models/Yuuki-best/yuuki-best-q4_0.gguf \
--interactive \
--temp 0.6 \
--top-p 0.7 \
-c 4096 \
-n -1 \
--color
```
### ollama
Server-based runtime with a more user-friendly model management system.
- Built-in model management
- REST API for programmatic access
- Can serve multiple models
- Optional web UI
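For programmatic access, ollama's REST API can be called directly. A hedged sketch with `reqwest` (assumes a local ollama server on its default port 11434 and a model that has already been imported into ollama; `yuy` itself does not ship this helper):
```rust
use serde_json::json;

/// Request a completion from a locally running ollama server.
/// Illustration only: endpoint, port, and model availability are assumptions.
async fn ollama_generate(model: &str, prompt: &str) -> anyhow::Result<String> {
    let client = reqwest::Client::new();
    let body: serde_json::Value = client
        .post("http://localhost:11434/api/generate")
        .json(&json!({ "model": model, "prompt": prompt, "stream": false }))
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;
    Ok(body["response"].as_str().unwrap_or_default().to_string())
}
```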
---
## Configuration
### Config File
Location: `~/.yuuki/config.toml`
```toml
hf_token = "" # Optional: for private models
default_runtime = "llama-cpp" # llama-cpp | ollama
default_quant = "q5_k_m" # q4_0 | q5_k_m | q8_0 | f32
```
### Priority Order
Settings are resolved in this order (highest priority first):
1. **CLI flags** — `yuy run Yuuki-best --quant q8_0`
2. **Config file** — `default_quant = "q5_k_m"`
3. **Auto-detection** — platform and hardware-based defaults
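A minimal sketch of that resolution order (illustrative; it assumes the `Config` struct from `src/config.rs` and `recommend_quantization` from `src/utils.rs` are in scope):
```rust
/// CLI flag beats the config file, which beats hardware auto-detection.
fn resolve_quant(
    cli_flag: Option<String>,
    config: &Config,
    platform: Platform,
    ram_gb: usize,
) -> String {
    cli_flag
        .or_else(|| config.default_quant.clone())
        .unwrap_or_else(|| recommend_quantization(platform, ram_gb).to_string())
}
```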
### Directory Structure
```
~/.yuuki/
├── config.toml # User configuration
└── models/ # Downloaded models
├── Yuuki-best/
│ ├── yuuki-best-q4_0.gguf
│ └── yuuki-best-q5_k_m.gguf
├── Yuuki-3.7/
└── Yuuki-v0.1/
```
On Termux, the base path is `/data/data/com.termux/files/home/.yuuki/`.
---
## Architecture
```
┌─────────────────────────────────────────────────────┐
│ User │
└───────────────────────┬─────────────────────────────┘
│
v
┌─────────────────────────────────────────────────────┐
│ Yuy CLI (Rust) │
│ │
│ CLI Layer ──────── clap + colored │
│ │ Argument parsing, UI, validation │
│ v │
│ Commands Layer ─── 8 async command modules │
│ │ download, run, list, info, │
│ │ remove, runtime, doctor, setup │
│ v │
│ Core Services ──── config.rs + utils.rs │
│ Config management, platform │
│ detection, formatting │
└──────────┬──────────────────────┬────────────────────┘
│ │
v v
┌─────────────────┐ ┌──────────────────┐
│ External APIs │ │ Local Storage │
│ Hugging Face │ │ ~/.yuuki/ │
│ GitHub │ │ Models + Config │
└────────┬─────────┘ └──────────────────┘
│
v
┌──────────────────────────────┐
│ Inference Runtimes │
│ llama.cpp │ ollama │
└──────────────────────────────┘
```
### Source Layout
```
yuy/
├── Cargo.toml # Project manifest and dependencies
├── README.md
├── PROJECT.md # Technical documentation
│
└── src/
├── main.rs # Entry point, CLI router, error handling
├── cli.rs # CLI definitions with clap derive macros
├── config.rs # Configuration management, paths, constants
├── utils.rs # Platform detection, RAM check, formatting
│
└── commands/
├── mod.rs # Module declarations
├── download.rs # Model download with streaming + progress
├── run.rs # Model execution with runtime detection
├── list.rs # Local and remote model listing
├── info.rs # Model metadata and variant inspection
├── remove.rs # Model deletion with confirmation
├── runtime.rs # Runtime detection and installation
├── doctor.rs # System diagnostics
└── setup.rs # First-time setup wizard
```
### Design Patterns
- **Command pattern** — Each command is an isolated async module with an `execute()` entry point
- **Type-safe CLI** — `clap` derive macros ensure compile-time validation of arguments
- **Async I/O** — Tokio runtime for non-blocking downloads and process management
- **Error propagation** — `anyhow::Result` with contextual error messages throughout
### Dependencies
| Crate | Purpose |
|-------|---------|
| `clap` | CLI argument parsing with derive macros |
| `tokio` | Async runtime |
| `reqwest` | HTTP client for downloads |
| `indicatif` | Progress bars |
| `colored` | Terminal color output |
| `serde` + `toml` | Configuration serialization |
| `dirs` | Cross-platform home directory detection |
| `anyhow` | Error handling |
| `futures-util` | Stream utilities for downloads |
---
## Platform Support
| Platform | Status | Notes |
|----------|--------|-------|
| Termux (Android) | Full support | Primary target, fully tested |
| Linux (x86_64) | Full support | Tested on Ubuntu 22.04+ |
| Linux (ARM64) | Full support | Tested on Raspberry Pi |
| macOS (Intel) | Full support | Tested on Big Sur+ |
| macOS (Apple Silicon) | Full support | Metal acceleration via llama.cpp |
| Windows 10/11 | Partial | Runtime auto-install not yet implemented |
---
## Platform-Specific Optimizations
### Termux (Android)
Termux is the primary target. Yuy applies these optimizations automatically:
- **Default quantization:** `q4_0` (minimum memory footprint)
- **Download buffer:** 64 KB (conservative for mobile I/O)
- **Compilation:** Single-threaded (`-j 1`) to avoid thermal throttling
- **Progress bars:** Simplified for narrower terminal widths
**Platform detection:**
```rust
std::env::var("PREFIX")
.map(|p| p.contains("com.termux"))
.unwrap_or(false)
```
### Linux Desktop
- Default quantization: `q5_k_m`
- Parallel compilation
- GPU support via CUDA or ROCm when available
### macOS
- Metal acceleration for Apple Silicon GPUs
- Homebrew-based runtime installation
- `q8_0` default on machines with 16+ GB RAM
### Windows
- Path handling with backslashes
- Chocolatey for package management
- CUDA support for NVIDIA GPUs
---
## Design Decisions
### Why Rust?
- **Performance** — small, fast binaries with no runtime overhead
- **Memory safety** — no garbage collector, no segfaults
- **Async ecosystem** — Tokio provides mature non-blocking I/O
- **Cross-compilation** — single codebase targets all platforms
- **Cargo** — dependency management and build system in one tool
### Why wrap llama.cpp instead of building a custom runtime?
Pragmatism. llama.cpp has 3+ years of optimization work from 500+ contributors. It handles SIMD, GPU acceleration, quantization formats, and thousands of edge cases. Building an equivalent would take years for a single developer. Yuy provides the experience layer; llama.cpp provides the engine.
### Why clap for CLI?
clap v4 absorbed structopt, has the best documentation in the Rust CLI ecosystem, supports colored help text, and provides compile-time validation through derive macros.
### Why TOML for configuration?
TOML is more readable than JSON, simpler than YAML, and is the standard in the Rust ecosystem (Cargo.toml). First-class serde support makes serialization trivial.
### Why async/await?
Large model downloads (multi-GB) must not block the UI. Async keeps progress bars smooth and lays the groundwork for future parallel chunk downloads.
---
## Performance
### Benchmarks
| Operation | Target | Actual |
|-----------|--------|--------|
| CLI startup | <100 ms | ~50 ms |
| Download 1 GB | <5 min | 3-4 min (network dependent) |
| Model listing | <50 ms | ~10 ms |
| Doctor check | <200 ms | ~150 ms |
### Binary Size
```
Release build: ~8 MB
```
### Code Statistics
```
Rust source files: 15
Lines of code: ~2,500
Direct dependencies: 11
Clean build time: ~2 min
```
---
## Security
### Current Measures
- **URL validation** — only downloads from `https://huggingface.co/`
- **No arbitrary code execution** — Yuy spawns runtimes, never executes model content
- **Scoped file access** — all operations within `~/.yuuki/`
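A sketch of what those checks amount to (the shipped downloader builds URLs from a constant `huggingface.co` base and joins paths under the models directory, so these helpers restate the invariants rather than reproduce the exact implementation):
```rust
use std::path::{Component, Path};

/// Accept only HTTPS URLs on huggingface.co.
fn is_allowed_download_url(url: &str) -> bool {
    url.starts_with("https://huggingface.co/")
}

/// Keep file operations inside the managed models directory and reject
/// paths that try to escape it with `..` components.
fn is_within_models_dir(models_dir: &Path, candidate: &Path) -> bool {
    candidate.starts_with(models_dir)
        && !candidate.components().any(|c| matches!(c, Component::ParentDir))
}
```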
### Planned (v0.2+)
- SHA256 checksum verification for downloaded models
- System keyring integration for Hugging Face tokens (instead of plaintext in config)
- File permission enforcement (`0o600` for sensitive files)
- Encrypted token storage on Termux via libsodium
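A sketch of the planned checksum step, assuming the `sha2` crate (not currently in `Cargo.toml`) and an expected digest published alongside the model file:
```rust
use sha2::{Digest, Sha256};
use std::{fs::File, io::Read, path::Path};

/// Compare a downloaded file's SHA-256 digest against an expected hex string.
/// Where the expected digest comes from (sidecar file, API response) is open.
fn verify_sha256(path: &Path, expected_hex: &str) -> anyhow::Result<bool> {
    let mut file = File::open(path)?;
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 64 * 1024];
    loop {
        let n = file.read(&mut buf)?;
        if n == 0 { break; }
        hasher.update(&buf[..n]);
    }
    let actual: String = hasher.finalize().iter().map(|b| format!("{:02x}", b)).collect();
    Ok(actual.eq_ignore_ascii_case(expected_hex))
}
```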
---
## Roadmap
### Phase 1: MVP (Complete)
- [x] Core CLI with 8 commands
- [x] Download from Hugging Face with progress bars
- [x] Run models with llama.cpp
- [x] Model management (list, info, remove)
- [x] Runtime detection and installation
- [x] System diagnostics
- [x] Setup wizard
- [x] Multi-platform support
- [x] Auto-selection of quantization
- [x] Colored terminal output
### Phase 2: Core Features (In Progress)
- [ ] Resume interrupted downloads (see the sketch after this list)
- [ ] Parallel chunk downloads
- [ ] SHA256 checksum verification
- [ ] Full ollama integration (Modelfile generation)
- [ ] Automated installation on all platforms
- [ ] Unit and integration tests
- [ ] CI/CD with GitHub Actions
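A sketch of how resumable downloads (first item above) might use HTTP Range requests with `reqwest`; the `.partial` naming follows the commit-message example in the Contributing section, and everything else here is an assumption rather than shipped code:
```rust
use futures_util::StreamExt;
use std::io::Write;
use std::path::Path;

/// Ask the server only for the bytes we do not have yet and append them.
async fn resume_download(url: &str, partial_path: &Path) -> anyhow::Result<()> {
    // Bytes already on disk (0 if the .partial file does not exist yet).
    let already = std::fs::metadata(partial_path).map(|m| m.len()).unwrap_or(0);
    let resp = reqwest::Client::new()
        .get(url)
        .header(reqwest::header::RANGE, format!("bytes={}-", already))
        .send()
        .await?
        .error_for_status()?;
    // A server that ignores Range replies 200 with the full body; appending
    // that to an existing partial file would corrupt it, so bail instead.
    if already > 0 && resp.status() != reqwest::StatusCode::PARTIAL_CONTENT {
        anyhow::bail!("server did not honor the Range request; restart the download");
    }
    let mut file = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(partial_path)?;
    let mut stream = resp.bytes_stream();
    while let Some(chunk) = stream.next().await {
        file.write_all(&chunk?)?;
    }
    Ok(())
}
```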
### Phase 3: Advanced Features (Planned)
- [ ] Persistent conversation sessions
```
~/.yuuki/conversations/
├── session-2026-01-15.json
└── session-2026-01-16.json
```
- [ ] Template system for custom prompts
```bash
yuy template create coding-assistant
yuy run Yuuki-best --template coding-assistant
```
- [ ] Custom user-defined presets
```toml
[presets.my-creative]
temperature = 0.9
top_p = 0.95
top_k = 50
```
- [ ] llama.cpp library integration (bypass CLI spawning)
- [ ] Training code download command
### Phase 4: Ecosystem (Future)
- [ ] Plugin system
- [ ] Optional web UI
- [ ] REST API server mode
- [ ] Auto-updates
- [ ] Optional telemetry (opt-in)
- [ ] Community model hub with ratings
- [ ] Fine-tuning helpers
---
## Contributing
### Development Setup
```bash
# Clone
git clone https://github.com/YuuKi-OS/yuy
cd yuy
# Install Rust (if needed)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# Install dev tools
cargo install cargo-watch cargo-edit
# Verify
cargo check
cargo test
cargo fmt -- --check
cargo clippy
```
### Commit Convention
```
<type>(<scope>): <subject>
```
**Types:** `feat`, `fix`, `docs`, `style`, `refactor`, `test`, `chore`
**Example:**
```
feat(download): add resume capability
- Implement Range headers for resume
- Save download state in .partial files
- Auto-recover on failure
Closes #42
```
### Pull Request Checklist
- [ ] Tests pass (`cargo test`)
- [ ] Code is formatted (`cargo fmt`)
- [ ] No clippy warnings (`cargo clippy`)
- [ ] Documentation updated if needed
- [ ] Commits follow the convention above
### Coding Standards
- `snake_case` for functions, `CamelCase` for types
- Document all public functions
- Use `Result<T>` and the `?` operator for error handling
- Prefer `async/await` over callbacks
- Justify any new dependency
---
## About Yuuki
Yuy exists to serve the [Yuuki project](https://huggingface.co/OpceanAI/Yuuki-best) — a code-generation LLM being trained entirely on a smartphone (Redmi 12, Snapdragon 685, CPU only) with zero cloud budget.
**Key facts about the model:**
| Detail | Value |
|--------|-------|
| Base model | GPT-2 (124M parameters) |
| Training type | Continued pre-training (fine-tuning) |
| Hardware | Snapdragon 685, CPU only |
| Training time | 50+ hours |
| Progress | 2,000 / 37,500 steps (5.3%) |
| Cost | $0.00 |
| Best language | Agda (55/100) |
| License | Apache 2.0 |
**Current quality scores (Checkpoint 2000):**
| Language | Score |
|----------|-------|
| Agda | 55/100 |
| C | 20/100 |
| Assembly | 15/100 |
| Python | 8/100 |
A fully native model (trained from scratch, not fine-tuned) is planned for v1.0. A research paper documenting the mobile training methodology is in preparation.
---
## Links
| Resource | URL |
|----------|-----|
| Model weights (recommended) | https://huggingface.co/OpceanAI/Yuuki-best |
| Original model (historical) | https://huggingface.co/OpceanAI/Yuuki |
| Interactive demo | https://huggingface.co/spaces/OpceanAI/Yuuki |
| Training code | https://github.com/YuuKi-OS/yuuki-training |
| CLI source (this repo) | https://github.com/YuuKi-OS/yuy |
| Issues | https://github.com/YuuKi-OS/yuy/issues |
---
## License
Licensed under the **Apache License, Version 2.0**.
```
Copyright 2026 Yuuki Project
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```
---
<p align="center">
<b>Built with patience, a phone, and zero budget.</b><br>
Yuuki Project
</p>

6
yuy/.gitignore vendored Normal file

@@ -0,0 +1,6 @@
/target
Cargo.lock
*.swp
*.swo
*~
.DS_Store

21
yuy/Cargo.toml Normal file

@@ -0,0 +1,21 @@
[package]
name = "yuy"
version = "0.1.0"
edition = "2021"
[dependencies]
clap = { version = "4.5", features = ["derive", "color"] }
tokio = { version = "1.40", features = ["full"] }
reqwest = { version = "0.12", features = ["stream", "json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.8"
indicatif = "0.17"
colored = "2.1"
anyhow = "1.0"
dirs = "5.0"
futures-util = "0.3"
[[bin]]
name = "yuy"
path = "src/main.rs"

109
yuy/src/cli.rs Normal file

@@ -0,0 +1,109 @@
use clap::{Parser, Subcommand};
#[derive(Parser)]
#[command(name = "yuy")]
#[command(about = "Official CLI for Yuuki - AI model management and inference")]
#[command(version = "0.1.0")]
#[command(arg_required_else_help = false)]
pub struct Cli {
#[command(subcommand)]
pub command: Option<Commands>,
}
#[derive(Subcommand)]
pub enum Commands {
/// Download a model from Hugging Face
Download {
/// Model name (Yuuki-best, Yuuki-3.7, Yuuki-v0.1)
model: String,
/// Specific quantization to download (q4_0, q5_k_m, q8_0, f32)
#[arg(short, long)]
quant: Option<String>,
},
/// Run a model with Yuuki Runtime
Run {
/// Model name
model: String,
/// Runtime to use (llama-cpp or ollama)
#[arg(short, long)]
runtime: Option<String>,
/// Quantization to use
#[arg(short, long)]
quant: Option<String>,
/// Preset configuration (creative, precise, balanced)
#[arg(short, long)]
preset: Option<String>,
/// Resume last conversation
#[arg(long)]
resume: bool,
/// Use a specific template
#[arg(short, long)]
template: Option<String>,
},
/// List models or other resources
List {
#[command(subcommand)]
target: ListTarget,
},
/// Show information about a model
Info {
/// Model name
model: String,
/// Show available variants/quantizations
#[arg(long)]
variants: bool,
},
/// Remove a local model
Remove {
/// Model name to remove
model: String,
},
/// Manage runtimes (llama.cpp, ollama)
Runtime {
#[command(subcommand)]
action: RuntimeAction,
},
/// Check system health and show diagnostics
Doctor,
/// Initial setup wizard
Setup,
}
#[derive(Subcommand)]
pub enum ListTarget {
/// List local models
Models {
/// Show remote models available on Hugging Face
#[arg(long)]
remote: bool,
},
}
#[derive(Subcommand)]
pub enum RuntimeAction {
/// Check installed runtimes
Check,
/// Install a runtime
Install {
/// Specific runtime to install (llama-cpp or ollama)
runtime: Option<String>,
},
/// List available runtimes
List,
}

172
yuy/src/commands/doctor.rs Normal file

@@ -0,0 +1,172 @@
use anyhow::Result;
use colored::Colorize;
use crate::config::{get_yuuki_dir, get_models_dir};
use crate::utils::{command_exists, detect_platform, get_available_ram_gb, recommend_quantization};
pub async fn execute() -> Result<()> {
println!("{}", "🔍 Yuuki System Doctor".bright_cyan().bold());
println!();
// Platform
let platform = detect_platform();
println!("{}", "System Information:".bright_cyan());
println!(
" {} {:?}",
"Platform:".bright_white(),
platform
);
println!(
" {} {}",
"OS:".bright_white(),
std::env::consts::OS
);
println!(
" {} {}",
"Arch:".bright_white(),
std::env::consts::ARCH
);
// RAM
let ram = get_available_ram_gb();
println!(
" {} ~{} GB",
"RAM:".bright_white(),
ram
);
// Recommended quantization
let recommended_quant = recommend_quantization(platform.clone(), ram);
println!(
" {} {}",
"Recommended quantization:".bright_white(),
recommended_quant.bright_green()
);
println!();
// Yuuki directories
println!("{}", "Yuuki Configuration:".bright_cyan());
let yuuki_dir = get_yuuki_dir()?;
println!(
" {} {}",
"Config dir:".bright_white(),
yuuki_dir.display().to_string().bright_yellow()
);
let models_dir = get_models_dir()?;
println!(
" {} {}",
"Models dir:".bright_white(),
models_dir.display().to_string().bright_yellow()
);
// Check disk space
if models_dir.exists() {
let mut total_size = 0u64;
let mut model_count = 0;
for entry in std::fs::read_dir(&models_dir)? {
if let Ok(dir_entry) = entry {
if dir_entry.path().is_dir() {
model_count += 1;
// Calculate size
if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
for file in files {
if let Ok(f) = file {
if let Ok(metadata) = f.metadata() {
total_size += metadata.len();
}
}
}
}
}
}
}
println!(
" {} {}",
"Models downloaded:".bright_white(),
model_count
);
println!(
" {} {}",
"Total size:".bright_white(),
crate::utils::format_size(total_size).bright_green()
);
}
println!();
// Runtime status
println!("{}", "Runtime Status:".bright_cyan());
let llama_installed = command_exists("llama-cli")
|| command_exists("llama")
|| command_exists("main");
if llama_installed {
println!(" {} {} {}", "".bright_green(), "llama.cpp".bright_white(), "installed".bright_green());
} else {
println!(" {} {} {}", "".bright_red(), "llama.cpp".bright_white(), "not installed".bright_red());
}
let ollama_installed = command_exists("ollama");
if ollama_installed {
println!(" {} {} {}", "".bright_green(), "ollama".bright_white(), "installed".bright_green());
} else {
println!(" {} {} {}", "".bright_red(), "ollama".bright_white(), "not installed".bright_red());
}
println!();
// Dependencies
println!("{}", "System Dependencies:".bright_cyan());
check_command("curl");
check_command("wget");
check_command("git");
println!();
// Health summary
println!("{}", "Health Summary:".bright_cyan().bold());
let mut issues = Vec::new();
if !llama_installed && !ollama_installed {
issues.push("No runtime installed");
}
if issues.is_empty() {
println!(
" {} System is ready to use Yuuki!",
"".bright_green().bold()
);
} else {
println!(
" {} {} issue(s) found:",
"".bright_yellow(),
issues.len()
);
for issue in issues {
println!("{}", issue.bright_yellow());
}
println!();
println!(
"{} Run {} to install a runtime",
"".bright_blue(),
"yuy runtime install".bright_green()
);
}
Ok(())
}
fn check_command(cmd: &str) {
if command_exists(cmd) {
println!(" {} {} {}", "".bright_green(), cmd.bright_white(), "available".bright_green());
} else {
println!(" {} {} {}", "".bright_yellow(), cmd.bright_white(), "not found".bright_yellow());
}
}

140
yuy/src/commands/download.rs Normal file

@@ -0,0 +1,140 @@
use anyhow::{Context, Result};
use colored::Colorize;
use futures_util::StreamExt;
use indicatif::{ProgressBar, ProgressStyle};
use std::fs::File;
use std::io::Write;
use crate::config::{get_models_dir, HF_ORG, YUUKI_MODELS};
use crate::utils::{detect_platform, get_available_ram_gb, recommend_quantization};
pub async fn execute(model: &str, quant: Option<String>) -> Result<()> {
println!("{}", "📥 Yuuki Model Downloader".bright_cyan().bold());
println!();
// Validate model name
if !YUUKI_MODELS.contains(&model) {
println!(
"{} Model '{}' not found",
"".bright_red(),
model.bright_yellow()
);
println!("\n{}", "Available models:".bright_cyan());
for m in YUUKI_MODELS {
println!("{}", m.bright_green());
}
return Ok(());
}
// Determine quantization
let quantization = if let Some(q) = quant {
q
} else {
let platform = detect_platform();
let ram = get_available_ram_gb();
let recommended = recommend_quantization(platform, ram);
println!(
"{} Auto-selected quantization: {} (based on your system)",
"".bright_blue(),
recommended.bright_green()
);
recommended.to_string()
};
println!(
"{} Model: {}",
"".bright_blue(),
model.bright_green().bold()
);
println!(
"{} Quantization: {}",
"".bright_blue(),
quantization.bright_green()
);
println!(
"{} Source: {}/{}",
"".bright_blue(),
HF_ORG.bright_yellow(),
model.bright_yellow()
);
println!();
// Create model directory
let models_dir = get_models_dir()?;
let model_dir = models_dir.join(model);
std::fs::create_dir_all(&model_dir)?;
// Construct Hugging Face URL
let filename = format!("{}-{}.gguf", model.to_lowercase(), quantization);
let url = format!(
"https://huggingface.co/{}/{}/resolve/main/{}",
HF_ORG, model, filename
);
println!("{} Downloading from Hugging Face...", "".bright_cyan());
println!("{} URL: {}", " ".bright_black(), url.bright_black());
println!();
// Download file with progress bar
let client = reqwest::Client::new();
let response = client
.get(&url)
.send()
.await
.context("Failed to start download")?;
if !response.status().is_success() {
anyhow::bail!(
"Failed to download: HTTP {} - Model file might not exist yet. Try checking HuggingFace.",
response.status()
);
}
let total_size = response
.content_length()
.context("Failed to get content length")?;
let pb = ProgressBar::new(total_size);
pb.set_style(
ProgressStyle::default_bar()
.template("{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({eta})")?
.progress_chars("#>-"),
);
let output_path = model_dir.join(&filename);
let mut file = File::create(&output_path)?;
let mut downloaded: u64 = 0;
let mut stream = response.bytes_stream();
while let Some(item) = stream.next().await {
let chunk = item.context("Error while downloading file")?;
file.write_all(&chunk)?;
downloaded += chunk.len() as u64;
pb.set_position(downloaded);
}
pb.finish_with_message("Download complete!");
println!();
println!(
"{} Model downloaded successfully!",
"".bright_green().bold()
);
println!(
" {} {}",
"Location:".bright_cyan(),
output_path.display().to_string().bright_yellow()
);
println!(
" {} {}",
"Size:".bright_cyan(),
crate::utils::format_size(total_size).bright_yellow()
);
println!();
println!(
"{} Run the model with: {}",
"".bright_blue(),
format!("yuy run {}", model).bright_green()
);
Ok(())
}

73
yuy/src/commands/info.rs Normal file

@@ -0,0 +1,73 @@
use anyhow::Result;
use colored::Colorize;
use crate::config::{get_models_dir, YUUKI_MODELS, HF_ORG};
pub async fn execute(model: &str, variants: bool) -> Result<()> {
println!("{}", " Model Information".bright_cyan().bold());
println!();
if !YUUKI_MODELS.contains(&model) {
anyhow::bail!("Model '{}' not found. Use 'yuy list models --remote' to see available models.", model);
}
println!("{} {}", "Model:".bright_cyan().bold(), model.bright_yellow().bold());
println!(
"{} https://huggingface.co/{}/{}",
"URL:".bright_cyan(),
HF_ORG,
model
);
println!();
// Check local status
let models_dir = get_models_dir()?;
let model_dir = models_dir.join(model);
if model_dir.exists() {
println!("{} {}", "Status:".bright_cyan(), "Downloaded ✓".bright_green());
println!(
"{} {}",
"Location:".bright_cyan(),
model_dir.display().to_string().bright_yellow()
);
println!();
// List local variants
println!("{}", "Local Variants:".bright_cyan());
if let Ok(entries) = std::fs::read_dir(&model_dir) {
for entry in entries {
if let Ok(file) = entry {
let filename = file.file_name();
if filename.to_string_lossy().ends_with(".gguf") {
let metadata = file.metadata()?;
let size = crate::utils::format_size(metadata.len());
println!(
" {} {} ({})",
"".bright_green(),
filename.to_string_lossy().bright_white(),
size.bright_black()
);
}
}
}
}
} else {
println!("{} {}", "Status:".bright_cyan(), "Not downloaded".bright_yellow());
println!(
"\n{} Download with: {}",
"".bright_blue(),
format!("yuy download {}", model).bright_green()
);
}
if variants {
println!();
println!("{}", "Available Variants (on HuggingFace):".bright_cyan());
println!(" {} {} - Smallest size, good for mobile", "".bright_green(), "q4_0".bright_white());
println!(" {} {} - Medium quality, balanced", "".bright_green(), "q5_k_m".bright_white());
println!(" {} {} - High quality", "".bright_green(), "q8_0".bright_white());
println!(" {} {} - Full precision (largest)", "".bright_green(), "f32".bright_white());
}
Ok(())
}

105
yuy/src/commands/list.rs Normal file

@@ -0,0 +1,105 @@
use anyhow::Result;
use colored::Colorize;
use crate::cli::ListTarget;
use crate::config::{get_models_dir, YUUKI_MODELS, HF_ORG};
pub async fn execute(target: ListTarget) -> Result<()> {
match target {
ListTarget::Models { remote } => {
if remote {
list_remote_models().await
} else {
list_local_models().await
}
}
}
}
async fn list_local_models() -> Result<()> {
println!("{}", "📋 Local Models".bright_cyan().bold());
println!();
let models_dir = get_models_dir()?;
if !models_dir.exists() || std::fs::read_dir(&models_dir)?.next().is_none() {
println!("{} No models downloaded yet.", "".bright_blue());
println!();
println!("{} Download a model:", "".bright_blue());
println!(" {}", "yuy download Yuuki-best".bright_green());
return Ok(());
}
for entry in std::fs::read_dir(&models_dir)? {
let entry = entry?;
let model_name = entry.file_name();
let model_path = entry.path();
if model_path.is_dir() {
println!("{} {}", "".bright_green(), model_name.to_string_lossy().bright_yellow().bold());
// List GGUF files in this model directory
if let Ok(files) = std::fs::read_dir(&model_path) {
for file_entry in files {
if let Ok(file) = file_entry {
let file_name = file.file_name();
if file_name.to_string_lossy().ends_with(".gguf") {
let metadata = file.metadata()?;
let size = crate::utils::format_size(metadata.len());
println!(
" {} {} ({})",
"".bright_blue(),
file_name.to_string_lossy().bright_white(),
size.bright_black()
);
}
}
}
}
println!();
}
}
println!(
"{} Location: {}",
"📁".bright_blue(),
models_dir.display().to_string().bright_black()
);
Ok(())
}
async fn list_remote_models() -> Result<()> {
println!("{}", "🌐 Available Models (Hugging Face)".bright_cyan().bold());
println!();
println!(
"{} Organization: {}",
"".bright_blue(),
HF_ORG.bright_yellow()
);
println!();
for model in YUUKI_MODELS {
println!("{} {}", "".bright_green(), model.bright_yellow().bold());
println!(
" {} https://huggingface.co/{}/{}",
"🔗".bright_blue(),
HF_ORG,
model
);
println!(
" {} {}",
"📥".bright_blue(),
format!("yuy download {}", model).bright_green()
);
println!();
}
println!("{} Quantizations typically available:", "".bright_blue());
println!("{} (smallest, fastest)", "q4_0".bright_green());
println!("{} (balanced)", "q5_k_m".bright_green());
println!("{} (best quality)", "q8_0".bright_green());
println!("{} (full precision)", "f32".bright_green());
Ok(())
}

8
yuy/src/commands/mod.rs Normal file

@@ -0,0 +1,8 @@
pub mod download;
pub mod run;
pub mod list;
pub mod info;
pub mod remove;
pub mod runtime;
pub mod doctor;
pub mod setup;

69
yuy/src/commands/remove.rs Normal file

@@ -0,0 +1,69 @@
use anyhow::Result;
use colored::Colorize;
use crate::config::get_models_dir;
pub async fn execute(model: &str) -> Result<()> {
println!("{}", "🗑️ Remove Model".bright_cyan().bold());
println!();
let models_dir = get_models_dir()?;
let model_dir = models_dir.join(model);
if !model_dir.exists() {
println!(
"{} Model '{}' is not downloaded.",
"".bright_blue(),
model.bright_yellow()
);
return Ok(());
}
// Calculate total size
let mut total_size = 0u64;
for entry in std::fs::read_dir(&model_dir)? {
if let Ok(file) = entry {
if let Ok(metadata) = file.metadata() {
total_size += metadata.len();
}
}
}
println!(
"{} About to remove: {}",
"".bright_yellow(),
model.bright_yellow().bold()
);
println!(
"{} Space to free: {}",
"".bright_blue(),
crate::utils::format_size(total_size).bright_green()
);
println!();
print!("{} Are you sure? [y/N]: ", "?".bright_cyan());
std::io::Write::flush(&mut std::io::stdout())?;
let mut input = String::new();
std::io::stdin().read_line(&mut input)?;
if input.trim().to_lowercase() != "y" {
println!("{} Cancelled.", "".bright_blue());
return Ok(());
}
std::fs::remove_dir_all(&model_dir)?;
println!();
println!(
"{} Model '{}' removed successfully.",
"".bright_green(),
model.bright_yellow()
);
println!(
"{} Freed {} of space.",
"".bright_blue(),
crate::utils::format_size(total_size).bright_green()
);
Ok(())
}

152
yuy/src/commands/run.rs Normal file

@@ -0,0 +1,152 @@
use anyhow::{Context, Result};
use colored::Colorize;
use std::process::Command;
use crate::config::{get_models_dir, YUUKI_MODELS};
use crate::utils::command_exists;
pub async fn execute(
model: &str,
runtime: Option<String>,
quant: Option<String>,
preset: Option<String>,
_resume: bool,
_template: Option<String>,
) -> Result<()> {
println!("{}", "🚀 Yuuki Runtime".bright_cyan().bold());
println!();
// Validate model
if !YUUKI_MODELS.contains(&model) {
anyhow::bail!("Model '{}' not found. Use 'yuy list models' to see available models.", model);
}
// Check if model is downloaded
let models_dir = get_models_dir()?;
let model_dir = models_dir.join(model);
if !model_dir.exists() {
println!(
"{} Model '{}' is not downloaded yet.",
"".bright_red(),
model.bright_yellow()
);
println!(
"\n{} Download it first: {}",
"".bright_blue(),
format!("yuy download {}", model).bright_green()
);
return Ok(());
}
// Find GGUF file
let quant_str = quant.unwrap_or_else(|| "q5_k_m".to_string());
let filename = format!("{}-{}.gguf", model.to_lowercase(), quant_str);
let model_path = model_dir.join(&filename);
if !model_path.exists() {
anyhow::bail!(
"Model file '{}' not found. Available quantizations may differ. Try: yuy info {}",
filename, model
);
}
println!("{} Model: {}", "".bright_blue(), model.bright_green());
println!("{} File: {}", "".bright_blue(), filename.bright_yellow());
if let Some(p) = &preset {
println!("{} Preset: {}", "".bright_blue(), p.bright_magenta());
}
println!();
// Determine runtime
let runtime_name = runtime.unwrap_or_else(|| "llama-cpp".to_string());
match runtime_name.as_str() {
"llama-cpp" => run_with_llama_cpp(&model_path, preset).await,
"ollama" => run_with_ollama(model, &model_path).await,
_ => anyhow::bail!("Unknown runtime: {}. Use 'llama-cpp' or 'ollama'", runtime_name),
}
}
async fn run_with_llama_cpp(model_path: &std::path::Path, preset: Option<String>) -> Result<()> {
// Check if llama-cli or llama.cpp exists
let llama_cmd = if command_exists("llama-cli") {
"llama-cli"
} else if command_exists("llama") {
"llama"
} else if command_exists("main") {
"main"
} else {
println!("{} llama.cpp not found!", "".bright_red());
println!("\n{} Install it first:", "".bright_blue());
println!(" Termux: {}", "pkg install llama-cpp".bright_green());
println!(" Other: {}", "yuy runtime install llama-cpp".bright_green());
return Ok(());
};
println!(
"{} Starting llama.cpp interactive mode...",
"".bright_green()
);
println!();
// Configure parameters based on preset
let (temp, top_p) = match preset.as_deref() {
Some("creative") => (0.8, 0.9),
Some("precise") => (0.3, 0.5),
Some("balanced") | None => (0.6, 0.7),
_ => (0.6, 0.7),
};
// Run llama.cpp
let status = Command::new(llama_cmd)
.arg("-m")
.arg(model_path)
.arg("--interactive")
.arg("--temp")
.arg(temp.to_string())
.arg("--top-p")
.arg(top_p.to_string())
.arg("-c")
.arg("4096")
.status()
.context("Failed to execute llama.cpp")?;
if !status.success() {
anyhow::bail!("llama.cpp exited with error");
}
Ok(())
}
async fn run_with_ollama(model: &str, _model_path: &std::path::Path) -> Result<()> {
if !command_exists("ollama") {
println!("{} ollama not found!", "".bright_red());
println!("\n{} Install it first:", "".bright_blue());
println!(" Termux: {}", "pkg install ollama".bright_green());
println!(" Other: {}", "yuy runtime install ollama".bright_green());
return Ok(());
}
println!(
"{} Note: Ollama integration is experimental.",
"".bright_yellow()
);
println!("{} You'll need to import the model to Ollama first.", "".bright_blue());
println!();
// Check if ollama serve is running
println!("{} Checking Ollama service...", "".bright_blue());
// Try to run with ollama (this is simplified, real impl would be more complex)
let status = Command::new("ollama")
.arg("run")
.arg(model)
.status()
.context("Failed to execute ollama")?;
if !status.success() {
println!("\n{} If the model is not in Ollama, you need to import it first.", "".bright_blue());
}
Ok(())
}

254
yuy/src/commands/runtime.rs Normal file

@@ -0,0 +1,254 @@
use anyhow::Result;
use colored::Colorize;
use std::process::Command;
use crate::cli::RuntimeAction;
use crate::utils::{command_exists, detect_platform, Platform};
pub async fn execute(action: RuntimeAction) -> Result<()> {
match action {
RuntimeAction::Check => check_runtimes().await,
RuntimeAction::Install { runtime } => install_runtime(runtime).await,
RuntimeAction::List => list_runtimes().await,
}
}
async fn check_runtimes() -> Result<()> {
println!("{}", "🔍 Runtime Check".bright_cyan().bold());
println!();
// Check llama.cpp
let llama_installed = command_exists("llama-cli")
|| command_exists("llama")
|| command_exists("main");
if llama_installed {
println!("{} {}", "".bright_green(), "llama.cpp".bright_white().bold());
// Try to get version
if command_exists("llama-cli") {
if let Ok(output) = Command::new("llama-cli").arg("--version").output() {
if let Ok(version) = String::from_utf8(output.stdout) {
println!(" {} {}", "".bright_blue(), version.trim().bright_black());
}
}
}
} else {
println!("{} {}", "".bright_red(), "llama.cpp".bright_white().bold());
println!(" {} Not installed", "".bright_black());
}
println!();
// Check ollama
let ollama_installed = command_exists("ollama");
if ollama_installed {
println!("{} {}", "".bright_green(), "ollama".bright_white().bold());
if let Ok(output) = Command::new("ollama").arg("--version").output() {
if let Ok(version) = String::from_utf8(output.stdout) {
println!(" {} {}", "".bright_blue(), version.trim().bright_black());
}
}
} else {
println!("{} {}", "".bright_red(), "ollama".bright_white().bold());
println!(" {} Not installed", "".bright_black());
}
println!();
if !llama_installed && !ollama_installed {
println!("{} No runtimes installed!", "".bright_yellow());
println!();
println!("{} Install a runtime:", "".bright_blue());
println!(" {}", "yuy runtime install".bright_green());
}
Ok(())
}
async fn install_runtime(runtime: Option<String>) -> Result<()> {
println!("{}", "📦 Runtime Installation".bright_cyan().bold());
println!();
let platform = detect_platform();
let runtime_name = if let Some(r) = runtime {
r
} else {
// Interactive selection
println!("{} Select a runtime to install:", "?".bright_cyan());
println!(" {} llama.cpp (recommended, lighter)", "1.".bright_white());
println!(" {} ollama (more features, heavier)", "2.".bright_white());
println!();
print!("{} Enter choice [1/2]: ", "?".bright_cyan());
std::io::Write::flush(&mut std::io::stdout())?;
let mut input = String::new();
std::io::stdin().read_line(&mut input)?;
match input.trim() {
"1" => "llama-cpp".to_string(),
"2" => "ollama".to_string(),
_ => {
println!("{} Invalid choice", "".bright_red());
return Ok(());
}
}
};
println!();
println!(
"{} Installing: {}",
"".bright_blue(),
runtime_name.bright_green().bold()
);
println!();
match platform {
Platform::Termux => install_on_termux(&runtime_name).await,
Platform::Linux => install_on_linux(&runtime_name).await,
Platform::MacOS => install_on_macos(&runtime_name).await,
Platform::Windows => install_on_windows(&runtime_name).await,
Platform::Unknown => {
println!("{} Platform not supported for auto-install", "".bright_red());
show_manual_instructions(&runtime_name, platform);
Ok(())
}
}
}
async fn install_on_termux(runtime: &str) -> Result<()> {
let package = match runtime {
"llama-cpp" => "llama-cpp",
"ollama" => "ollama",
_ => anyhow::bail!("Unknown runtime: {}", runtime),
};
println!("{} Running: pkg install {}", "".bright_blue(), package);
println!();
let status = Command::new("pkg")
.arg("install")
.arg(package)
.arg("-y")
.status()?;
if status.success() {
println!();
println!(
"{} {} installed successfully!",
"".bright_green(),
runtime.bright_green()
);
} else {
anyhow::bail!("Installation failed");
}
Ok(())
}
async fn install_on_linux(runtime: &str) -> Result<()> {
println!("{} Linux installation instructions:", "".bright_blue());
show_manual_instructions(runtime, Platform::Linux);
Ok(())
}
async fn install_on_macos(runtime: &str) -> Result<()> {
println!("{} macOS installation:", "".bright_blue());
println!();
if command_exists("brew") {
println!("{} Homebrew detected, attempting install...", "".bright_blue());
println!();
let package = match runtime {
"llama-cpp" => "llama.cpp",
"ollama" => "ollama",
_ => anyhow::bail!("Unknown runtime: {}", runtime),
};
let status = Command::new("brew")
.arg("install")
.arg(package)
.status()?;
if status.success() {
println!();
println!(
"{} {} installed successfully!",
"".bright_green(),
runtime.bright_green()
);
} else {
anyhow::bail!("Installation failed");
}
} else {
println!("{} Homebrew not found", "".bright_red());
show_manual_instructions(runtime, Platform::MacOS);
}
Ok(())
}
async fn install_on_windows(_runtime: &str) -> Result<()> {
println!("{} Windows installation:", "".bright_blue());
println!();
println!("{} Automatic installation not yet supported on Windows", "".bright_yellow());
println!();
println!("{} Manual installation:", "".bright_blue());
println!(" 1. Install Chocolatey: https://chocolatey.org/install");
println!(" 2. Run: choco install llama-cpp (or ollama)");
println!();
println!("{} Or download binaries:", "".bright_blue());
println!(" • llama.cpp: https://github.com/ggerganov/llama.cpp/releases");
println!(" • ollama: https://ollama.com/download");
Ok(())
}
fn show_manual_instructions(runtime: &str, platform: Platform) {
println!();
println!("{}", "Manual Installation:".bright_cyan().bold());
println!();
match (runtime, platform) {
("llama-cpp", Platform::Linux) => {
println!("{} Option 1 - Download binary:", "1.".bright_white());
println!(" {}", "https://github.com/ggerganov/llama.cpp/releases".bright_blue());
println!();
println!("{} Option 2 - Build from source:", "2.".bright_white());
println!(" git clone https://github.com/ggerganov/llama.cpp");
println!(" cd llama.cpp && make");
}
("ollama", Platform::Linux) => {
println!("{} Run the install script:", "".bright_blue());
println!(" curl -fsSL https://ollama.com/install.sh | sh");
}
_ => {
println!("{} Visit the official website for instructions", "".bright_blue());
}
}
}
async fn list_runtimes() -> Result<()> {
println!("{}", "📋 Available Runtimes".bright_cyan().bold());
println!();
println!("{}", "llama.cpp".bright_green().bold());
println!(" {} Fast, lightweight C++ inference engine", "".bright_blue());
println!(" {} Best for: Termux, low-end devices, direct CLI usage", "".bright_blue());
println!(" {} https://github.com/ggerganov/llama.cpp", "🔗".bright_blue());
println!();
println!("{}", "ollama".bright_green().bold());
println!(" {} User-friendly model management with server", "".bright_blue());
println!(" {} Best for: Desktop usage, multiple models, API access", "".bright_blue());
println!(" {} https://ollama.com", "🔗".bright_blue());
println!();
println!("{} Install a runtime:", "".bright_blue());
println!(" {}", "yuy runtime install".bright_white());
Ok(())
}

104
yuy/src/commands/setup.rs Normal file

@@ -0,0 +1,104 @@
use anyhow::Result;
use colored::Colorize;
use crate::config::{get_yuuki_dir, get_models_dir};
use crate::utils::{command_exists, detect_platform};
pub async fn execute() -> Result<()> {
println!("{}", "🌸 Yuuki Setup Wizard".bright_magenta().bold());
println!();
println!("{}", "Welcome to Yuuki! Let's get you set up.".bright_cyan());
println!();
// Step 1: Create directories
println!("{} Creating directories...", "1.".bright_white().bold());
let yuuki_dir = get_yuuki_dir()?;
let models_dir = get_models_dir()?;
println!(
" {} {}",
"".bright_green(),
yuuki_dir.display().to_string().bright_yellow()
);
println!(
" {} {}",
"".bright_green(),
models_dir.display().to_string().bright_yellow()
);
println!();
// Step 2: Check platform
println!("{} Detecting platform...", "2.".bright_white().bold());
let platform = detect_platform();
println!(" {} {:?}", "".bright_blue(), platform);
println!();
// Step 3: Check runtimes
println!("{} Checking for runtimes...", "3.".bright_white().bold());
let llama_installed = command_exists("llama-cli")
|| command_exists("llama")
|| command_exists("main");
let ollama_installed = command_exists("ollama");
if llama_installed {
println!(" {} llama.cpp found", "".bright_green());
} else {
println!(" {} llama.cpp not found", "".bright_yellow());
}
if ollama_installed {
println!(" {} ollama found", "".bright_green());
} else {
println!(" {} ollama not found", "".bright_yellow());
}
println!();
// Step 4: Offer to install runtime
if !llama_installed && !ollama_installed {
println!("{} No runtime detected.", "".bright_yellow());
println!();
print!("{} Would you like to install a runtime now? [y/N]: ", "?".bright_cyan());
std::io::Write::flush(&mut std::io::stdout())?;
let mut input = String::new();
std::io::stdin().read_line(&mut input)?;
if input.trim().to_lowercase() == "y" {
println!();
crate::commands::runtime::execute(crate::cli::RuntimeAction::Install { runtime: None }).await?;
} else {
println!();
println!("{} Skipping runtime installation.", "".bright_blue());
println!(
"{} You can install later with: {}",
"".bright_blue(),
"yuy runtime install".bright_green()
);
}
} else {
println!("{} Runtime ready!", "".bright_green());
}
println!();
// Step 5: Summary
println!("{}", "✨ Setup Complete!".bright_green().bold());
println!();
println!("{}", "Next steps:".bright_cyan());
println!(" {} Download a model:", "1.".bright_white());
println!(" {}", "yuy download Yuuki-best".bright_green());
println!();
println!(" {} Run the model:", "2.".bright_white());
println!(" {}", "yuy run Yuuki-best".bright_green());
println!();
println!(" {} Check system health:", "3.".bright_white());
println!(" {}", "yuy doctor".bright_green());
println!();
println!("{} Need help? Visit:", "📚".bright_blue());
println!(" https://github.com/YuuKi-OS/yuy");
Ok(())
}

72
yuy/src/config.rs Normal file

@@ -0,0 +1,72 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::PathBuf;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
pub hf_token: Option<String>,
pub default_runtime: Option<String>,
pub default_quant: Option<String>,
}
impl Default for Config {
fn default() -> Self {
Self {
hf_token: None,
default_runtime: Some("llama-cpp".to_string()),
default_quant: Some("q5_k_m".to_string()),
}
}
}
pub fn get_yuuki_dir() -> Result<PathBuf> {
let home = dirs::home_dir().context("Could not find home directory")?;
let yuuki_dir = home.join(".yuuki");
if !yuuki_dir.exists() {
fs::create_dir_all(&yuuki_dir)?;
}
Ok(yuuki_dir)
}
pub fn get_models_dir() -> Result<PathBuf> {
let yuuki_dir = get_yuuki_dir()?;
let models_dir = yuuki_dir.join("models");
if !models_dir.exists() {
fs::create_dir_all(&models_dir)?;
}
Ok(models_dir)
}
pub fn get_config_path() -> Result<PathBuf> {
let yuuki_dir = get_yuuki_dir()?;
Ok(yuuki_dir.join("config.toml"))
}
pub fn load_config() -> Result<Config> {
let config_path = get_config_path()?;
if !config_path.exists() {
let config = Config::default();
save_config(&config)?;
return Ok(config);
}
let content = fs::read_to_string(config_path)?;
let config: Config = toml::from_str(&content)?;
Ok(config)
}
pub fn save_config(config: &Config) -> Result<()> {
let config_path = get_config_path()?;
let content = toml::to_string_pretty(config)?;
fs::write(config_path, content)?;
Ok(())
}
pub const YUUKI_MODELS: &[&str] = &["Yuuki-best", "Yuuki-3.7", "Yuuki-v0.1"];
pub const HF_ORG: &str = "OpceanAI";

76
yuy/src/main.rs Normal file

@@ -0,0 +1,76 @@
mod cli;
mod commands;
mod config;
mod utils;
use clap::Parser;
use cli::{Cli, Commands};
use colored::Colorize;
#[tokio::main]
async fn main() {
let cli = Cli::parse();
let result = match cli.command {
Some(Commands::Download { model, quant }) => {
commands::download::execute(&model, quant).await
}
Some(Commands::Run { model, runtime, quant, preset, resume, template }) => {
commands::run::execute(&model, runtime, quant, preset, resume, template).await
}
Some(Commands::List { target }) => {
commands::list::execute(target).await
}
Some(Commands::Info { model, variants }) => {
commands::info::execute(&model, variants).await
}
Some(Commands::Remove { model }) => {
commands::remove::execute(&model).await
}
Some(Commands::Runtime { action }) => {
commands::runtime::execute(action).await
}
Some(Commands::Doctor) => {
commands::doctor::execute().await
}
Some(Commands::Setup) => {
commands::setup::execute().await
}
None => {
print_banner();
println!("{}", "Type 'yuy --help' for usage information\n".bright_cyan());
Ok(())
}
};
if let Err(e) = result {
eprintln!("{} {}", "Error:".bright_red().bold(), e);
std::process::exit(1);
}
}
fn print_banner() {
println!(
"{}",
r#"
$$\ $$\
\$$\ $$ |
\$$\ $$ /$$\ $$\ $$\ $$\
\$$$$ / $$ | $$ |$$ | $$ |
\$$ / $$ | $$ |$$ | $$ |
$$ | $$ | $$ |$$ | $$ |
$$ | \$$$$$$ |\$$$$$$$ |
\__| \______/ \____$$ |
$$\ $$ |
\$$$$$$ |
\______/
"#
.bright_magenta()
);
println!(
"{}\n",
"Yuuki CLI v0.1.0 - Official AI Model Manager"
.bright_cyan()
.bold()
);
}

68
yuy/src/utils.rs Normal file

@@ -0,0 +1,68 @@
use anyhow::Result;
use std::process::Command;
#[derive(Debug, Clone, PartialEq)]
pub enum Platform {
Linux,
MacOS,
Windows,
Termux,
Unknown,
}
pub fn detect_platform() -> Platform {
let os = std::env::consts::OS;
match os {
"linux" => {
// Check if running in Termux
if std::env::var("PREFIX").map(|p| p.contains("com.termux")).unwrap_or(false) {
Platform::Termux
} else {
Platform::Linux
}
}
"macos" => Platform::MacOS,
"windows" => Platform::Windows,
_ => Platform::Unknown,
}
}
pub fn get_available_ram_gb() -> usize {
// Simplified: assume 8GB if we can't detect
// In production, use sys-info or similar
8
}
pub fn command_exists(cmd: &str) -> bool {
Command::new("which")
.arg(cmd)
.output()
.map(|output| output.status.success())
.unwrap_or(false)
}
pub fn format_size(bytes: u64) -> String {
const GB: u64 = 1024 * 1024 * 1024;
const MB: u64 = 1024 * 1024;
const KB: u64 = 1024;
if bytes >= GB {
format!("{:.2} GB", bytes as f64 / GB as f64)
} else if bytes >= MB {
format!("{:.2} MB", bytes as f64 / MB as f64)
} else if bytes >= KB {
format!("{:.2} KB", bytes as f64 / KB as f64)
} else {
format!("{} B", bytes)
}
}
pub fn recommend_quantization(platform: Platform, ram_gb: usize) -> &'static str {
match (platform, ram_gb) {
(Platform::Termux, _) => "q4_0",
(_, ram) if ram < 8 => "q4_0",
(_, ram) if ram < 16 => "q5_k_m",
_ => "q8_0",
}
}