import 'dart:async';
import 'dart:io';

import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';

/// Manages the two llama.cpp server processes that provide AI features.
///
/// Both processes are kept alive for the lifetime of the app and must be
/// killed on shutdown to prevent zombie processes from consuming RAM.
///
/// - Qwen 2.5 7B → port 8080 (chat / completions)
/// - Nomic Embed → port 8081 (embeddings)
class AiProcessManager {
  Process? _qwenProcess;
  Process? _nomicProcess;
  bool _running = false;

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /// Whether both inference servers have been started successfully.
  bool get isRunning => _running;

  /// Starts both inference servers. No-ops if already running or if the
  /// llama-server binary is not present on disk.
  ///
  /// If the second server fails to start, any partially-started process is
  /// killed before the error is rethrown, so no orphan survives.
  Future<void> startServers() async {
    if (_running) return;

    final dir = await getApplicationDocumentsDirectory();
    final base = dir.path;
    final serverBin = p.join(
      base,
      Platform.isWindows ? 'llama-server.exe' : 'llama-server',
    );
    // Binary not bundled/downloaded yet — silently skip, AI features stay off.
    if (!File(serverBin).existsSync()) return;

    try {
      // ── Qwen 2.5 7B chat server ──────────────────────────────────────────
      _qwenProcess = await _spawn(serverBin, [
        '-m', p.join(base, 'qwen2.5-7b-instruct-q4_k_m.gguf'),
        '--port', '8080',
        '--ctx-size', '4096',
        '-ngl', '99', // offload all layers to GPU
      ]);

      // ── Nomic embedding server ───────────────────────────────────────────
      _nomicProcess = await _spawn(serverBin, [
        '-m', p.join(base, 'nomic-embed-text-v1.5.Q4_K_M.gguf'),
        '--port', '8081',
        '--ctx-size', '8192',
        '--embedding',
      ]);

      _running = true;
    } catch (_) {
      // Clean up any partially-started processes before rethrowing.
      _qwenProcess?.kill();
      _nomicProcess?.kill();
      _qwenProcess = null;
      _nomicProcess = null;
      rethrow;
    }
  }

  /// Kills both processes and resets the running flag.
  /// Safe to call even if servers were never started.
  Future<void> stopServers() async {
    _qwenProcess?.kill();
    _nomicProcess?.kill();
    _qwenProcess = null;
    _nomicProcess = null;
    _running = false;
  }

  // -------------------------------------------------------------------------
  // Internals
  // -------------------------------------------------------------------------

  /// Spawns one llama-server instance with [args] and drains its output
  /// pipes so the child is never blocked by a full stdio buffer.
  Future<Process> _spawn(String serverBin, List<String> args) async {
    final process = await Process.start(serverBin, args, runInShell: false);
    // drain() returns futures we deliberately fire-and-forget; a pipe error
    // here must not crash the app, we only care that the buffer keeps moving.
    unawaited(process.stdout.drain<void>());
    unawaited(process.stderr.drain<void>());
    return process;
  }
}