This commit is contained in:
88
lib/domain/services/ai_process_manager.dart
Normal file
88
lib/domain/services/ai_process_manager.dart
Normal file
@@ -0,0 +1,88 @@
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:path/path.dart' as p;
|
||||
import 'package:path_provider/path_provider.dart';
|
||||
|
||||
/// Manages the two llama.cpp server processes that provide AI features.
///
/// Both processes are kept alive for the lifetime of the app and must be
/// killed on shutdown to prevent zombie processes from consuming RAM.
///
/// - Qwen 2.5 7B → port 8080 (chat / completions)
/// - Nomic Embed → port 8081 (embeddings)
class AiProcessManager {
  Process? _qwenProcess;
  Process? _nomicProcess;
  bool _running = false;

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /// Whether both inference servers were started and have not been stopped.
  ///
  /// NOTE(review): this flag is not updated if a child process dies on its
  /// own; it only tracks [startServers]/[stopServers] calls.
  bool get isRunning => _running;

  /// Starts both inference servers. No-ops if already running or if the
  /// llama-server binary is not present on disk.
  ///
  /// If either process fails to spawn, any partially-started process is
  /// killed before the error is rethrown, so no orphan is left behind.
  Future<void> startServers() async {
    if (_running) return;

    final dir = await getApplicationDocumentsDirectory();
    final base = dir.path;
    final serverBin = p.join(
      base,
      Platform.isWindows ? 'llama-server.exe' : 'llama-server',
    );

    // Binary not yet downloaded/installed: silently skip AI features.
    if (!File(serverBin).existsSync()) return;

    try {
      // ── Qwen 2.5 7B chat server ──────────────────────────────────────────
      _qwenProcess = await _spawn(serverBin, [
        '-m', p.join(base, 'qwen2.5-7b-instruct-q4_k_m.gguf'),
        '--port', '8080',
        '--ctx-size', '4096',
        '-ngl', '99', // offload all layers to GPU
      ]);

      // ── Nomic embedding server ───────────────────────────────────────────
      _nomicProcess = await _spawn(serverBin, [
        '-m', p.join(base, 'nomic-embed-text-v1.5.Q4_K_M.gguf'),
        '--port', '8081',
        '--ctx-size', '8192',
        '--embedding',
      ]);

      _running = true;
    } catch (_) {
      // Clean up any partially-started processes before rethrowing.
      _qwenProcess?.kill();
      _nomicProcess?.kill();
      _qwenProcess = null;
      _nomicProcess = null;
      rethrow;
    }
  }

  /// Kills both processes and resets the running flag.
  /// Safe to call even if servers were never started.
  Future<void> stopServers() async {
    _qwenProcess?.kill();
    _nomicProcess?.kill();
    _qwenProcess = null;
    _nomicProcess = null;
    _running = false;
  }

  // -------------------------------------------------------------------------
  // Internals
  // -------------------------------------------------------------------------

  /// Spawns [executable] with [arguments] (never through a shell) and drains
  /// its stdout/stderr so the child is never blocked by a full pipe buffer.
  Future<Process> _spawn(String executable, List<String> arguments) async {
    final process = await Process.start(
      executable,
      arguments,
      runInShell: false,
    );
    // Fire-and-forget: we only consume the pipes, we never read the output.
    unawaited(process.stdout.drain<void>());
    unawaited(process.stderr.drain<void>());
    return process;
  }
}
|
||||
Reference in New Issue
Block a user