This commit is contained in:
174
lib/data/services/ai_process_manager.dart
Normal file
174
lib/data/services/ai_process_manager.dart
Normal file
@@ -0,0 +1,174 @@
|
||||
import 'dart:convert';
import 'dart:io';

import 'package:flutter/foundation.dart';
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';
import 'package:trainhub_flutter/core/constants/ai_constants.dart';

/// Lifecycle states of the managed llama.cpp server pair.
///
/// `starting` covers the window between spawning the processes and both
/// ports accepting connections; `error` is terminal until the next
/// successful `startServers` call.
enum AiServerStatus { offline, starting, ready, error }
/// Manages the two llama.cpp server processes that provide AI features.
///
/// Both processes are kept alive for the lifetime of the app and must be
/// killed on shutdown to prevent zombie processes from consuming RAM.
///
/// - Qwen 2.5 7B → port 8080 (chat / completions)
/// - Nomic Embed → port 8081 (embeddings)
class AiProcessManager extends ChangeNotifier {
  Process? _qwenProcess;
  Process? _nomicProcess;
  AiServerStatus _status = AiServerStatus.offline;
  String? _lastError;

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /// Current lifecycle state of the server pair.
  AiServerStatus get status => _status;

  /// Whether both servers are up and accepting connections.
  bool get isRunning => _status == AiServerStatus.ready;

  /// Human-readable description of the most recent failure, if any.
  String? get errorMessage => _lastError;

  /// Starts both inference servers. No-ops if already running or starting.
  ///
  /// On failure, kills any partially-started process, records the cause in
  /// [errorMessage], and transitions to [AiServerStatus.error]; it never
  /// throws to the caller.
  Future<void> startServers() async {
    if (_status == AiServerStatus.starting || _status == AiServerStatus.ready) {
      return;
    }

    _updateStatus(AiServerStatus.starting);
    _lastError = null;

    final dir = await getApplicationDocumentsDirectory();
    final base = dir.path;
    final serverBin = p.join(base, AiConstants.serverBinaryName);

    if (!File(serverBin).existsSync()) {
      _lastError = 'llama-server executable not found.';
      _updateStatus(AiServerStatus.error);
      return;
    }

    try {
      _qwenProcess = await Process.start(serverBin, [
        '-m', p.join(base, AiConstants.qwenModelFile),
        '--port', '${AiConstants.chatServerPort}',
        '--ctx-size', '${AiConstants.qwenContextSize}',
        '-ngl', '${AiConstants.gpuLayerOffload}',
      ], runInShell: false);
      _pipeLogs(_qwenProcess!, 'QWEN');
      _monitorExit(_qwenProcess!, 'Qwen Chat Server');

      _nomicProcess = await Process.start(serverBin, [
        '-m', p.join(base, AiConstants.nomicModelFile),
        '--port', '${AiConstants.embeddingServerPort}',
        '--ctx-size', '${AiConstants.nomicContextSize}',
        '--embedding',
      ], runInShell: false);
      _pipeLogs(_nomicProcess!, 'NOMIC');
      _monitorExit(_nomicProcess!, 'Nomic Embedding Server');

      // Wait for servers to bind to their ports and allocate memory.
      // This is crucial because loading models (especially 7B) takes several
      // seconds and significant RAM, which might cause the dart process to
      // appear hung. 20 attempts × 500 ms = the 10-second budget below.
      const maxAttempts = 20;
      var attempts = 0;
      var qwenReady = false;
      var nomicReady = false;

      while (attempts < maxAttempts && (!qwenReady || !nomicReady)) {
        await Future.delayed(const Duration(milliseconds: 500));

        // A crash monitor may have flagged a failure while we slept;
        // bail out immediately instead of burning the remaining budget.
        if (_status == AiServerStatus.error) {
          throw Exception(_lastError ?? 'Server crashed during startup.');
        }

        if (!qwenReady) {
          qwenReady = await _isPortReady(AiConstants.chatServerPort);
        }
        if (!nomicReady) {
          nomicReady = await _isPortReady(AiConstants.embeddingServerPort);
        }

        attempts++;
      }

      if (!qwenReady || !nomicReady) {
        throw Exception('Servers failed to start within 10 seconds.');
      }

      _updateStatus(AiServerStatus.ready);
    } catch (e) {
      // Clean up any partially-started processes before returning error.
      _qwenProcess?.kill();
      _nomicProcess?.kill();
      _qwenProcess = null;
      _nomicProcess = null;
      _lastError = e.toString();
      _updateStatus(AiServerStatus.error);
    }
  }

  /// Kills both processes and resets the running flag.
  /// Safe to call even if servers were never started.
  Future<void> stopServers() async {
    _qwenProcess?.kill();
    _nomicProcess?.kill();

    // Belt-and-braces sweep for orphaned llama-server instances from a
    // previous app run. Both branches derive the target from the configured
    // binary name so they cannot drift apart.
    if (Platform.isWindows) {
      try {
        await Process.run(
            'taskkill', ['/F', '/IM', AiConstants.serverBinaryName]);
      } catch (_) {}
    } else if (Platform.isMacOS || Platform.isLinux) {
      try {
        // pkill -f matches the full command line; the extension-less base
        // name matches regardless of platform-specific suffixes.
        await Process.run('pkill',
            ['-f', p.basenameWithoutExtension(AiConstants.serverBinaryName)]);
      } catch (_) {}
    }

    _qwenProcess = null;
    _nomicProcess = null;
    _updateStatus(AiServerStatus.offline);
  }

  /// Kills the native processes when this manager is disposed, per the
  /// class contract ("must be killed on shutdown").
  @override
  void dispose() {
    // Null the handles first so the exit monitors recognise these kills as
    // deliberate and do not call notifyListeners() after disposal.
    final qwen = _qwenProcess;
    final nomic = _nomicProcess;
    _qwenProcess = null;
    _nomicProcess = null;
    qwen?.kill();
    nomic?.kill();
    super.dispose();
  }

  // -------------------------------------------------------------------------
  // Internals
  // -------------------------------------------------------------------------

  /// Forwards [proc]'s stdout/stderr to the debug console, tagged with [tag].
  ///
  /// Uses a malformed-tolerant UTF-8 decoder instead of String.fromCharCodes:
  /// the raw byte chunks can split a multi-byte character across events,
  /// which fromCharCodes would render as mojibake.
  void _pipeLogs(Process proc, String tag) {
    const decoder = Utf8Decoder(allowMalformed: true);
    proc.stdout.transform(decoder).listen((text) {
      if (kDebugMode) print('[$tag STDOUT] $text');
    });
    proc.stderr.transform(decoder).listen((text) {
      if (kDebugMode) print('[$tag STDERR] $text');
    });
  }

  /// Transitions to [AiServerStatus.error] if [proc] exits while the manager
  /// still believes it is healthy. [label] names the server in the message.
  void _monitorExit(Process proc, String label) {
    proc.exitCode.then((code) {
      // Ignore exits from processes we already replaced or deliberately
      // killed (stopServers/dispose null the handle before killing).
      if (!identical(proc, _qwenProcess) && !identical(proc, _nomicProcess)) {
        return;
      }
      if (_status == AiServerStatus.ready ||
          _status == AiServerStatus.starting) {
        _lastError = '$label crashed with code $code';
        _updateStatus(AiServerStatus.error);
      }
    });
  }

  /// Whether a TCP connect to 127.0.0.1:[port] succeeds within one second.
  ///
  /// A successful bind is our readiness signal; the probe socket is
  /// destroyed immediately so it never interferes with the server.
  Future<bool> _isPortReady(int port) async {
    try {
      final socket = await Socket.connect(
        '127.0.0.1',
        port,
        timeout: const Duration(seconds: 1),
      );
      socket.destroy();
      return true;
    } catch (_) {
      return false;
    }
  }

  /// Records [newStatus] and notifies listeners, skipping no-op transitions
  /// to avoid redundant widget rebuilds.
  void _updateStatus(AiServerStatus newStatus) {
    if (_status != newStatus) {
      _status = newStatus;
      notifyListeners();
    }
  }
}
|
||||
Reference in New Issue
Block a user