175 lines
5.5 KiB
Dart
175 lines
5.5 KiB
Dart
import 'dart:convert';
import 'dart:io';

import 'package:flutter/foundation.dart';
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';

import 'package:trainhub_flutter/core/constants/ai_constants.dart';
|
|
|
|
/// Lifecycle states of the local AI inference servers.
///
/// `starting` covers the window between process spawn and both ports
/// accepting connections; `error` is set on spawn failure, startup
/// timeout, or unexpected process exit.
enum AiServerStatus { offline, starting, ready, error }
|
|
|
|
/// Manages the two llama.cpp server processes that provide AI features.
///
/// Both processes are kept alive for the lifetime of the app and must be
/// killed on shutdown to prevent zombie processes from consuming RAM.
///
/// - Qwen 2.5 7B → port 8080 (chat / completions)
/// - Nomic Embed → port 8081 (embeddings)
class AiProcessManager extends ChangeNotifier {
  Process? _qwenProcess;
  Process? _nomicProcess;
  AiServerStatus _status = AiServerStatus.offline;
  String? _lastError;

  // Readiness polling: 20 attempts x 500 ms = 10 s total budget.
  static const _readinessAttempts = 20;
  static const _readinessInterval = Duration(milliseconds: 500);

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /// The current lifecycle state of the servers.
  AiServerStatus get status => _status;

  /// Whether both servers are up and accepting connections.
  bool get isRunning => _status == AiServerStatus.ready;

  /// Human-readable description of the last failure, or null if none.
  String? get errorMessage => _lastError;

  /// Starts both inference servers. No-ops if already running or starting.
  ///
  /// On any failure (missing binary/model, spawn error, readiness timeout)
  /// partially-started processes are killed, [errorMessage] is populated,
  /// and [status] becomes [AiServerStatus.error].
  Future<void> startServers() async {
    if (_status == AiServerStatus.starting || _status == AiServerStatus.ready) {
      return;
    }

    _updateStatus(AiServerStatus.starting);
    _lastError = null;

    final dir = await getApplicationDocumentsDirectory();
    final base = dir.path;
    final serverBin = p.join(base, AiConstants.serverBinaryName);
    final qwenModel = p.join(base, AiConstants.qwenModelFile);
    final nomicModel = p.join(base, AiConstants.nomicModelFile);

    // Fail fast with a specific message before spawning anything; a missing
    // model would otherwise burn the whole readiness timeout before erroring.
    for (final path in [serverBin, qwenModel, nomicModel]) {
      if (!File(path).existsSync()) {
        _lastError = path == serverBin
            ? 'llama-server executable not found.'
            : 'Model file not found: ${p.basename(path)}';
        _updateStatus(AiServerStatus.error);
        return;
      }
    }

    try {
      _qwenProcess = await _spawnServer(
        binary: serverBin,
        label: 'Qwen Chat Server',
        tag: 'QWEN',
        args: [
          '-m', qwenModel,
          '--port', '${AiConstants.chatServerPort}',
          '--ctx-size', '${AiConstants.qwenContextSize}',
          '-ngl', '${AiConstants.gpuLayerOffload}',
        ],
      );

      _nomicProcess = await _spawnServer(
        binary: serverBin,
        label: 'Nomic Embedding Server',
        tag: 'NOMIC',
        args: [
          '-m', nomicModel,
          '--port', '${AiConstants.embeddingServerPort}',
          '--ctx-size', '${AiConstants.nomicContextSize}',
          '--embedding',
        ],
      );

      // Wait for servers to bind their ports and allocate memory. Loading
      // the models (especially the 7B) takes several seconds and significant
      // RAM; polling avoids blocking on a single fixed sleep.
      var qwenReady = false;
      var nomicReady = false;
      for (var attempt = 0;
          attempt < _readinessAttempts && !(qwenReady && nomicReady);
          attempt++) {
        await Future.delayed(_readinessInterval);
        // Short-circuit: once a port has answered, stop probing it.
        qwenReady =
            qwenReady || await _isPortReady(AiConstants.chatServerPort);
        nomicReady =
            nomicReady || await _isPortReady(AiConstants.embeddingServerPort);
      }

      if (!qwenReady || !nomicReady) {
        throw Exception('Servers failed to start within 10 seconds.');
      }

      _updateStatus(AiServerStatus.ready);
    } catch (e) {
      // Clean up any partially-started processes before reporting the error.
      _qwenProcess?.kill();
      _nomicProcess?.kill();
      _qwenProcess = null;
      _nomicProcess = null;
      _lastError = e.toString();
      _updateStatus(AiServerStatus.error);
    }
  }

  /// Kills both processes and resets the running flag.
  /// Safe to call even if servers were never started.
  Future<void> stopServers() async {
    _qwenProcess?.kill();
    _nomicProcess?.kill();

    // Belt-and-braces sweep for orphans left by a previous run that exited
    // without cleanup.
    // NOTE(review): this is system-wide and will also terminate llama-server
    // instances started by other applications — confirm this is acceptable.
    if (Platform.isWindows) {
      try {
        await Process.run(
            'taskkill', ['/F', '/IM', AiConstants.serverBinaryName]);
      } catch (_) {
        // Best effort: taskkill may be unavailable or match no processes.
      }
    } else if (Platform.isMacOS || Platform.isLinux) {
      try {
        await Process.run('pkill', ['-f', 'llama-server']);
      } catch (_) {
        // Best effort: pkill missing or nothing matched.
      }
    }

    _qwenProcess = null;
    _nomicProcess = null;
    _updateStatus(AiServerStatus.offline);
  }

  // -------------------------------------------------------------------------
  // Internals
  // -------------------------------------------------------------------------

  /// Spawns one llama-server instance and wires up debug logging plus a
  /// crash monitor that flips the manager into the error state if the
  /// process dies while it is still expected to be running.
  Future<Process> _spawnServer({
    required String binary,
    required List<String> args,
    required String label,
    required String tag,
  }) async {
    final process = await Process.start(binary, args, runInShell: false);

    // allowMalformed guards against multi-byte UTF-8 sequences being split
    // across stream chunks (String.fromCharCodes would garble them).
    void log(String channel, List<int> chunk) {
      if (kDebugMode) {
        print('[$tag $channel] ${utf8.decode(chunk, allowMalformed: true)}');
      }
    }

    process.stdout.listen((chunk) => log('STDOUT', chunk));
    process.stderr.listen((chunk) => log('STDERR', chunk));

    // Monitor for unexpected crash. After stopServers() the status is
    // offline (and after a startup failure it is error), so this guard keeps
    // deliberate shutdowns from being reported as crashes.
    process.exitCode.then((code) {
      if (_status == AiServerStatus.ready ||
          _status == AiServerStatus.starting) {
        _lastError = '$label crashed with code $code';
        _updateStatus(AiServerStatus.error);
      }
    });

    return process;
  }

  /// Returns true if a TCP connection to `127.0.0.1:[port]` succeeds within
  /// one second, meaning the server has bound the port and is accepting
  /// connections.
  Future<bool> _isPortReady(int port) async {
    try {
      final socket = await Socket.connect(
        '127.0.0.1',
        port,
        timeout: const Duration(seconds: 1),
      );
      socket.destroy();
      return true;
    } catch (_) {
      return false;
    }
  }

  /// Transitions to [newStatus], notifying listeners only on a real change.
  void _updateStatus(AiServerStatus newStatus) {
    if (_status != newStatus) {
      _status = newStatus;
      notifyListeners();
    }
  }
}
|