175 lines
5.5 KiB
Dart
175 lines
5.5 KiB
Dart
import 'dart:convert';
import 'dart:io';

import 'package:flutter/foundation.dart';
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';

import 'package:trainhub_flutter/core/constants/ai_constants.dart';
|
|
|
|
/// Lifecycle states of the local AI inference servers.
///
/// `starting` covers the window between process spawn and both ports
/// accepting connections; `error` is set on spawn failure, startup
/// timeout, or unexpected process exit.
enum AiServerStatus { offline, starting, ready, error }
|
|
|
|
/// Manages the two llama.cpp server processes that provide AI features.
///
/// Both processes are kept alive for the lifetime of the app and must be
/// killed on shutdown to prevent zombie processes from consuming RAM.
///
/// - Qwen 2.5 7B → port 8080 (chat / completions)
/// - Nomic Embed → port 8081 (embeddings)
class AiProcessManager extends ChangeNotifier {
  Process? _qwenProcess;
  Process? _nomicProcess;
  AiServerStatus _status = AiServerStatus.offline;
  String? _lastError;

  // Readiness polling: 20 attempts x 500 ms = 10 s total budget.
  static const _readinessAttempts = 20;
  static const _readinessInterval = Duration(milliseconds: 500);

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /// The current lifecycle state of the servers.
  AiServerStatus get status => _status;

  /// Whether both servers are up and accepting connections.
  bool get isRunning => _status == AiServerStatus.ready;

  /// Human-readable description of the last failure, or null if none.
  String? get errorMessage => _lastError;

  /// Starts both inference servers. No-ops if already running or starting.
  ///
  /// On any failure (missing binary/model, spawn error, readiness timeout)
  /// partially-started processes are killed, [errorMessage] is populated,
  /// and [status] becomes [AiServerStatus.error].
  Future<void> startServers() async {
    if (_status == AiServerStatus.starting || _status == AiServerStatus.ready) {
      return;
    }

    _updateStatus(AiServerStatus.starting);
    _lastError = null;

    final dir = await getApplicationDocumentsDirectory();
    final base = dir.path;
    final serverBin = p.join(base, AiConstants.serverBinaryName);
    final qwenModel = p.join(base, AiConstants.qwenModelFile);
    final nomicModel = p.join(base, AiConstants.nomicModelFile);

    // Fail fast with a specific message before spawning anything; a missing
    // model would otherwise burn the whole readiness timeout before erroring.
    for (final path in [serverBin, qwenModel, nomicModel]) {
      if (!File(path).existsSync()) {
        _lastError = path == serverBin
            ? 'llama-server executable not found.'
            : 'Model file not found: ${p.basename(path)}';
        _updateStatus(AiServerStatus.error);
        return;
      }
    }

    try {
      _qwenProcess = await _spawnServer(
        binary: serverBin,
        label: 'Qwen Chat Server',
        tag: 'QWEN',
        args: [
          '-m', qwenModel,
          '--port', '${AiConstants.chatServerPort}',
          '--ctx-size', '${AiConstants.qwenContextSize}',
          '-ngl', '${AiConstants.gpuLayerOffload}',
        ],
      );

      _nomicProcess = await _spawnServer(
        binary: serverBin,
        label: 'Nomic Embedding Server',
        tag: 'NOMIC',
        args: [
          '-m', nomicModel,
          '--port', '${AiConstants.embeddingServerPort}',
          '--ctx-size', '${AiConstants.nomicContextSize}',
          '--embedding',
        ],
      );

      // Wait for servers to bind their ports and allocate memory. Loading
      // the models (especially the 7B) takes several seconds and significant
      // RAM; polling avoids blocking on a single fixed sleep.
      var qwenReady = false;
      var nomicReady = false;
      for (var attempt = 0;
          attempt < _readinessAttempts && !(qwenReady && nomicReady);
          attempt++) {
        await Future.delayed(_readinessInterval);
        // Short-circuit: once a port has answered, stop probing it.
        qwenReady =
            qwenReady || await _isPortReady(AiConstants.chatServerPort);
        nomicReady =
            nomicReady || await _isPortReady(AiConstants.embeddingServerPort);
      }

      if (!qwenReady || !nomicReady) {
        throw Exception('Servers failed to start within 10 seconds.');
      }

      _updateStatus(AiServerStatus.ready);
    } catch (e) {
      // Clean up any partially-started processes before reporting the error.
      _qwenProcess?.kill();
      _nomicProcess?.kill();
      _qwenProcess = null;
      _nomicProcess = null;
      _lastError = e.toString();
      _updateStatus(AiServerStatus.error);
    }
  }

  /// Kills both processes and resets the running flag.
  /// Safe to call even if servers were never started.
  Future<void> stopServers() async {
    _qwenProcess?.kill();
    _nomicProcess?.kill();

    // Belt-and-braces sweep for orphans left by a previous run that exited
    // without cleanup.
    // NOTE(review): this is system-wide and will also terminate llama-server
    // instances started by other applications — confirm this is acceptable.
    if (Platform.isWindows) {
      try {
        await Process.run(
            'taskkill', ['/F', '/IM', AiConstants.serverBinaryName]);
      } catch (_) {
        // Best effort: taskkill may be unavailable or match no processes.
      }
    } else if (Platform.isMacOS || Platform.isLinux) {
      try {
        await Process.run('pkill', ['-f', 'llama-server']);
      } catch (_) {
        // Best effort: pkill missing or nothing matched.
      }
    }

    _qwenProcess = null;
    _nomicProcess = null;
    _updateStatus(AiServerStatus.offline);
  }

  // -------------------------------------------------------------------------
  // Internals
  // -------------------------------------------------------------------------

  /// Spawns one llama-server instance and wires up debug logging plus a
  /// crash monitor that flips the manager into the error state if the
  /// process dies while it is still expected to be running.
  Future<Process> _spawnServer({
    required String binary,
    required List<String> args,
    required String label,
    required String tag,
  }) async {
    final process = await Process.start(binary, args, runInShell: false);

    // allowMalformed guards against multi-byte UTF-8 sequences being split
    // across stream chunks (String.fromCharCodes would garble them).
    void log(String channel, List<int> chunk) {
      if (kDebugMode) {
        print('[$tag $channel] ${utf8.decode(chunk, allowMalformed: true)}');
      }
    }

    process.stdout.listen((chunk) => log('STDOUT', chunk));
    process.stderr.listen((chunk) => log('STDERR', chunk));

    // Monitor for unexpected crash. After stopServers() the status is
    // offline (and after a startup failure it is error), so this guard keeps
    // deliberate shutdowns from being reported as crashes.
    process.exitCode.then((code) {
      if (_status == AiServerStatus.ready ||
          _status == AiServerStatus.starting) {
        _lastError = '$label crashed with code $code';
        _updateStatus(AiServerStatus.error);
      }
    });

    return process;
  }

  /// Returns true if a TCP connection to `127.0.0.1:[port]` succeeds within
  /// one second, meaning the server has bound the port and is accepting
  /// connections.
  Future<bool> _isPortReady(int port) async {
    try {
      final socket = await Socket.connect(
        '127.0.0.1',
        port,
        timeout: const Duration(seconds: 1),
      );
      socket.destroy();
      return true;
    } catch (_) {
      return false;
    }
  }

  /// Transitions to [newStatus], notifying listeners only on a real change.
  void _updateStatus(AiServerStatus newStatus) {
    if (_status != newStatus) {
      _status = newStatus;
      notifyListeners();
    }
  }
}
|