Next refactors
Some checks failed
Build Linux App / build (push) Failing after 1m18s

Kazimierz Ciołek
2026-02-24 02:19:28 +01:00
parent 0c9eb8878d
commit 9dcc4b87de
40 changed files with 3515 additions and 2575 deletions

View File

@@ -1,88 +0,0 @@
import 'dart:io';

import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';

/// Manages the two llama.cpp server processes that provide AI features.
///
/// Both processes are kept alive for the lifetime of the app and must be
/// killed on shutdown to prevent zombie processes from consuming RAM.
///
/// - Qwen 2.5 7B → port 8080 (chat / completions)
/// - Nomic Embed → port 8081 (embeddings)
class AiProcessManager {
  Process? _qwenProcess;
  Process? _nomicProcess;
  bool _running = false;

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  bool get isRunning => _running;

  /// Starts both inference servers. No-ops if already running or if the
  /// llama-server binary is not present on disk.
  Future<void> startServers() async {
    if (_running) return;

    final dir = await getApplicationDocumentsDirectory();
    final base = dir.path;
    final serverBin = p.join(
      base,
      Platform.isWindows ? 'llama-server.exe' : 'llama-server',
    );
    if (!File(serverBin).existsSync()) return;

    try {
      // ── Qwen 2.5 7B chat server ──────────────────────────────────────────
      _qwenProcess = await Process.start(
        serverBin,
        [
          '-m', p.join(base, 'qwen2.5-7b-instruct-q4_k_m.gguf'),
          '--port', '8080',
          '--ctx-size', '4096',
          '-ngl', '99', // offload all layers to GPU
        ],
        runInShell: false,
      );
      // Drain pipes so the process is never blocked by a full buffer.
      _qwenProcess!.stdout.drain<List<int>>();
      _qwenProcess!.stderr.drain<List<int>>();

      // ── Nomic embedding server ───────────────────────────────────────────
      _nomicProcess = await Process.start(
        serverBin,
        [
          '-m', p.join(base, 'nomic-embed-text-v1.5.Q4_K_M.gguf'),
          '--port', '8081',
          '--ctx-size', '8192',
          '--embedding',
        ],
        runInShell: false,
      );
      _nomicProcess!.stdout.drain<List<int>>();
      _nomicProcess!.stderr.drain<List<int>>();

      _running = true;
    } catch (_) {
      // Clean up any partially-started processes before rethrowing.
      _qwenProcess?.kill();
      _nomicProcess?.kill();
      _qwenProcess = null;
      _nomicProcess = null;
      rethrow;
    }
  }

  /// Kills both processes and resets the running flag.
  /// Safe to call even if servers were never started.
  Future<void> stopServers() async {
    _qwenProcess?.kill();
    _nomicProcess?.kill();
    _qwenProcess = null;
    _nomicProcess = null;
    _running = false;
  }
}
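
As a hedged aside, the sketch below shows one way this manager could be wired into a Flutter app's lifecycle so both llama-server processes start once and are reliably killed on exit. The `MyApp` widget and the `AppLifecycleListener` hookup are illustrative assumptions, not part of this commit.

// Hypothetical wiring (assumption, not part of this commit): start the
// servers once during startup and stop them when the app is asked to exit,
// so no llama-server process outlives the Flutter app.
import 'package:flutter/widgets.dart';

final aiProcessManager = AiProcessManager();

Future<void> main() async {
  WidgetsFlutterBinding.ensureInitialized();
  await aiProcessManager.startServers();

  // Kill both inference servers before the app exits.
  AppLifecycleListener(
    onExitRequested: () async {
      await aiProcessManager.stopServers();
      return AppExitResponse.exit;
    },
  );

  runApp(const MyApp()); // MyApp is a placeholder root widget.
}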

View File

@@ -1,31 +0,0 @@
import 'package:dio/dio.dart';

/// Wraps the Nomic embedding server (llama.cpp, port 8081).
/// Returns a 768-dimensional float vector for any input text.
class EmbeddingService {
  final _dio = Dio(
    BaseOptions(
      connectTimeout: const Duration(seconds: 10),
      receiveTimeout: const Duration(seconds: 60),
    ),
  );

  static const _url = 'http://localhost:8081/v1/embeddings';

  /// Returns the embedding vector for [text].
  /// Throws a [DioException] if the Nomic server is unreachable.
  Future<List<double>> embed(String text) async {
    final response = await _dio.post<Map<String, dynamic>>(
      _url,
      data: {
        'input': text,
        'model': 'nomic-embed-text-v1.5.Q4_K_M',
      },
    );
    final raw =
        (response.data!['data'] as List<dynamic>)[0]['embedding']
            as List<dynamic>;
    return raw.map((e) => (e as num).toDouble()).toList();
  }
}
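
For illustration only, a hedged usage sketch of this service: the `cosineSimilarity` helper and the example strings are assumptions added for the sketch, not part of this commit.

import 'dart:math' as math;

/// Cosine similarity between two equal-length embedding vectors
/// (close to 1.0 for near-identical meaning, near 0.0 for unrelated text).
double cosineSimilarity(List<double> a, List<double> b) {
  var dot = 0.0, normA = 0.0, normB = 0.0;
  for (var i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (math.sqrt(normA) * math.sqrt(normB));
}

Future<void> demo() async {
  final service = EmbeddingService();
  // Each call hits the Nomic server on port 8081 and returns 768 doubles.
  final question = await service.embed('How do I reset my password?');
  final answer = await service.embed('Password reset instructions');
  print(cosineSimilarity(question, answer)); // Close to 1.0 for related text.
}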