Backup #85
| ID | 85 |
| File path | /var/www/dev.campus.systemische-tools.de/src/Infrastructure/AI/ChatService.php |
| Version | 6 |
| Type | modified |
| Size | 11.5 KB |
| Hash | 67a7aa5aba6fcdd30a06a43d3c0030a3b32a48d94dba2c0b2e4ba63d3f640e7a |
| Date | 2025-12-20 19:15:47 |
| Modified by | claude-code-hook |
| Reason | Claude Code pre-hook backup before edit operation |
| File exists | Yes |
File content
<?php
declare(strict_types=1);
namespace Infrastructure\AI;
use RuntimeException;
/**
* RAG (Retrieval-Augmented Generation) Chat Service.
*
* Provides a complete RAG pipeline that:
* 1. Converts questions to embeddings using Ollama
* 2. Searches for relevant document chunks in Qdrant
* 3. Builds context from search results
* 4. Generates answers using Claude or Ollama
* 5. Returns structured responses with sources and metadata
*
* This service orchestrates the interaction between OllamaService,
* QdrantService, and ClaudeService to implement a production-ready
* RAG system for document-based question answering.
*
* @package Infrastructure\AI
* @author System Generated
* @version 1.0.0
*/
final readonly class ChatService
{
/**
* Constructs a new ChatService instance.
*
* @param OllamaService $ollama Ollama service for embeddings and optional LLM
* @param QdrantService $qdrant Qdrant service for vector search
* @param ClaudeService $claude Claude service for high-quality LLM responses
*/
public function __construct(
private OllamaService $ollama,
private QdrantService $qdrant,
private ClaudeService $claude
) {
}
/**
* Executes a complete RAG chat pipeline.
*
* Performs the following steps:
* 1. Generates an embedding vector for the question (if collections selected)
* 2. Searches for similar documents in the vector database(s)
* 3. Builds context from the most relevant chunks
* 4. Generates an answer using the specified LLM model
* 5. Extracts source information
* 6. Assembles a structured response
*
* If no collections are selected, steps 1-3 are skipped and step 5 yields an empty source list (no RAG).
*
* @param string $question The user's question to answer
* @param string $model The LLM model (claude-* or ollama:*)
* @param array<string> $collections Qdrant collections to search (empty = no RAG)
* @param int $limit Maximum number of document chunks to retrieve (default: 5)
* @param string|null $stylePrompt Optional style prompt from author profile
* @param string|null $customSystemPrompt Optional custom system prompt (replaces default if set)
* @param float $temperature Sampling temperature 0.0-1.0 (default: 0.7)
* @param int $maxTokens Maximum tokens in response (default: 4096)
*
* @return array{
* question: string,
* answer: string,
* sources: array<int, array{title: string, score: float, content?: string}>,
* model: string,
* usage?: array{input_tokens: int, output_tokens: int},
* chunks_used: int
* } Complete chat response with answer, sources, and metadata
*
* @throws RuntimeException If embedding generation fails
* @throws RuntimeException If vector search fails
* @throws RuntimeException If LLM request fails
*
* @example
* $chat = new ChatService($ollama, $qdrant, $claude);
* // With RAG (multiple collections)
* $result = $chat->chat('Was ist systemisches Coaching?', 'claude-opus-4-5-20251101', ['documents', 'mail'], 5);
* // Without RAG (no collections)
* $result = $chat->chat('Erkläre mir Python', 'claude-opus-4-5-20251101', [], 5);
*/
public function chat(
string $question,
string $model = 'claude-opus-4-5-20251101',
array $collections = [],
int $limit = 5,
?string $stylePrompt = null,
?string $customSystemPrompt = null,
float $temperature = 0.7,
int $maxTokens = 4096
): array {
$searchResults = [];
$context = '';
// Only perform RAG if collections are selected
if ($collections !== []) {
// Step 1: Generate embedding for the question
try {
$queryEmbedding = $this->ollama->getEmbedding($question);
} catch (RuntimeException $e) {
throw new RuntimeException(
'Embedding generation failed: ' . $e->getMessage(),
0,
$e
);
}
if ($queryEmbedding === []) {
throw new RuntimeException('Embedding generation returned empty vector');
}
// Step 2: Search across all selected collections
try {
$searchResults = $this->searchMultipleCollections($queryEmbedding, $collections, $limit);
} catch (RuntimeException $e) {
throw new RuntimeException(
'Vector search failed: ' . $e->getMessage(),
0,
$e
);
}
// Step 3: Build context from search results (if any found)
if ($searchResults !== []) {
$context = $this->buildContext($searchResults);
}
}
// Step 4: Parse model string and generate answer
$isOllama = str_starts_with($model, 'ollama:');
$isClaude = str_starts_with($model, 'claude-');
$hasContext = $context !== '';
if ($isClaude) {
try {
// Build prompt: RAG with context or direct question
if ($hasContext) {
$userPrompt = $this->claude->buildRagPrompt($question, $context);
} else {
$userPrompt = $question;
}
// Build system prompt: a custom prompt replaces the default, then the style prompt is appended
if ($customSystemPrompt !== null && $customSystemPrompt !== '') {
$systemPrompt = $customSystemPrompt;
} else {
$systemPrompt = $hasContext
? $this->claude->getDefaultSystemPrompt()
: 'Du bist ein hilfreicher Assistent. Antworte auf Deutsch, präzise und hilfreich.';
}
// Append style prompt from author profile if provided
if ($stylePrompt !== null && $stylePrompt !== '') {
$systemPrompt .= "\n\n" . $stylePrompt;
}
$llmResponse = $this->claude->ask($userPrompt, $systemPrompt, $model, $maxTokens, $temperature);
$answer = $llmResponse['text'];
$usage = $llmResponse['usage'];
} catch (RuntimeException $e) {
throw new RuntimeException(
'Claude API request failed: ' . $e->getMessage(),
0,
$e
);
}
} elseif ($isOllama) {
try {
// Extract actual model name (remove "ollama:" prefix)
$ollamaModel = substr($model, 7);
// Build instruction from custom prompt and style
$instructions = [];
if ($customSystemPrompt !== null && $customSystemPrompt !== '') {
$instructions[] = $customSystemPrompt;
}
if ($stylePrompt !== null && $stylePrompt !== '') {
$instructions[] = $stylePrompt;
}
$instructionBlock = $instructions !== [] ? implode("\n\n", $instructions) . "\n\n" : '';
// Build prompt: RAG with context or direct question
if ($hasContext) {
$userPrompt = sprintf(
"%sKontext aus den Dokumenten:\n\n%s\n\n---\n\nFrage: %s",
$instructionBlock,
$context,
$question
);
} else {
$userPrompt = $instructionBlock . $question;
}
$answer = $this->ollama->generate($userPrompt, $ollamaModel, $temperature);
$usage = null;
} catch (RuntimeException $e) {
throw new RuntimeException(
'Ollama generation failed: ' . $e->getMessage(),
0,
$e
);
}
} else {
throw new RuntimeException(
sprintf('Unknown model "%s". Use claude-* or ollama:* format.', $model)
);
}
// Step 5: Extract source information
$sources = $this->extractSources($searchResults);
// Step 6: Assemble response
$response = [
'question' => $question,
'answer' => $answer,
'sources' => $sources,
'model' => $model,
'chunks_used' => count($searchResults),
];
if ($usage !== null) {
$response['usage'] = $usage;
}
return $response;
}
/**
* Builds a context string from search results.
*
* Concatenates the content from multiple search results into a single
* context string, respecting a maximum character limit. Each chunk is
* labeled with its source document title.
*
* @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults Vector search results
* @param int $maxTokens Maximum tokens to include (default: 3000)
*
* @return string The built context string
*/
private function buildContext(array $searchResults, int $maxTokens = 3000): string
{
$contextParts = [];
$totalChars = 0;
$maxChars = $maxTokens * 4; // Approximate: 1 token ~ 4 characters
foreach ($searchResults as $index => $result) {
$payload = $result['payload'];
$content = (string) ($payload['content'] ?? '');
$docTitle = (string) ($payload['document_title'] ?? 'Unbekannt');
// Check if adding this chunk would exceed the limit
if ($totalChars + strlen($content) > $maxChars) {
break;
}
$contextParts[] = sprintf('[Quelle %d: %s]%s%s', $index + 1, $docTitle, "\n", $content);
$totalChars += strlen($content);
}
return implode("\n\n---\n\n", $contextParts);
}
/**
* Extracts unique source information from search results.
*
* Collects document titles and scores from the search results,
* deduplicating by title to provide a clean list of sources.
* Optionally includes content preview if available.
*
* @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults Vector search results
*
* @return array<int, array{title: string, score: float, content?: string}> Deduplicated source information
*/
private function extractSources(array $searchResults): array
{
$sources = [];
$seen = [];
foreach ($searchResults as $result) {
$payload = $result['payload'];
$docTitle = (string) ($payload['document_title'] ?? '');
// Skip empty titles or already seen titles
if ($docTitle === '' || isset($seen[$docTitle])) {
continue;
}
$source = [
'title' => $docTitle,
'score' => round($result['score'], 3),
];
// Optionally include content preview
if (isset($payload['content']) && is_string($payload['content'])) {
$source['content'] = $payload['content'];
}
$sources[] = $source;
$seen[$docTitle] = true;
}
return $sources;
}
}
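To show how this service might be wired up by calling code, here is a minimal usage sketch. Only ChatService itself is part of this backup; the zero-argument constructors of OllamaService, QdrantService and ClaudeService are assumptions made purely for illustration, and the question, model and collection names are taken from the @example in the class docblock above.

<?php
declare(strict_types=1);

use Infrastructure\AI\ChatService;
use Infrastructure\AI\ClaudeService;
use Infrastructure\AI\OllamaService;
use Infrastructure\AI\QdrantService;

// Assumption: the dependency services can be constructed without arguments here;
// their real constructors are not part of this backup.
$chat = new ChatService(new OllamaService(), new QdrantService(), new ClaudeService());

// RAG query against two Qdrant collections (values taken from the @example in the class).
$result = $chat->chat(
    question: 'Was ist systemisches Coaching?',
    model: 'claude-opus-4-5-20251101',
    collections: ['documents', 'mail'],
    limit: 5
);

echo $result['answer'], PHP_EOL;

// Each source carries a title and a rounded similarity score (see extractSources()).
foreach ($result['sources'] as $source) {
    printf("Quelle: %s (Score %.3f)%s", $source['title'], $source['score'], PHP_EOL);
}

The sketch relies on PHP 8 named arguments, which the readonly class syntax in the backed-up file already presupposes; positional arguments would work identically.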
Other versions of this file
| ID | Version | Type | Size | Date |
| 85 | 6 | modified | 11.5 KB | 2025-12-20 19:15 |
| 84 | 5 | modified | 10.9 KB | 2025-12-20 19:15 |
| 83 | 4 | modified | 10.9 KB | 2025-12-20 19:14 |
| 54 | 3 | modified | 10.9 KB | 2025-12-20 18:30 |
| 53 | 2 | modified | 10.9 KB | 2025-12-20 18:30 |
| 52 | 1 | modified | 10.6 KB | 2025-12-20 18:30 |