Backup #83
| ID | 83 |
| File path | /var/www/dev.campus.systemische-tools.de/src/Infrastructure/AI/ChatService.php |
| Version | 4 |
| Type | modified |
| Size | 10.9 KB |
| Hash | e21b9600a444ad206e4cee402005bb323849d8737c4b8591737bc7dab87a6602 |
| Date | 2025-12-20 19:14:58 |
| Modified by | claude-code-hook |
| Reason | Claude Code pre-hook backup before edit operation |
| File exists | Yes |
File contents
<?php
declare(strict_types=1);
namespace Infrastructure\AI;
use RuntimeException;
/**
* RAG (Retrieval-Augmented Generation) Chat Service.
*
* Provides a complete RAG pipeline that:
* 1. Converts questions to embeddings using Ollama
* 2. Searches for relevant document chunks in Qdrant
* 3. Builds context from search results
* 4. Generates answers using Claude or Ollama
* 5. Returns structured responses with sources and metadata
*
* This service orchestrates the interaction between OllamaService,
* QdrantService, and ClaudeService to implement a production-ready
* RAG system for document-based question answering.
*
* @package Infrastructure\AI
* @author System Generated
* @version 1.0.0
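*
* @example
* // Minimal usage sketch; the collaborating service instances are
* // assumed to be constructed elsewhere (e.g. by a DI container):
* $chat = new ChatService($ollama, $qdrant, $claude);
* $result = $chat->chat('Was ist systemisches Coaching?');
* echo $result['answer'];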
*/
final readonly class ChatService
{
/**
* Constructs a new ChatService instance.
*
* @param OllamaService $ollama Ollama service for embeddings and optional LLM
* @param QdrantService $qdrant Qdrant service for vector search
* @param ClaudeService $claude Claude service for high-quality LLM responses
*/
public function __construct(
private OllamaService $ollama,
private QdrantService $qdrant,
private ClaudeService $claude
) {
}
/**
* Executes a complete RAG chat pipeline.
*
* Performs the following steps:
* 1. Generates an embedding vector for the question
* 2. Searches for similar documents in the vector database
* 3. Builds context from the most relevant chunks
* 4. Generates an answer using the specified LLM model
* 5. Extracts source information
* 6. Assembles a structured response
*
* @param string $question The user's question to answer
* @param string $model The LLM model (claude-* or ollama:*)
* @param string $collection The Qdrant collection to search in (default: documents)
* @param int $limit Maximum number of document chunks to retrieve (default: 5)
* @param string|null $stylePrompt Optional style prompt from author profile
* @param string|null $customSystemPrompt Optional custom system prompt (replaces default if set)
* @param float $temperature Sampling temperature 0.0-1.0 (default: 0.7)
* @param int $maxTokens Maximum tokens in response (default: 4096)
*
* @return array{
* question: string,
* answer: string,
* sources: array<int, array{title: string, score: float, content?: string}>,
* model: string,
* usage?: array{input_tokens: int, output_tokens: int},
* chunks_used: int
* } Complete chat response with answer, sources, and metadata
*
* @throws RuntimeException If embedding generation fails
* @throws RuntimeException If vector search fails
* @throws RuntimeException If no relevant documents are found
* @throws RuntimeException If LLM request fails
*
* @example
* $chat = new ChatService($ollama, $qdrant, $claude);
* $result = $chat->chat('Was ist systemisches Coaching?', 'claude-opus-4-5-20251101', 'documents', 5, null, null, 0.7, 4096);
* // Returns: [
* // 'question' => 'Was ist systemisches Coaching?',
* // 'answer' => 'Systemisches Coaching ist...',
* // 'sources' => [
* // ['title' => 'Coaching Grundlagen', 'score' => 0.89],
* // ['title' => 'Systemische Methoden', 'score' => 0.76]
* // ],
* // 'model' => 'claude-opus-4-5-20251101',
* // 'usage' => ['input_tokens' => 234, 'output_tokens' => 567],
* // 'chunks_used' => 5
* // ]
*/
public function chat(
string $question,
string $model = 'claude-opus-4-5-20251101',
string $collection = 'documents',
int $limit = 5,
?string $stylePrompt = null,
?string $customSystemPrompt = null,
float $temperature = 0.7,
int $maxTokens = 4096
): array {
// Step 1: Generate embedding for the question
try {
$queryEmbedding = $this->ollama->getEmbedding($question);
} catch (RuntimeException $e) {
throw new RuntimeException(
'Embedding generation failed: ' . $e->getMessage(),
0,
$e
);
}
if ($queryEmbedding === []) {
throw new RuntimeException('Embedding generation returned empty vector');
}
// Step 2: Search for relevant document chunks
try {
$searchResults = $this->qdrant->search($queryEmbedding, $collection, $limit);
} catch (RuntimeException $e) {
throw new RuntimeException(
'Vector search failed: ' . $e->getMessage(),
0,
$e
);
}
if ($searchResults === []) {
throw new RuntimeException('No relevant documents found for the question');
}
// Step 3: Build context from search results
$context = $this->buildContext($searchResults);
// Step 4: Parse model string and generate answer
$isOllama = str_starts_with($model, 'ollama:');
$isClaude = str_starts_with($model, 'claude-');
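// e.g. "claude-opus-4-5-20251101" routes to Claude; any "ollama:<model>" string routes to Ollama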
if ($isClaude) {
try {
$ragPrompt = $this->claude->buildRagPrompt($question, $context);
// Build system prompt: a custom prompt replaces the default, then the style prompt is appended
if ($customSystemPrompt !== null && $customSystemPrompt !== '') {
$systemPrompt = $customSystemPrompt;
} else {
$systemPrompt = $this->claude->getDefaultSystemPrompt();
}
// Append style prompt from author profile if provided
if ($stylePrompt !== null && $stylePrompt !== '') {
$systemPrompt .= "\n\n" . $stylePrompt;
}
$llmResponse = $this->claude->ask($ragPrompt, $systemPrompt, $model, $maxTokens, $temperature);
$answer = $llmResponse['text'];
$usage = $llmResponse['usage'];
} catch (RuntimeException $e) {
throw new RuntimeException(
'Claude API request failed: ' . $e->getMessage(),
0,
$e
);
}
} elseif ($isOllama) {
try {
// Extract actual model name (remove "ollama:" prefix)
$ollamaModel = substr($model, 7);
// Build instruction from custom prompt and style
$instructions = [];
if ($customSystemPrompt !== null && $customSystemPrompt !== '') {
$instructions[] = $customSystemPrompt;
}
if ($stylePrompt !== null && $stylePrompt !== '') {
$instructions[] = $stylePrompt;
}
$instructionBlock = $instructions !== [] ? implode("\n\n", $instructions) . "\n\n" : '';
$ragPrompt = sprintf(
"%sKontext aus den Dokumenten:\n\n%s\n\n---\n\nFrage: %s",
$instructionBlock,
$context,
$question
);
$answer = $this->ollama->generate($ragPrompt, $ollamaModel, $temperature);
$usage = null;
} catch (RuntimeException $e) {
throw new RuntimeException(
'Ollama generation failed: ' . $e->getMessage(),
0,
$e
);
}
} else {
throw new RuntimeException(
sprintf('Unknown model "%s". Use claude-* or ollama:* format.', $model)
);
}
// Step 5: Extract source information
$sources = $this->extractSources($searchResults);
// Step 6: Assemble response
$response = [
'question' => $question,
'answer' => $answer,
'sources' => $sources,
'model' => $model,
'chunks_used' => count($searchResults),
];
if ($usage !== null) {
$response['usage'] = $usage;
}
return $response;
}
/**
* Builds a context string from search results.
*
* Concatenates the content from multiple search results into a single
* context string, respecting a maximum character limit. Each chunk is
* labeled with its source document title.
*
* @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults Vector search results
* @param int $maxTokens Maximum tokens to include (default: 3000)
*
* @return string The built context string
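*
* @example
* // Illustrative input shaped like a Qdrant search result
* // (title, score, and content here are assumed values):
* $context = $this->buildContext([
*     ['id' => 1, 'score' => 0.89, 'payload' => [
*         'content' => 'Coaching ist...',
*         'document_title' => 'Coaching Grundlagen',
*     ]],
* ]);
* // Returns: "[Quelle 1: Coaching Grundlagen]\nCoaching ist..."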
*/
private function buildContext(array $searchResults, int $maxTokens = 3000): string
{
$contextParts = [];
$totalChars = 0;
$maxChars = $maxTokens * 4; // Approximate: 1 token ~ 4 characters
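// With the default of 3000 tokens this caps the context at roughly 12,000 characters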
foreach ($searchResults as $index => $result) {
$payload = $result['payload'];
$content = (string) ($payload['content'] ?? '');
$docTitle = (string) ($payload['document_title'] ?? 'Unbekannt');
// Check if adding this chunk would exceed the limit
if ($totalChars + strlen($content) > $maxChars) {
break;
}
$contextParts[] = sprintf('[Quelle %d: %s]%s%s', $index + 1, $docTitle, "\n", $content);
$totalChars += strlen($content);
}
return implode("\n\n---\n\n", $contextParts);
}
/**
* Extracts unique source information from search results.
*
* Collects document titles and scores from the search results,
* deduplicating by title to provide a clean list of sources.
* The chunk content is included when present in the payload.
*
* @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults Vector search results
*
* @return array<int, array{title: string, score: float, content?: string}> Deduplicated source information
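*
* @example
* // Two chunks from the same document collapse into one source
* // (scores and titles are illustrative):
* $sources = $this->extractSources([
*     ['id' => 1, 'score' => 0.89, 'payload' => ['document_title' => 'Coaching Grundlagen', 'content' => 'Coaching ist...']],
*     ['id' => 2, 'score' => 0.76, 'payload' => ['document_title' => 'Coaching Grundlagen', 'content' => '...']],
* ]);
* // Returns: [['title' => 'Coaching Grundlagen', 'score' => 0.89, 'content' => 'Coaching ist...']]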
*/
private function extractSources(array $searchResults): array
{
$sources = [];
$seen = [];
foreach ($searchResults as $result) {
$payload = $result['payload'];
$docTitle = (string) ($payload['document_title'] ?? '');
// Skip empty titles or already seen titles
if ($docTitle === '' || isset($seen[$docTitle])) {
continue;
}
$source = [
'title' => $docTitle,
'score' => round($result['score'], 3),
];
// Include the chunk content when present
if (isset($payload['content']) && is_string($payload['content'])) {
$source['content'] = $payload['content'];
}
$sources[] = $source;
$seen[$docTitle] = true;
}
return $sources;
}
}
Other versions of this file

| ID | Version | Type | Size | Date |
|----|---------|------|------|------|
| 85 | 6 | modified | 11.5 KB | 2025-12-20 19:15 |
| 84 | 5 | modified | 10.9 KB | 2025-12-20 19:15 |
| 83 | 4 | modified | 10.9 KB | 2025-12-20 19:14 |
| 54 | 3 | modified | 10.9 KB | 2025-12-20 18:30 |
| 53 | 2 | modified | 10.9 KB | 2025-12-20 18:30 |
| 52 | 1 | modified | 10.6 KB | 2025-12-20 18:30 |