pdo = $pdo; }

    /**
     * Return aggregate counters over all rows of dokumentation_chunks.
     *
     * All four values are cast to int and default to 0, including on an
     * empty table where SUM() would otherwise produce NULL.
     *
     * @return array{total: int, tokens: int, analyzed: int, synced: int}
     */
    public function getChunkStats(): array
    {
        // Single-quoted SQL literal: double quotes denote identifiers in standard SQL.
        $row = $this->pdo->query(
            "SELECT
                COUNT(*) AS total,
                COALESCE(SUM(token_count), 0) AS tokens,
                COALESCE(SUM(CASE WHEN analysis_status = 'completed' THEN 1 ELSE 0 END), 0) AS analyzed,
                COALESCE(SUM(CASE WHEN qdrant_id IS NOT NULL THEN 1 ELSE 0 END), 0) AS synced
            FROM dokumentation_chunks"
        )->fetch(PDO::FETCH_ASSOC);

        if ($row === false) {
            return ['total' => 0, 'tokens' => 0, 'analyzed' => 0, 'synced' => 0];
        }

        // Drivers may hand back numeric strings (or NULL); normalise to the documented shape.
        return [
            'total' => (int) ($row['total'] ?? 0),
            'tokens' => (int) ($row['tokens'] ?? 0),
            'analyzed' => (int) ($row['analyzed'] ?? 0),
            'synced' => (int) ($row['synced'] ?? 0),
        ];
    }

    /**
     * Count the chunks matching the optional filters.
     *
     * An empty string disables the respective filter. The query joins
     * dokumentation exactly like getChunksFilteredPaginated() does, so the
     * count matches the rows that method can return.
     */
    public function countChunksFiltered(string $category = '', string $status = '', string $search = ''): int
    {
        [$filterSql, $params] = $this->buildChunkFilter($category, $status, $search);

        $stmt = $this->pdo->prepare(
            'SELECT COUNT(*)
             FROM dokumentation_chunks c
             JOIN dokumentation d ON c.dokumentation_id = d.id
             WHERE 1=1' . $filterSql
        );
        $stmt->execute($params);

        return (int) $stmt->fetchColumn();
    }

    /**
     * Fetch one page of chunks (newest first) matching the optional filters.
     *
     * An empty string disables the respective filter.
     *
     * @return array<int, array<string, mixed>> Rows with chunk columns plus dokument_title and dokument_path
     */
    public function getChunksFilteredPaginated(
        string $category = '',
        string $status = '',
        string $search = '',
        int $limit = 50,
        int $offset = 0
    ): array {
        [$filterSql, $params] = $this->buildChunkFilter($category, $status, $search);

        $stmt = $this->pdo->prepare(
            'SELECT c.id, c.chunk_index, c.content, c.token_count, c.taxonomy_category,
                    c.analysis_status, c.qdrant_id, c.created_at,
                    d.title as dokument_title, d.path as dokument_path
             FROM dokumentation_chunks c
             JOIN dokumentation d ON c.dokumentation_id = d.id
             WHERE 1=1' . $filterSql . '
             ORDER BY c.created_at DESC
             LIMIT :limit OFFSET :offset'
        );

        foreach ($params as $key => $value) {
            $stmt->bindValue(':' . $key, $value);
        }
        // LIMIT/OFFSET are bound explicitly as integers rather than passed through execute().
        $stmt->bindValue(':limit', $limit, PDO::PARAM_INT);
        $stmt->bindValue(':offset', $offset, PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * Build the optional filter conditions shared by the filtered count and list queries.
     *
     * NOTE: "%" and "_" inside $search are not escaped and therefore act as LIKE wildcards.
     *
     * @return array{0: string, 1: array<string, string>} SQL fragment (empty, or one or more " AND ..." clauses)
     *                                                    and the named parameters it references
     */
    private function buildChunkFilter(string $category, string $status, string $search): array
    {
        $sql = '';
        $params = [];

        if ($category !== '') {
            $sql .= ' AND c.taxonomy_category = :category';
            $params['category'] = $category;
        }

        if ($status !== '') {
            $sql .= ' AND c.analysis_status = :status';
            $params['status'] = $status;
        }

        if ($search !== '') {
            // Two distinct placeholders: a named marker may not be reused when prepares are not emulated.
            $sql .= ' AND (c.content LIKE :search OR c.keywords LIKE :search2)';
            $pattern = '%' . $search . '%';
            $params['search'] = $pattern;
            $params['search2'] = $pattern;
        }

        return [$sql, $params];
    }

    /**
     * Fetch the most recently created chunks together with their document title and path.
     *
     * @return array<int, array<string, mixed>>
     */
    public function getRecentChunks(int $limit = 5): array
    {
        $stmt = $this->pdo->prepare(
            'SELECT c.id, c.content, c.taxonomy_category, c.token_count, c.created_at,
                    d.title as dokument_title, d.path as dokument_path
             FROM dokumentation_chunks c
             JOIN dokumentation d ON c.dokumentation_id = d.id
             ORDER BY c.created_at DESC
             LIMIT :limit'
        );
        $stmt->bindValue(':limit', $limit, PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * Load a single chunk with its document title, path and id.
     *
     * The JSON columns entities, keywords, taxonomy_path and heading_path are
     * additionally exposed in decoded form under "<column>_decoded".
     *
     * @return array<string, mixed>|null Null when no chunk with this id exists
     */
    public function getChunk(int $id): ?array
    {
        $stmt = $this->pdo->prepare(
            'SELECT c.*, d.title as dokument_title, d.path as dokument_path, d.id as dokument_id
             FROM dokumentation_chunks c
             JOIN dokumentation d ON c.dokumentation_id = d.id
             WHERE c.id = :id'
        );
        $stmt->execute(['id' => $id]);
        $result = $stmt->fetch(PDO::FETCH_ASSOC);

        if ($result === false) {
            return null;
        }

        // Decode JSON fields
        foreach (['entities', 'keywords', 'taxonomy_path', 'heading_path'] as $column) {
            $result[$column . '_decoded'] = $this->decodeJsonArray($result[$column] ?? null);
        }

        return $result;
    }

    /**
     * List the chunks of one document ordered by chunk_index (summary columns only).
     *
     * @return array<int, array<string, mixed>>
     */
    public function getChunksForDokument(int $dokumentId): array
    {
        $stmt = $this->pdo->prepare(
            'SELECT id, chunk_index, content, token_count, taxonomy_category,
                    analysis_status, qdrant_id, created_at
             FROM dokumentation_chunks
             WHERE dokumentation_id = :id
             ORDER BY chunk_index'
        );
        $stmt->execute(['id' => $dokumentId]);

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * List the chunks of one document ordered by chunk_index, including the
     * raw (undecoded) taxonomy_path, entities, keywords and heading_path columns.
     *
     * @return array<int, array<string, mixed>>
     */
    public function getChunksDetailedForDokument(int $dokumentId): array
    {
        $stmt = $this->pdo->prepare(
            'SELECT c.id, c.chunk_index, c.content, c.token_count, c.taxonomy_category,
                    c.taxonomy_path, c.entities, c.keywords, c.analysis_status,
                    c.qdrant_id, c.heading_path, c.analyzed_at
             FROM dokumentation_chunks c
             WHERE c.dokumentation_id = :id
             ORDER BY c.chunk_index'
        );
        $stmt->execute(['id' => $dokumentId]);

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * Look up a chunk by its document id and position within that document.
     *
     * @return array<string, mixed>|null Row with id, chunk_index and content, or null when not found
     */
    public function getChunkByDokumentAndIndex(int $dokumentId, int $index): ?array
    {
        $stmt = $this->pdo->prepare(
            'SELECT id, chunk_index, content
             FROM dokumentation_chunks
             WHERE dokumentation_id = :doc_id AND chunk_index = :idx'
        );
        $stmt->execute(['doc_id' => $dokumentId, 'idx' => $index]);
        $result = $stmt->fetch(PDO::FETCH_ASSOC);

        return $result !== false ? $result : null;
    }

    /**
     * Return every non-null taxonomy category in use, sorted by the database.
     *
     * @return array<int, string>
     */
    public function getDistinctCategories(): array
    {
        return $this->pdo->query(
            'SELECT DISTINCT taxonomy_category
             FROM dokumentation_chunks
             WHERE taxonomy_category IS NOT NULL
             ORDER BY taxonomy_category'
        )->fetchAll(PDO::FETCH_COLUMN);
    }

    /**
     * Return the taxonomy categories with the most chunks.
     *
     * @return array<int, array<string, mixed>> Rows with the keys taxonomy_category and count
     */
    public function getTopTaxonomyCategories(int $limit = 10): array
    {
        $stmt = $this->pdo->prepare(
            'SELECT taxonomy_category, COUNT(*) as count
             FROM dokumentation_chunks
             WHERE taxonomy_category IS NOT NULL
             GROUP BY taxonomy_category
             ORDER BY count DESC
             LIMIT :limit'
        );
        $stmt->bindValue(':limit', $limit, PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * Return all taxonomy categories with their chunk and token totals, largest first.
     *
     * @return array<int, array<string, mixed>> Rows with the keys taxonomy_category, chunk_count and token_count
     */
    public function getCategoriesWithStats(): array
    {
        return $this->pdo->query(
            'SELECT taxonomy_category,
                    COUNT(*) as chunk_count,
                    COALESCE(SUM(token_count), 0) as token_count
             FROM dokumentation_chunks
             WHERE taxonomy_category IS NOT NULL
             GROUP BY taxonomy_category
             ORDER BY chunk_count DESC'
        )->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * Count keyword occurrences across all chunks and return the most frequent ones.
     *
     * Keywords are trimmed and lower-cased before counting; empty and
     * non-scalar entries are ignored.
     *
     * @return array<string, int> Keyword => number of occurrences, most frequent first
     */
    public function getTopKeywords(int $limit = 30): array
    {
        $keywordsRaw = $this->pdo->query(
            'SELECT keywords FROM dokumentation_chunks WHERE keywords IS NOT NULL'
        )->fetchAll(PDO::FETCH_COLUMN);

        // Multibyte-aware lower-casing when available so non-ASCII case variants are merged too.
        $hasMultibyte = function_exists('mb_strtolower');

        $keywordCounts = [];
        foreach ($keywordsRaw as $json) {
            foreach ($this->decodeJsonArray($json) as $kw) {
                // Nested arrays/objects/null cannot be keywords and would make trim() throw.
                if (!is_scalar($kw)) {
                    continue;
                }

                $kw = trim((string) $kw);
                $kw = $hasMultibyte ? mb_strtolower($kw, 'UTF-8') : strtolower($kw);

                if ($kw !== '') {
                    $keywordCounts[$kw] = ($keywordCounts[$kw] ?? 0) + 1;
                }
            }
        }

        arsort($keywordCounts);

        return array_slice($keywordCounts, 0, $limit, true);
    }

    /**
     * Return the most frequent entities as a flat map.
     *
     * @return array<string, int> "name|type" => number of occurrences, most frequent first
     */
    public function getTopEntitiesRaw(int $limit = 30): array
    {
        $entityCounts = $this->aggregateEntities();
        arsort($entityCounts);

        return array_slice($entityCounts, 0, $limit, true);
    }

    /**
     * Count entities across all chunks and return them both as a ranked list and grouped by type.
     *
     * Only "entities" is truncated to $limit; "by_type" contains every counted
     * entity and "total" is the number of distinct name/type pairs.
     *
     * @return array{
     *     entities: array<int, array{name: mixed, type: mixed, count: int}>,
     *     by_type: array<string, array<int, array{name: mixed, type: mixed, count: int}>>,
     *     total: int
     * }
     */
    public function getEntitiesGrouped(int $limit = Constants::DEFAULT_LIMIT): array
    {
        $entityCounts = [];
        foreach ($this->iterateEntities() as $entity) {
            $name = $entity['name'] ?? '';
            $type = $entity['type'] ?? 'OTHER';

            if ($name === '') {
                continue;
            }

            $key = $name . '|' . $type;
            if (!isset($entityCounts[$key])) {
                $entityCounts[$key] = [
                    'name' => $name,
                    'type' => $type,
                    'count' => 0,
                ];
            }
            $entityCounts[$key]['count']++;
        }

        // Sort by count (descending); usort() also re-indexes the array into a list.
        usort($entityCounts, static fn ($a, $b) => $b['count'] <=> $a['count']);

        // Group by type, keeping the by-count order inside each group.
        $byType = [];
        foreach ($entityCounts as $entity) {
            $byType[$entity['type']][] = $entity;
        }

        return [
            'entities' => array_slice($entityCounts, 0, $limit),
            'by_type' => $byType,
            'total' => count($entityCounts),
        ];
    }

    /**
     * Aggregate entities from all chunks.
     *
     * @return array<string, int> "name|type" => number of occurrences (unsorted)
     */
    private function aggregateEntities(): array
    {
        $entityCounts = [];
        foreach ($this->iterateEntities() as $entity) {
            $name = $entity['name'] ?? '';
            $type = $entity['type'] ?? 'OTHER';

            if ($name !== '') {
                $key = $name . '|' . $type;
                $entityCounts[$key] = ($entityCounts[$key] ?? 0) + 1;
            }
        }

        return $entityCounts;
    }

    /**
     * Yield every decoded entry of the entities column, chunk by chunk.
     *
     * Shared by getEntitiesGrouped() and aggregateEntities() so both count
     * over exactly the same data.
     *
     * @return iterable<mixed>
     */
    private function iterateEntities(): iterable
    {
        $entitiesRaw = $this->pdo->query(
            'SELECT entities FROM dokumentation_chunks WHERE entities IS NOT NULL'
        )->fetchAll(PDO::FETCH_COLUMN);

        foreach ($entitiesRaw as $json) {
            foreach ($this->decodeJsonArray($json) as $entity) {
                yield $entity;
            }
        }
    }
}