{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Crawler\/CrawlerStatsRepository.php",
"content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Crawler;\n\n\/\/ @responsibility: Aggregationen (Histogramme, Summen) fuer einen Crawl\n\nuse Domain\\Crawler\\CrawlStats;\n\nfinal class CrawlerStatsRepository\n{\n public function __construct(private \\PDO $pdo)\n {\n }\n\n public function forCrawl(int $crawlId): CrawlStats\n {\n $totals = $this->fetchTotals($crawlId);\n $statusHist = $this->fetchStatusHistogram($crawlId);\n $flagHist = $this->fetchFlagHistogram($crawlId);\n\n return new CrawlStats(\n totalPages: $totals['pages'],\n totalLinks: $totals['links'],\n totalExternalLinks: $totals['external'],\n brokenExternalLinks: $totals['broken_external'],\n pagesByStatus: $statusHist,\n flagCounts: $flagHist,\n );\n }\n\n \/**\n * @return array{pages: int, links: int, external: int, broken_external: int}\n *\/\n private function fetchTotals(int $crawlId): array\n {\n $sql = 'SELECT '\n . '(SELECT COUNT(*) FROM pages WHERE crawl_id = :cid1) AS pages,'\n . '(SELECT COUNT(*) FROM links l JOIN pages p ON p.id = l.page_id'\n . ' WHERE p.crawl_id = :cid2) AS links,'\n . '(SELECT COUNT(*) FROM links l JOIN pages p ON p.id = l.page_id'\n . ' WHERE p.crawl_id = :cid3 AND l.is_external = 1) AS external,'\n . '(SELECT COUNT(*) FROM links l JOIN pages p ON p.id = l.page_id'\n . ' WHERE p.crawl_id = :cid4 AND l.is_external = 1'\n . ' AND l.external_http_status >= 400) AS broken_external';\n $stmt = $this->pdo->prepare($sql);\n $stmt->execute([':cid1' => $crawlId, ':cid2' => $crawlId,\n ':cid3' => $crawlId, ':cid4' => $crawlId]);\n $row = $stmt->fetch();\n\n return [\n 'pages' => (int) ($row['pages'] ?? 0),\n 'links' => (int) ($row['links'] ?? 0),\n 'external' => (int) ($row['external'] ?? 0),\n 'broken_external' => (int) ($row['broken_external'] ?? 0),\n ];\n }\n\n \/**\n * @return array<int, int>\n *\/\n private function fetchStatusHistogram(int $crawlId): array\n {\n $stmt = $this->pdo->prepare(\n 'SELECT http_status, COUNT(*) AS c FROM pages'\n . ' WHERE crawl_id = :cid AND http_status IS NOT NULL'\n . ' GROUP BY http_status ORDER BY http_status',\n );\n $stmt->execute([':cid' => $crawlId]);\n $result = [];\n foreach ($stmt->fetchAll() as $row) {\n $result[(int) $row['http_status']] = (int) $row['c'];\n }\n return $result;\n }\n\n \/**\n * @return array<string, int>\n *\/\n private function fetchFlagHistogram(int $crawlId): array\n {\n $stmt = $this->pdo->prepare(\n \"SELECT flag.`key` AS k, COUNT(*) AS c FROM pages p\"\n . ' JOIN JSON_TABLE(p.quality_flags, \"$[*]\"'\n . ' COLUMNS(`key` VARCHAR(60) PATH \"$.key\")) flag'\n . ' WHERE p.crawl_id = :cid GROUP BY flag.`key` ORDER BY c DESC',\n );\n $stmt->execute([':cid' => $crawlId]);\n $result = [];\n foreach ($stmt->fetchAll() as $row) {\n $result[(string) $row['k']] = (int) $row['c'];\n }\n return $result;\n }\n}\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Crawler\/CrawlerStatsRepository.php",
"content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Crawler;\n\n\/\/ @responsibility: Aggregationen (Histogramme, Summen) fuer einen Crawl\n\nuse Domain\\Crawler\\CrawlStats;\n\nfinal class CrawlerStatsRepository\n{\n public function __construct(private \\PDO $pdo)\n {\n }\n\n public function forCrawl(int $crawlId): CrawlStats\n {\n $totals = $this->fetchTotals($crawlId);\n $statusHist = $this->fetchStatusHistogram($crawlId);\n $flagHist = $this->fetchFlagHistogram($crawlId);\n\n return new CrawlStats(\n totalPages: $totals['pages'],\n totalLinks: $totals['links'],\n totalExternalLinks: $totals['external'],\n brokenExternalLinks: $totals['broken_external'],\n pagesByStatus: $statusHist,\n flagCounts: $flagHist,\n );\n }\n\n \/**\n * @return array{pages: int, links: int, external: int, broken_external: int}\n *\/\n private function fetchTotals(int $crawlId): array\n {\n $sql = 'SELECT '\n . '(SELECT COUNT(*) FROM pages WHERE crawl_id = :cid1) AS pages,'\n . '(SELECT COUNT(*) FROM links l JOIN pages p ON p.id = l.page_id'\n . ' WHERE p.crawl_id = :cid2) AS links,'\n . '(SELECT COUNT(*) FROM links l JOIN pages p ON p.id = l.page_id'\n . ' WHERE p.crawl_id = :cid3 AND l.is_external = 1) AS external,'\n . '(SELECT COUNT(*) FROM links l JOIN pages p ON p.id = l.page_id'\n . ' WHERE p.crawl_id = :cid4 AND l.is_external = 1'\n . ' AND l.external_http_status >= 400) AS broken_external';\n $stmt = $this->pdo->prepare($sql);\n $stmt->execute([':cid1' => $crawlId, ':cid2' => $crawlId,\n ':cid3' => $crawlId, ':cid4' => $crawlId]);\n $row = $stmt->fetch();\n\n return [\n 'pages' => (int) ($row['pages'] ?? 0),\n 'links' => (int) ($row['links'] ?? 0),\n 'external' => (int) ($row['external'] ?? 0),\n 'broken_external' => (int) ($row['broken_external'] ?? 0),\n ];\n }\n\n \/**\n * @return array<int, int>\n *\/\n private function fetchStatusHistogram(int $crawlId): array\n {\n $stmt = $this->pdo->prepare(\n 'SELECT http_status, COUNT(*) AS c FROM pages'\n . ' WHERE crawl_id = :cid AND http_status IS NOT NULL'\n . ' GROUP BY http_status ORDER BY http_status',\n );\n $stmt->execute([':cid' => $crawlId]);\n $result = [];\n foreach ($stmt->fetchAll() as $row) {\n $result[(int) $row['http_status']] = (int) $row['c'];\n }\n return $result;\n }\n\n \/**\n * @return array<string, int>\n *\/\n private function fetchFlagHistogram(int $crawlId): array\n {\n $stmt = $this->pdo->prepare(\n \"SELECT flag.`key` AS k, COUNT(*) AS c FROM pages p\"\n . ' JOIN JSON_TABLE(p.quality_flags, \"$[*]\"'\n . ' COLUMNS(`key` VARCHAR(60) PATH \"$.key\")) flag'\n . ' WHERE p.crawl_id = :cid GROUP BY flag.`key` ORDER BY c DESC',\n );\n $stmt->execute([':cid' => $crawlId]);\n $result = [];\n foreach ($stmt->fetchAll() as $row) {\n $result[(string) $row['k']] = (int) $row['c'];\n }\n return $result;\n }\n}\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}