{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/db\/schema.sql",
"oldString": "CREATE TABLE IF NOT EXISTS page_tokens (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n token VARCHAR(80) NOT NULL,\n tf INT UNSIGNED NOT NULL,\n CONSTRAINT fk_tokens_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n INDEX idx_page (page_id),\n INDEX idx_token (token)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;",
"newString": "CREATE TABLE IF NOT EXISTS page_tokens (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n token VARCHAR(80) NOT NULL,\n lemma VARCHAR(80) NULL,\n tf INT UNSIGNED NOT NULL,\n CONSTRAINT fk_tokens_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n INDEX idx_page (page_id),\n INDEX idx_token (token),\n INDEX idx_lemma (lemma)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS reference_corpus_de (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n corpus VARCHAR(40) NOT NULL,\n token VARCHAR(80) NOT NULL,\n frequency BIGINT UNSIGNED NOT NULL,\n UNIQUE KEY uq_corpus_token (corpus, token),\n INDEX idx_token (token)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;",
"originalFile": "-- Crawler DB schema. Target: mariadb \/ utf8mb4_unicode_ci.\n-- See planung\/00-planung.md §5 for field rationale.\n\nCREATE TABLE IF NOT EXISTS crawls (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n base_url VARCHAR(500) NOT NULL,\n mode ENUM('fast','full','hybrid') NOT NULL,\n started_at DATETIME(3) NOT NULL,\n finished_at DATETIME(3) NULL,\n status ENUM('running','completed','failed','aborted') NOT NULL,\n config_json JSON NULL,\n total_urls INT UNSIGNED NOT NULL DEFAULT 0,\n total_errors INT UNSIGNED NOT NULL DEFAULT 0,\n trigger_source ENUM('cli','cron','api','ui') NOT NULL,\n INDEX idx_started (started_at),\n INDEX idx_status (status)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS urls (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n url_hash CHAR(64) NOT NULL UNIQUE,\n url VARCHAR(2048) NOT NULL,\n scheme VARCHAR(10) NOT NULL,\n host VARCHAR(255) NOT NULL,\n path VARCHAR(2048) NOT NULL,\n is_internal TINYINT(1) NOT NULL,\n first_seen_at DATETIME(3) NOT NULL,\n last_seen_at DATETIME(3) NOT NULL,\n INDEX idx_host (host),\n INDEX idx_internal (is_internal)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS pages (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n crawl_id BIGINT UNSIGNED NOT NULL,\n url_id BIGINT UNSIGNED NOT NULL,\n fetched_at DATETIME(3) NOT NULL,\n render_mode ENUM('http','playwright') NOT NULL,\n http_status SMALLINT NULL,\n final_url VARCHAR(2048) NULL,\n redirect_chain JSON NULL,\n response_time_ms INT UNSIGNED NULL,\n ttfb_ms INT UNSIGNED NULL,\n content_type VARCHAR(200) NULL,\n content_length BIGINT NULL,\n content_encoding VARCHAR(50) NULL,\n title TEXT NULL,\n title_length SMALLINT NULL,\n meta_description TEXT NULL,\n meta_description_length SMALLINT NULL,\n meta_keywords TEXT NULL,\n meta_robots VARCHAR(200) NULL,\n canonical VARCHAR(2048) NULL,\n lang VARCHAR(20) NULL,\n charset VARCHAR(40) NULL,\n viewport VARCHAR(200) NULL,\n og_title TEXT NULL,\n og_description TEXT NULL,\n og_image VARCHAR(2048) NULL,\n og_type VARCHAR(100) NULL,\n og_url VARCHAR(2048) NULL,\n og_site_name VARCHAR(200) NULL,\n og_locale VARCHAR(20) NULL,\n twitter_card VARCHAR(50) NULL,\n twitter_title TEXT NULL,\n twitter_description TEXT NULL,\n twitter_image VARCHAR(2048) NULL,\n h1_count SMALLINT NULL,\n h1_texts JSON NULL,\n h2_count SMALLINT NULL,\n h2_texts JSON NULL,\n h3_count SMALLINT NULL,\n h4_count SMALLINT NULL,\n h5_count SMALLINT NULL,\n h6_count SMALLINT NULL,\n word_count INT UNSIGNED NULL,\n text_hash CHAR(64) NULL,\n dom_node_count INT UNSIGNED NULL,\n render_time_ms INT UNSIGNED NULL,\n lcp_ms INT UNSIGNED NULL,\n cls DECIMAL(6,4) NULL,\n tbt_ms INT UNSIGNED NULL,\n screenshot_path VARCHAR(500) NULL,\n quality_flags JSON NULL,\n html_raw_path VARCHAR(500) NULL,\n CONSTRAINT fk_pages_crawl FOREIGN KEY (crawl_id) REFERENCES crawls(id) ON DELETE CASCADE,\n CONSTRAINT fk_pages_url FOREIGN KEY (url_id) REFERENCES urls(id),\n UNIQUE KEY uq_crawl_url (crawl_id, url_id),\n INDEX idx_status (http_status),\n INDEX idx_fetched (fetched_at)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS page_headers (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n header_name VARCHAR(200) NOT NULL,\n header_value TEXT NULL,\n CONSTRAINT fk_headers_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n INDEX idx_page_name (page_id, header_name)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS links (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n source_url_id BIGINT UNSIGNED NOT NULL,\n target_url VARCHAR(2048) NOT NULL,\n target_url_id BIGINT UNSIGNED NULL,\n anchor_text TEXT NULL,\n rel JSON NULL,\n is_internal TINYINT(1) NOT NULL,\n is_external TINYINT(1) NOT NULL,\n position_hint VARCHAR(30) NULL,\n external_http_status SMALLINT NULL,\n external_final_url VARCHAR(2048) NULL,\n external_checked_at DATETIME(3) NULL,\n external_error VARCHAR(500) NULL,\n CONSTRAINT fk_links_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n CONSTRAINT fk_links_source FOREIGN KEY (source_url_id) REFERENCES urls(id),\n CONSTRAINT fk_links_target FOREIGN KEY (target_url_id) REFERENCES urls(id) ON DELETE SET NULL,\n INDEX idx_target (target_url_id),\n INDEX idx_external (is_external)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS images (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n src VARCHAR(2048) NOT NULL,\n alt TEXT NULL,\n title TEXT NULL,\n width INT NULL,\n height INT NULL,\n loading_attr VARCHAR(20) NULL,\n decoding_attr VARCHAR(20) NULL,\n http_status SMALLINT NULL,\n content_length BIGINT NULL,\n CONSTRAINT fk_images_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n INDEX idx_page (page_id)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS resources (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n resource_type VARCHAR(40) NOT NULL,\n url VARCHAR(2048) NOT NULL,\n http_status SMALLINT NULL,\n content_length BIGINT NULL,\n duration_ms INT UNSIGNED NULL,\n from_cache TINYINT(1) NULL,\n CONSTRAINT fk_resources_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n INDEX idx_type (resource_type)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS structured_data (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n format ENUM('json-ld','microdata','rdfa','opengraph','twitter') NOT NULL,\n schema_type VARCHAR(200) NULL,\n payload JSON NOT NULL,\n CONSTRAINT fk_structured_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n INDEX idx_type (schema_type)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS page_tokens (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n token VARCHAR(80) NOT NULL,\n tf INT UNSIGNED NOT NULL,\n CONSTRAINT fk_tokens_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n INDEX idx_page (page_id),\n INDEX idx_token (token)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS hreflang (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n lang VARCHAR(20) NOT NULL,\n href VARCHAR(2048) NOT NULL,\n CONSTRAINT fk_hreflang_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n INDEX idx_page (page_id)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS js_errors (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n page_id BIGINT UNSIGNED NOT NULL,\n severity ENUM('error','warning') NOT NULL,\n message TEXT NOT NULL,\n source VARCHAR(2048) NULL,\n line_no INT NULL,\n col_no INT NULL,\n CONSTRAINT fk_jserrors_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,\n INDEX idx_page (page_id)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS errors (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n crawl_id BIGINT UNSIGNED NOT NULL,\n url VARCHAR(2048) NOT NULL,\n error_type VARCHAR(80) NOT NULL,\n error_msg TEXT NOT NULL,\n occurred_at DATETIME(3) NOT NULL,\n CONSTRAINT fk_errors_crawl FOREIGN KEY (crawl_id) REFERENCES crawls(id) ON DELETE CASCADE,\n INDEX idx_type (error_type)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS url_queue (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n crawl_id BIGINT UNSIGNED NOT NULL,\n url_hash CHAR(64) NOT NULL,\n url VARCHAR(2048) NOT NULL,\n enqueued_at DATETIME(3) NOT NULL,\n status ENUM('pending','in_progress','done','failed') NOT NULL DEFAULT 'pending',\n CONSTRAINT fk_queue_crawl FOREIGN KEY (crawl_id) REFERENCES crawls(id) ON DELETE CASCADE,\n UNIQUE KEY uq_crawl_hash (crawl_id, url_hash),\n INDEX idx_pending (crawl_id, status, enqueued_at)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;\n\nCREATE TABLE IF NOT EXISTS external_checks (\n id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n url_hash CHAR(64) NOT NULL UNIQUE,\n url VARCHAR(2048) NOT NULL,\n http_status SMALLINT NULL,\n ... [TRUNCATED-21f71e8ef3ca0bd6]",
"structuredPatch": [
{
"oldStart": 164,
"oldLines": 9,
"newStart": 164,
"newLines": 20,
"lines": [
" id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,",
" page_id BIGINT UNSIGNED NOT NULL,",
" token VARCHAR(80) NOT NULL,",
"+ lemma VARCHAR(80) NULL,",
" tf INT UNSIGNED NOT NULL,",
" CONSTRAINT fk_tokens_page FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,",
" INDEX idx_page (page_id),",
"+ INDEX idx_token (token),",
"+ INDEX idx_lemma (lemma)",
"+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;",
"+",
"+CREATE TABLE IF NOT EXISTS reference_corpus_de (",
"+ id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,",
"+ corpus VARCHAR(40) NOT NULL,",
"+ token VARCHAR(80) NOT NULL,",
"+ frequency BIGINT UNSIGNED NOT NULL,",
"+ UNIQUE KEY uq_corpus_token (corpus, token),",
" INDEX idx_token (token)",
" ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;",
" "
]
}
],
"userModified": false,
"replaceAll": false
}
}