# robots.txt - crawler access policy.
#
# Format notes:
#   * One directive per line. A '#' comments out the remainder of its line,
#     so directives must never share a line with a leading comment.
#   * Groups are separated by blank lines. Inside a group only comment lines
#     are used as separators, because some parsers treat a blank line as the
#     end of the group.
#   * Path rules are prefix matches; '*' matches any run of characters, so a
#     trailing '*' is unnecessary.

# =====================================================
# AI / LLM crawlers - block training access
# =====================================================
User-agent: GPTBot
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: DeepSeekBot
Disallow: /

User-agent: DeepSeek
Disallow: /

User-agent: PetalBot
Disallow: /

# =====================================================
# SEO / scraping tools - block heavy crawlers
# =====================================================
User-agent: SemrushBot
Disallow: /

User-agent: SemrushBot-SA
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: AhrefsBot
Disallow: /

# =====================================================
# Default rules - all other crawlers
# =====================================================
User-agent: *
# Admin area is blocked, with a carve-out for the AJAX endpoint.
# The Allow is listed first so that first-match parsers honour it too;
# longest-match parsers pick it regardless of order.
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
#
# WooCommerce sensitive directories
Disallow: /wp-content/uploads/wc-logs/
Disallow: /wp-content/uploads/woocommerce_transient_files/
Disallow: /wp-content/uploads/woocommerce_uploads/
#
# Transactional pages (never index)
# NOTE(review): /order-received/ only matches a root-level path; if the
# endpoint lives under /checkout/ it is already covered there - confirm.
Disallow: /cart/
Disallow: /checkout/
Disallow: /my-account/
Disallow: /order-received/
#
# Cart / AJAX parameters
# '/*name=' matches the parameter anywhere in the URL, which also covers
# the '?name=' form.
Disallow: /*add-to-cart=
Disallow: /*wc-ajax=
#
# Internal search
Disallow: /?s=
Disallow: /search/
#
# Feeds
Disallow: /feed/
Disallow: /comments/feed/
#
# Optional: prevent crawl of author archives (often thin content)
Disallow: /author/