# robots.txt — ithouse.tech
# Intelligent House of Digital Innovation
# Last updated: 2026-06-12
# https://ithouse.tech
#
# Format reminder: one directive per line. A "#" starts a comment that runs
# to the end of the line, so directives must never share a line with a
# preceding comment or with each other.

# ============================================================
# DEFAULT: ALL CRAWLERS WELCOME
# ============================================================
# NOTE(review): a crawler that matches one of the named groups further down
# obeys only that group, not this one. The Disallow rules here therefore
# apply only to crawlers that have no dedicated group in this file. If
# /cdn-cgi/, /api/ and /.well-known/ should be off-limits to the named
# crawlers too, repeat the Disallow lines inside their groups.
User-agent: *
Allow: /
Disallow: /cdn-cgi/
Disallow: /api/
Disallow: /.well-known/
Crawl-delay: 1

# ============================================================
# GOOGLE
# ============================================================
User-agent: Googlebot
Allow: /
Allow: /assets/
Allow: /css/
Allow: /js/

User-agent: Googlebot-Image
Allow: /assets/images/
Allow: /

User-agent: Googlebot-Video
Allow: /

User-agent: AdsBot-Google
Allow: /

User-agent: Google-InspectionTool
Allow: /

User-agent: Google-Extended
Allow: /

# ============================================================
# BING / MICROSOFT
# ============================================================
User-agent: Bingbot
Allow: /
Crawl-delay: 1

User-agent: adidxbot
Allow: /

User-agent: msnbot
Allow: /

# ============================================================
# AI CRAWLERS — EXPLICITLY ALLOWED
# ithouse.tech welcomes all AI indexing for GEO citations
# ============================================================

# OpenAI / ChatGPT
User-agent: GPTBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

# Anthropic Claude
User-agent: ClaudeBot
Allow: /

User-agent: Claude-Web
Allow: /

User-agent: anthropic-ai
Allow: /

# Perplexity AI
User-agent: PerplexityBot
Allow: /

# Meta AI (Llama, Meta AI assistant)
User-agent: Meta-ExternalAgent
Allow: /

User-agent: Meta-ExternalFetcher
Allow: /

User-agent: facebookexternalhit
Allow: /

# Apple (Siri, Spotlight)
User-agent: Applebot
Allow: /

User-agent: Applebot-Extended
Allow: /

# Common Crawl (trains most open-source AI models)
User-agent: CCBot
Allow: /

# You.com AI Search
User-agent: YouBot
Allow: /

# Cohere AI
User-agent: cohere-ai
Allow: /

# Amazon / Alexa
User-agent: Amazonbot
Allow: /

# Bytedance / TikTok AI
User-agent: Bytespider
Allow: /

# Diffbot (knowledge graphs)
User-agent: Diffbot
Allow: /

# Brave Search
User-agent: Brave-AI
Allow: /

# DuckDuckGo
User-agent: DuckDuckBot
Allow: /

# ============================================================
# SEO TOOLS — ALLOWED WITH RATE LIMITING
# ============================================================
# Crawl-delay is a non-standard extension; each tool decides whether to
# honor it.
User-agent: Semrushbot
Allow: /
Crawl-delay: 5

User-agent: SemrushBot-SA
Allow: /
Crawl-delay: 5

User-agent: AhrefsBot
Allow: /
Crawl-delay: 5

User-agent: AhrefsSiteAudit
Allow: /
Crawl-delay: 5

User-agent: DataForSeoBot
Allow: /
Crawl-delay: 5

User-agent: MajesticSEO
Allow: /
Crawl-delay: 5

User-agent: Screaming-Frog-SEO-Spider
Allow: /
Crawl-delay: 5

# ============================================================
# BLOCK AGGRESSIVE SCRAPERS
# ============================================================
User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: SeznamBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Yandex
Disallow: /

# ============================================================
# SITEMAPS
# ============================================================
Sitemap: https://ithouse.tech/sitemap.xml

# ============================================================
# HOST
# ============================================================
# NOTE(review): Host is not part of the Robots Exclusion Protocol standard;
# parsers that do not recognize it ignore the line. Confirm it is still
# wanted.
Host: ithouse.tech