From 3616dccc1477bbdb5300d6a745d0986ebe99c08f Mon Sep 17 00:00:00 2001 From: Matias Benary Date: Wed, 11 Feb 2026 12:49:20 -0300 Subject: [PATCH 1/5] wip: fixe md static --- website/package.json | 4 +- website/scripts/copy-md-to-static.js | 317 ------------- website/scripts/copy-md-to-static.mjs | 425 ++++++++++++++++++ website/scripts/index-meilisearch.mjs | 145 +++--- website/scripts/shared.mjs | 32 ++ .../src/theme/SearchBar/AIChatInSearch.tsx | 215 +++++++++ website/src/theme/SearchBar/index.tsx | 199 +++++--- website/src/theme/SearchBar/styles.module.css | 252 +++++++++++ 8 files changed, 1128 insertions(+), 461 deletions(-) delete mode 100644 website/scripts/copy-md-to-static.js create mode 100644 website/scripts/copy-md-to-static.mjs create mode 100644 website/scripts/shared.mjs create mode 100644 website/src/theme/SearchBar/AIChatInSearch.tsx diff --git a/website/package.json b/website/package.json index 51dd9081dd0..2c3d4689b7e 100644 --- a/website/package.json +++ b/website/package.json @@ -15,8 +15,8 @@ "build": "./node_modules/.bin/docusaurus build && yarn run process-markdown", "build:dev": "./node_modules/.bin/docusaurus build --dev && yarn run process-markdown", "build:preview": "yarn run process-markdown && ./node_modules/.bin/docusaurus build --locale en && yarn run process-markdown", - "process-markdown": "node ./scripts/copy-md-to-static.js", - "meilisearch:build-index": "npm run process-markdown && node scripts/index-meilisearch.mjs", + "process-markdown": "node ./scripts/copy-md-to-static.mjs", + "meilisearch:build-index": "node scripts/index-meilisearch.mjs", "swizzle": "docusaurus swizzle", "docusaurus": "docusaurus" }, diff --git a/website/scripts/copy-md-to-static.js b/website/scripts/copy-md-to-static.js deleted file mode 100644 index 9e11c26509d..00000000000 --- a/website/scripts/copy-md-to-static.js +++ /dev/null @@ -1,317 +0,0 @@ -#!/usr/bin/env node - -const glob = require('glob'); -const path = require('path'); -const fs = 
require('fs'); -const axios = require('axios'); -const { URL } = require('url'); - -async function fetchGitHubCode(tag) { - // Replace single quotes with double quotes for consistent parsing - const normalizedTag = tag.replace(/'/g, '"'); - - // Extract URL from the tag - const urlMatch = normalizedTag.match(/url="(.*?)"/); - if (!urlMatch) return null; - - let url = urlMatch[1]; - // Remove hash fragment - url = url.split('#')[0]; - - // Parse URL and extract components - const urlObj = new URL(url); - const pathSegments = urlObj.pathname.slice(1).split('/'); - - if (pathSegments.length < 4) return null; - - const [org, repo, , branch, ...pathSeg] = pathSegments; - const filePath = pathSeg.join('/'); - - // Construct raw GitHub URL - const rawUrl = `https://raw.githubusercontent.com/${org}/${repo}/${branch}/${filePath}`; - - try { - // Fetch the code - const response = await axios.get(rawUrl); - return { - code: response.data, - normalizedTag, - url: rawUrl - }; - } catch (error) { - console.error(`āŒ ${rawUrl}`); - return null; - } -} - -// Extract code lines based on start/end parameters and format as code block -function formatCodeBlock(codeData, language = '') { - const { code, normalizedTag } = codeData; - - // Extract line numbers - const startMatch = normalizedTag.match(/start="(\d*)"/); - const endMatch = normalizedTag.match(/end="(\d*)"/); - - const codeLines = String(code).split('\n'); - const start = startMatch ? Math.max(parseInt(startMatch[1]) - 1, 0) : 0; - const end = endMatch ? 
parseInt(endMatch[1]) + 1 : codeLines.length; - - const selectedCode = codeLines.slice(start, end).join('\n'); - - // Return formatted code block with optional language syntax highlighting - return `\`\`\`${language}\n${selectedCode}\n\`\`\``; -} - -async function replaceTagsWithCode(content, tagName, includeLanguage = false) { - const tagRegex = new RegExp(`<${tagName}\\s[^>]*?(?:\\/>|>[^<]*<\\/${tagName}>)`, 'g'); - const tags = content.match(tagRegex) || []; - - for (const tag of tags) { - const codeData = await fetchGitHubCode(tag); - - if (codeData) { - let language = ''; - - // Extract language if needed (for File tags) - if (includeLanguage) { - const languageMatch = codeData.normalizedTag.match(/language="(.*?)"/); - language = languageMatch ? languageMatch[1] : ''; - } - - const codeBlock = formatCodeBlock(codeData, language); - content = content.replace(tag, codeBlock); - } - } - - return content; -} - -async function replaceGithubWithCode(content) { - return replaceTagsWithCode(content, 'Github', false); -} - -async function replaceFileWithCode(content) { - return replaceTagsWithCode(content, 'File', true); -} - -// Directories -const DOCS_DIR = path.join(__dirname, '../../docs'); -const BUILD_DIR = path.join(__dirname, '../build'); - -console.log('šŸš€ Starting markdown files post-processing...'); -console.log('This script will copy processed .md files alongside .html files in build/'); - -// Clear md -async function cleanContent(content) { - let cleaned = content; - - // Remove all imports (including JSX components and MDX imports) - cleaned = cleaned.replace(/^import\s+.*?from\s+['"].*?['"];?\s*$/gm, ''); - cleaned = cleaned.replace(/^import\s+\{[^}]*\}\s+from\s+['"].*?['"];?\s*$/gm, ''); - cleaned = cleaned.replace(/^import\s+.*?$/gm, ''); - - cleaned = await replaceGithubWithCode(cleaned); - cleaned = await replaceFileWithCode(cleaned); - - return cleaned; -} - -function generatePath(filePath) { - const content = fs.readFileSync(filePath, 'utf8'); - 
- const frontmatterMatch = content.match(/^---\s*\n([\s\S]*?)\n---/); - let id = null; - if (frontmatterMatch) { - const frontmatter = frontmatterMatch[1]; - const idMatch = frontmatter.match(/^id:\s*(.+)$/m); - if (idMatch) { - id = idMatch[1].trim().replace(/['"]/g, ''); - } - } - - const relativePath = path.relative(DOCS_DIR, filePath); - - const dirPath = path.dirname(relativePath); - - let newFilename; - if (id) { - newFilename = `${id}.md`; - } else { - newFilename = path.basename(filePath); - } - - return path.join(BUILD_DIR, dirPath, newFilename); -} - -function extractFrontmatter(content) { - const frontmatter = {}; - let body = content; - - if (content.startsWith('---\n')) { - const endIndex = content.indexOf('\n---\n', 4); - if (endIndex !== -1) { - const frontmatterText = content.substring(4, endIndex); - body = content.substring(endIndex + 5); - const lines = frontmatterText.split('\n'); - for (const line of lines) { - const colonIndex = line.indexOf(':'); - if (colonIndex > 0) { - const key = line.substring(0, colonIndex).trim(); - const value = line.substring(colonIndex + 1).trim().replace(/^["']|["']$/g, ''); - frontmatter[key] = value; - } - } - } - } - - return { frontmatter, body }; -} - -function getDescription(content) { - const lines = content.split('\n'); - for (const line of lines) { - const trimmed = line.trim(); - if (trimmed && - trimmed.length > 0) { - return trimmed - } - } - return '' -} - -async function checkLink(url) { - try { - const response = await axios.head(url, { timeout: 5000 }); - return { url, status: response.status, ok: true }; - } catch (error) { - const status = error.response?.status || 'NO RESPONSE'; - console.log(`āŒ ${url} - ${status}`); - return { url, status, ok: false }; - } -} - -const allMarkdownFiles = glob.sync(path.join(DOCS_DIR, '**/*.md')); - -async function processMarkdownFiles() { - const documentationPages = {}; - - await Promise.all( - allMarkdownFiles.map(async (markdownFilePath) => { - let 
fileContent = fs.readFileSync(markdownFilePath, 'utf8'); - - fileContent = await cleanContent(fileContent); - - const outputFilePath = generatePath(markdownFilePath); - const outputDirectory = path.dirname(outputFilePath); - - if (!fs.existsSync(outputDirectory)) fs.mkdirSync(outputDirectory, { recursive: true }); - - fs.writeFileSync(outputFilePath, fileContent, 'utf8'); - - const relativeFilename = path.relative(DOCS_DIR, markdownFilePath); - - if (relativeFilename === "index.md" || relativeFilename === "help.md") return; - - const pathSegments = relativeFilename.split('/'); - const sectionName = pathSegments[0]; - const fileName = pathSegments.pop(); - const alternativeId = fileName.replace('.md', ''); - - if (!documentationPages[sectionName]) { - documentationPages[sectionName] = []; - } - - const { frontmatter, body } = extractFrontmatter(fileContent); - const pageDescription = getDescription(body); - - if (pageDescription.startsWith('#') || - pageDescription.startsWith('import') || - pageDescription.startsWith(':::') || - pageDescription.startsWith('![')) { - console.warn(`Warning: No valid description found in ${relativeFilename}`); - } - if(!frontmatter.description){ - console.log(`āŒ No description tag found for ${relativeFilename}`); - } - const pageTitle = frontmatter.title || - frontmatter.sidebar_label || - alternativeId.replace(/[-_]/g, ' ').replace(/\b\w/g, letter => letter.toUpperCase()); - - const pageUrl = `${pathSegments.join("/")}/${frontmatter.id ? 
frontmatter.id + ".md" : fileName}`; - const pageId = frontmatter.id || alternativeId; - - documentationPages[sectionName].push({ - title: pageTitle, - url: pageUrl, - description: frontmatter.description || pageDescription, - id: pageId, - }); - }) - ); - - const documentationSections = { - "protocol": { name: "Core Protocol" }, - "ai": { name: "AI and Agents" }, - "chain-abstraction": { name: "Chain Abstraction" }, - "smart-contracts": { name: "Smart Contracts" }, - "web3-apps": { name: "Web3 Applications" }, - "primitives": { name: "Tokens and Primitives" }, - "tools": { name: "Developer Tools" }, - "tutorials": { name: "Tutorials and Examples" }, - "api": { name: "API Reference" }, - "data-infrastructure": { name: "Data Infrastructure" }, - "integrations": { name: "Integration Examples" }, - "resources": { name: "Resources" } - }; - - let documentationContent = `# NEAR Protocol Documentation - -> NEAR is a layer-1 blockchain built for scale and multichain compatibility, featuring AI-native infrastructure and chain abstraction capabilities. This documentation covers smart contracts, Web3 applications, AI agents, cross-chain development, and the complete NEAR ecosystem. -NEAR Protocol is a proof-of-stake blockchain that enables developers to build decentralized applications with seamless user experiences. Key features include human-readable account names, minimal transaction fees, and built-in developer tools. The platform supports multiple programming languages and provides chain abstraction for cross-blockchain interactions. -This documentation is organized into several main sections: Protocol fundamentals, AI and agent development, chain abstraction features, smart contract development, Web3 application building, and comprehensive API references. Each section includes tutorials, examples, and detailed technical specifications. 
- -`; - const links =[]; - for (const sectionKey in documentationSections) { - const section = documentationSections[sectionKey]; - const sectionPages = documentationPages[sectionKey] || []; - - let sectionContent = `## ${section.name}\n`; - - const orderedPages = sectionPages.sort((a, b) => { - return a.url.localeCompare(b.url); - }); - - for (const page of orderedPages) { - const cleanDescription = (page.description || page.title) - .replace(/\s*\n\s*/g, ' ') - .trim(); - links.push(`https://docs.near.org/${page.url}`); - - sectionContent += `- [${page.title}](https://docs.near.org/${page.url}): ${cleanDescription}\n`; - } - - documentationContent += sectionContent + '\n'; - } - - const outputFilePath = path.join(BUILD_DIR, 'llms.txt'); - const outputDirectory = BUILD_DIR; - - if (!fs.existsSync(outputDirectory)) { - fs.mkdirSync(outputDirectory, { recursive: true }); - } - - fs.writeFileSync(outputFilePath, documentationContent, 'utf-8'); - - console.log("Checking links..."); - const results = await Promise.all(links.map(checkLink)); - const broken = results.filter(r => !r.ok); - if (broken.length > 0) { - console.log('\nšŸ”“ Broken URLs:'); - broken.forEach(b => console.log(`${b.url} - Status: ${b.status}`)); - } else { - console.log('🟢 All links are valid'); - } -} - -processMarkdownFiles(); diff --git a/website/scripts/copy-md-to-static.mjs b/website/scripts/copy-md-to-static.mjs new file mode 100644 index 00000000000..7dbae8f6e4e --- /dev/null +++ b/website/scripts/copy-md-to-static.mjs @@ -0,0 +1,425 @@ +#!/usr/bin/env node + +import { globSync } from 'glob'; +import path from 'path'; +import fs from 'fs'; +import { + DOCS_DIR, + BUILD_DIR, + BASE_URL, + extractFrontmatter, +} from './shared.mjs'; + + +export const DOCUMENTATION_SECTIONS = { + protocol: 'Core Protocol', + ai: 'AI and Agents', + 'chain-abstraction': 'Chain Abstraction', + 'smart-contracts': 'Smart Contracts', + 'web3-apps': 'Web3 Applications', + primitives: 'Tokens and Primitives', + tools: 
'Developer Tools', + tutorials: 'Tutorials and Examples', + api: 'API Reference', + 'data-infrastructure': 'Data Infrastructure', + integrations: 'Integration Examples', + aurora: 'Aurora', + quest: 'Learning Quests', +}; + + +const SKIPPED_FILES = new Set(['index.md', 'help.md']); + +const githubCache = new Map(); +let cacheHits = 0; + +const JSX_COMPONENTS = [ + 'TabItem', 'Tabs', 'CodeTabs', + 'Card', 'ConceptCard', + 'SplitLayoutContainer', 'SplitLayoutLeft', 'SplitLayoutRight', + 'Language', 'Block', + 'Quiz', 'Progress','MultipleChoice', 'Option', + 'LantstoolLabel', 'TryOutOnLantstool', + 'MovingForwardSupportSection', 'SigsSupport', 'TryDemo', + 'ExplainCode', 'CodeBlock', + 'LandingHero', 'Faucet', 'AIBadges', + 'CreateTokenForm', 'MintNFT', + 'FeatureList', 'Column', 'Feature', +]; + +const LLMS_TXT_HEADER = `# NEAR Protocol Documentation + +> NEAR is a layer-1 blockchain built for scale and multichain compatibility, +> featuring AI-native infrastructure and chain abstraction capabilities. +> This documentation covers smart contracts, Web3 applications, AI agents, +> cross-chain development, and the complete NEAR ecosystem. + +NEAR Protocol is a proof-of-stake blockchain that enables developers to build +decentralized applications with seamless user experiences. Key features include +human-readable account names, minimal transaction fees, and built-in developer +tools. The platform supports multiple programming languages and provides chain +abstraction for cross-blockchain interactions. + +This documentation is organized into several main sections: Protocol fundamentals, +AI and agent development, chain abstraction features, smart contract development, +Web3 application building, and comprehensive API references. Each section includes +tutorials, examples, and detailed technical specifications. 
+ + +`; + + +function parseGitHubTag(tag) { + const normalized = tag.replace(/'/g, '"'); + + const urlMatch = normalized.match(/url="(.*?)"/); + if (!urlMatch) return null; + + const url = urlMatch[1].split('#')[0]; + const urlObj = new URL(url); + const segments = urlObj.pathname.slice(1).split('/'); + if (segments.length < 4) return null; + + const [org, repo, , branch, ...rest] = segments; + const filePath = rest.join('/'); + + return { + rawUrl: `https://raw.githubusercontent.com/${org}/${repo}/${branch}/${filePath}`, + normalized, + }; +} + +async function fetchGitHubCode(tag) { + const parsed = parseGitHubTag(tag); + if (!parsed) { + console.warn('Invalid GitHub tag format'); + return null; + } + + if (githubCache.has(parsed.rawUrl)) { + cacheHits++; + return githubCache.get(parsed.rawUrl); + } + + try { + const response = await fetch(parsed.rawUrl); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}`); + } + + const code = await response.text(); + const result = { code, normalized: parsed.normalized }; + + githubCache.set(parsed.rawUrl, result); + + return result; + } catch (error) { + console.error(`āŒ Failed to fetch ${parsed.rawUrl}: ${error.message}`); + return null; + } +} + +function extractCodeSlice(code, tagAttrs) { + const startMatch = tagAttrs.match(/start="(\d*)"/); + const endMatch = tagAttrs.match(/end="(\d*)"/); + + const lines = String(code).split('\n'); + const start = startMatch ? Math.max(parseInt(startMatch[1]) - 1, 0) : 0; + const end = endMatch ? 
parseInt(endMatch[1]) + 1 : lines.length; + + return lines.slice(start, end).join('\n'); +} + +async function replaceTagsWithCode(content, tagName, { includeLanguage = false } = {}) { + const tagRegex = new RegExp(`<${tagName}\\s[^>]*?(?:\\/>|>[^<]*<\\/${tagName}>)`, 'g'); + const tags = content.match(tagRegex) || []; + + for (const tag of tags) { + const codeData = await fetchGitHubCode(tag); + if (!codeData) continue; + + let language = ''; + if (includeLanguage) { + const langMatch = codeData.normalized.match(/language="(.*?)"/); + language = langMatch ? langMatch[1] : ''; + } + + const slice = extractCodeSlice(codeData.code, codeData.normalized); + content = content.replace(tag, `\`\`\`${language}\n${slice}\n\`\`\``); + } + + return content; +} + +function transformOutsideCodeBlocks(content, transformFn) { + const segments = content.split(/(```[\s\S]*?```)/g); + return segments.map((segment, index) => { + if (index % 2 === 1) return segment; // code block, keep as-is + return transformFn(segment); + }).join(''); +} + +function removeImports(content) { + return transformOutsideCodeBlocks(content, (text) => + text + .replace(/^import\s+.*?from\s+['"].*?['"];?\s*$/gm, '') + .replace(/^import\s+\{[^}]*\}\s+from\s+['"].*?['"];?\s*$/gm, '') + .replace(/^import\s+.*?$/gm, '') + ); +} + + +function stripJsx(content) { + const names = JSX_COMPONENTS.join('|'); + const singleLineSelfClosing = new RegExp(`^\\s*<(?:${names})\\b.*/>\\s*$`); + const singleLineOpening = new RegExp(`^\\s*<(?:${names})\\b.*>\\s*$`); + const closingTag = new RegExp(`^\\s*\\s*$`); + const multiLineStart = new RegExp(`^\\s*<(?:${names})\\b`); + + const lines = content.split('\n'); + const result = []; + let insideMultiLineTag = false; + + for (const line of lines) { + if (insideMultiLineTag) { + const trimmed = line.trim(); + if (trimmed === '/>' || trimmed === '>' || trimmed.endsWith('/>') || trimmed.endsWith('>')) { + insideMultiLineTag = false; + } + continue; + } + + if 
(singleLineSelfClosing.test(line)) continue; + if (singleLineOpening.test(line)) continue; + if (closingTag.test(line)) continue; + + if (multiLineStart.test(line) && !line.includes('>')) { + insideMultiLineTag = true; + continue; + } + + result.push(line); + } + + content = result.join('\n'); + + return content + .replace(/]*\/?>/g, '---') + .replace(/]*>([\s\S]*?)<\/a>/g, '[$2]($1)') + .replace(/]*\/?>/g, '![$2]($1)') + .replace(/
  • (.*?)<\/li>/gm, '- $1') + .replace(/<\/?(?:ul|ol)>/g, '') + .replace(/]*>/g, '') + .replace(/<\/div>/g, '') + .replace(/]*>/g, '') + .replace(/<\/span>/g, '') + .replace(/<\/?p>/g, '') + .replace(//g, '\n') + .replace(/|\/>)/g, '') + .replace(/^\s*
    \s*$/gm, '') + .replace(/^\s*<\/details>\s*$/gm, '') + .replace(/^\s*(.*?)<\/summary>\s*$/gm, '**$1**') + .replace(/^\s*:::\w+.*$/gm, '') + .replace(/^\s*:::$/gm, ''); +} + +function removeJsxTags(content) { + return transformOutsideCodeBlocks(content, stripJsx) + .replace(/\n{3,}/g, '\n\n'); +} + +function fixImagePaths(content) { + return content.replace(/@site\/static\//g, `${BASE_URL}/`); +} + +function resolveRelativeLinks(content, relativeFilePath) { + const fileDir = path.dirname(relativeFilePath); + + return content.replace(/\]\(([^)]+)\)/g, (match, linkPath) => { + if (/^(https?:|#|data:|\/\/)/.test(linkPath)) return match; + + if (linkPath.startsWith('/')) { + return `](${BASE_URL}${linkPath})`; + } + + if (linkPath.startsWith('./') || linkPath.startsWith('../')) { + const [pathPart, ...anchorParts] = linkPath.split('#'); + const anchor = anchorParts.length ? '#' + anchorParts.join('#') : ''; + const resolved = path.normalize(path.join(fileDir, pathPart)); + return `](${BASE_URL}/${resolved}${anchor})`; + } + + return match; + }); +} + +async function cleanContent(content, relativeFilePath) { + let cleaned = removeImports(content); + cleaned = await replaceTagsWithCode(cleaned, 'Github', { includeLanguage: true }); + cleaned = await replaceTagsWithCode(cleaned, 'File', { includeLanguage: true }); + cleaned = removeJsxTags(cleaned); + cleaned = fixImagePaths(cleaned); + cleaned = resolveRelativeLinks(cleaned, relativeFilePath); + return cleaned; +} + + +function getFirstNonEmptyLine(text) { + for (const line of text.split('\n')) { + const trimmed = line.trim(); + if (trimmed.length > 0) return trimmed; + } + return ''; +} + + +function getOutputPath(filePath, frontmatterId) { + const relativePath = path.relative(DOCS_DIR, filePath); + const dirPath = path.dirname(relativePath); + const filename = frontmatterId ? 
`${frontmatterId}.md` : path.basename(filePath); + + return path.join(BUILD_DIR, dirPath, filename); +} + + +function writeFileSafe(filePath, content) { + const dir = path.dirname(filePath); + if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync(filePath, content, 'utf8'); +} + +function buildPageMetadata(relativeFilename, frontmatter, body) { + const pathSegments = relativeFilename.split('/'); + const section = pathSegments[0]; + const fileName = pathSegments.pop(); + const title = frontmatter.title || frontmatter.sidebar_label; + + if (!title) { + console.warn(`āš ļø Missing title in ${relativeFilename}`); + } + + const pageId = frontmatter.id; + const url = `${pathSegments.join('/')}/${frontmatter.id ? frontmatter.id + '.md' : fileName}`; + const firstLine = getFirstNonEmptyLine(body); + + // Validate description + let description = frontmatter.description; + if (!description) { + console.log(`āŒ No description tag found for ${relativeFilename}`); + + if (firstLine.startsWith('#') || firstLine.startsWith('import') || + firstLine.startsWith(':::') || firstLine.startsWith('![')) { + console.warn(`āš ļø No valid description found in ${relativeFilename}`); + description = title; // Fallback to title + } else { + description = firstLine; + } + } + + return { + section, + title, + url, + description, + id: pageId, + }; +} + + +async function checkLink(url) { + try { + const response = await fetch(url, { method: 'HEAD', signal: AbortSignal.timeout(5000) }); + return { url, status: response.status, ok: response.ok }; + } catch (error) { + const status = 'NO RESPONSE'; + console.log(`āŒ ${url} - ${status}`); + return { url, status, ok: false }; + } +} + +async function reportBrokenLinks(links) { + console.log('Checking links...'); + const results = await Promise.all(links.map(checkLink)); + const broken = results.filter(r => !r.ok); + + if (broken.length > 0) { + console.log('\nšŸ”“ Broken URLs:'); + broken.forEach(b => 
console.log(`${b.url} - Status: ${b.status}`)); + } else { + console.log('🟢 All links are valid'); + } +} + + +function buildLlmsTxt(pagesBySection) { + let content = LLMS_TXT_HEADER; + const links = []; + + for (const [key, section] of Object.entries(DOCUMENTATION_SECTIONS)) { + const pages = (pagesBySection[key] || []).sort((a, b) => a.url.localeCompare(b.url)); + if (pages.length === 0) continue; + + content += `## ${section}\n`; + for (const page of pages) { + const desc = (page.description || page.title).replace(/\s*\n\s*/g, ' ').trim(); + const fullUrl = `${BASE_URL}/${page.url}`; + links.push(fullUrl); + content += `- [${page.title}](${fullUrl}): ${desc}\n`; + } + content += '\n'; + } + + return { content, links }; +} + +async function processMarkdownFiles() { + console.log('šŸš€ Starting markdown files post-processing...'); + console.log('This script will copy processed .md files alongside .html files in build/'); + + const allMarkdownFiles = globSync(path.join(DOCS_DIR, '**/*.md')); + console.log(`šŸ“ Found ${allMarkdownFiles.length} markdown files`); + + const pagesBySection = {}; + let processedCount = 0; + let errorCount = 0; + + await Promise.all( + allMarkdownFiles.map(async (filePath) => { + try { + const relativeFilename = path.relative(DOCS_DIR, filePath); + const rawContent = fs.readFileSync(filePath, 'utf8'); + const cleanedContent = await cleanContent(rawContent, relativeFilename); + const { frontmatter, body } = extractFrontmatter(cleanedContent); + + const outputPath = getOutputPath(filePath, frontmatter.id); + writeFileSafe(outputPath, cleanedContent); + + processedCount++; + if (processedCount % 10 === 0) { + console.log(`āœ… Processed ${processedCount}/${allMarkdownFiles.length} files`); + } + + if (SKIPPED_FILES.has(relativeFilename)) return; + + const page = buildPageMetadata(relativeFilename, frontmatter, body); + + if (!pagesBySection[page.section]) { + pagesBySection[page.section] = []; + } + pagesBySection[page.section].push(page); + 
} catch (error) { + errorCount++; + console.error(`āŒ Error processing ${path.relative(DOCS_DIR, filePath)}: ${error.message}`); + } + }) + ); + + const { content: llmsTxt, links } = buildLlmsTxt(pagesBySection); + writeFileSafe(path.join(BUILD_DIR, 'llms.txt'), llmsTxt); + console.log(`šŸ“ Generated llms.txt with ${links.length} links\n`); + + await reportBrokenLinks(links); +} + +processMarkdownFiles(); diff --git a/website/scripts/index-meilisearch.mjs b/website/scripts/index-meilisearch.mjs index fd6e94b82db..8e5b7f5f353 100644 --- a/website/scripts/index-meilisearch.mjs +++ b/website/scripts/index-meilisearch.mjs @@ -3,19 +3,13 @@ import { MeiliSearch } from 'meilisearch'; import fs from 'fs'; import { globSync } from 'glob'; import path from 'path'; -import { fileURLToPath } from 'url'; +import { + BUILD_DIR, + extractFrontmatter, +} from './shared.mjs'; import { createHash } from 'crypto'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); - -const MEILI_HOST = process.env.MEILI_HOST || 'http://localhost:7700'; -const MEILI_MASTER_KEY = process.env.MEILI_MASTER_KEY || 'masterKey123'; -const MEILI_INDEX_NAME = process.env.MEILI_INDEX_NAME || 'near-docs'; -const DOCS_PATH = path.resolve(__dirname, '../static'); -const BATCH_SIZE = 100; -const TASK_TIMEOUT = 300000; // 5 minutes timeout for tasks with embedders - -const CATEGORY_MAP = { +export const CATEGORY_MAP = { 'protocol': 'Protocol', 'chain-abstraction': 'Multi-Chain', 'ai': 'AI & Agents', @@ -25,19 +19,26 @@ const CATEGORY_MAP = { 'data-infrastructure': 'Data Infrastructure', 'tools': 'Tools', 'api': 'API', + 'integrations': 'Integration Examples', + 'aurora': 'Aurora', }; +const MEILI_HOST = process.env.MEILI_HOST || 'http://localhost:7700'; +const MEILI_MASTER_KEY = process.env.MEILI_MASTER_KEY || 'masterKey123'; +const MEILI_INDEX_NAME = process.env.MEILI_INDEX_NAME || 'near-docs'; +const BATCH_SIZE = 100; +const TASK_TIMEOUT = 300000; + function getCategoryFromPath(filePath) { - 
/**
 * Derive the site-relative URL path for a markdown file located under BUILD_DIR.
 *
 * Leading numeric ordering prefixes ("01-") are dropped from every folder and
 * from the file name. The document id is `frontmatter.id` when set, otherwise
 * the cleaned file name. A document whose id is "index", or equals the name of
 * its parent folder, resolves to the folder path itself.
 *
 * @param {string} filePath Path of the markdown file.
 * @param {Object} [frontmatter] Parsed frontmatter; only `id` is read.
 * @returns {string} URL path beginning with "/".
 */
function getUrlPath(filePath, frontmatter = {}) {
  const stripOrderPrefix = (name) => name.replace(/^\d+-/, '');

  // Normalise Windows separators before splitting into segments.
  const segments = path.relative(BUILD_DIR, filePath).replace(/\\/g, '/').split('/');
  const baseName = segments.pop().replace(/\.mdx?$/, '');

  const docId = frontmatter.id || stripOrderPrefix(baseName);
  const folders = segments.map((segment) => stripOrderPrefix(segment));

  // Index documents and documents named after their folder map to the folder URL.
  const resolvesToFolder = docId === 'index' || docId === folders[folders.length - 1];
  const urlSegments = resolvesToFolder ? folders : [...folders, docId];

  return `/${urlSegments.join('/')}`;
}
- // index = await client.getIndex(MEILI_INDEX_NAME); - // console.log('Using existing index:', MEILI_INDEX_NAME); - - // // Configure index settings - // console.log('Configuring index settings...'); - // await index.updateSettings({ - // searchableAttributes: ['title', 'content', 'section', 'hierarchy_lvl0', 'hierarchy_lvl1', 'hierarchy_lvl2'], - // filterableAttributes: ['category', 'version', 'hierarchy_lvl0'], - // sortableAttributes: ['timestamp'], - // rankingRules: ['words', 'typo', 'proximity', 'attribute', 'sort', 'exactness'], - // distinctAttribute: 'path', - // embedders: { - // default: { - // source: 'huggingFace', - // model: 'sentence-transformers/all-MiniLM-L6-v2', - // documentTemplate: '{{doc.title}} {{doc.content}}', - // }, - // }, - // }); - - // Get all markdown files - let files = globSync('../static/**/*.md', { cwd: __dirname }); + console.log('Starting MeiliSearch indexation...'); + console.log(`Host: ${MEILI_HOST}`); + console.log(`Index: ${MEILI_INDEX_NAME}`); + + const client = new MeiliSearch({ + host: MEILI_HOST, + apiKey: MEILI_MASTER_KEY, + }); + + let index = await client.getIndex(MEILI_INDEX_NAME); + console.log('Using existing index:', MEILI_INDEX_NAME); + + await index.updateSettings({ + searchableAttributes: ['title', 'content', 'section', 'hierarchy_lvl0', 'hierarchy_lvl1', 'hierarchy_lvl2'], + filterableAttributes: ['category', 'version', 'hierarchy_lvl0'], + sortableAttributes: ['timestamp'], + rankingRules: ['words', 'typo', 'proximity', 'attribute', 'sort', 'exactness'], + distinctAttribute: 'path', + embedders: { + default: { + source: 'huggingFace', + model: 'sentence-transformers/all-MiniLM-L6-v2', + documentTemplate: '{{doc.title}} {{doc.content}}', + }, + }, + }); + + let files = globSync(path.join(BUILD_DIR, '**/*.md')); console.log(`Found ${files.length} markdown files`); - console.log(files[0]); - exit(); - - // Process files into documents const documents = []; for (const filePath of files) { try { const content 
/**
 * Split a markdown document into its leading frontmatter and the remaining body.
 *
 * The frontmatter block must open with `---` on the very first line and close
 * with a line consisting of `---`. Both LF and CRLF line endings are accepted,
 * and the closing fence may be the last line of the file without a trailing
 * newline. Only flat `key: value` lines are understood: each line is split at
 * its first colon, the value is trimmed, and one leading and one trailing
 * quote character are removed. Lines with no colon, or starting with one, are
 * ignored.
 *
 * @param {string} content Raw file contents.
 * @returns {{frontmatter: Object<string, string>, body: string}} The parsed
 *   keys and the text after the closing fence. When no complete frontmatter
 *   block is found, `frontmatter` is empty and `body` is `content` unchanged.
 */
export function extractFrontmatter(content) {
  // Lazy match: the block ends at the first closing fence, like the previous
  // indexOf-based search. `$` has no `m` flag, so it only matches end of input.
  const match = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)/.exec(content);
  if (!match) return { frontmatter: {}, body: content };

  const frontmatter = {};

  for (const line of match[1].split(/\r?\n/)) {
    const colonIndex = line.indexOf(':');
    if (colonIndex > 0) {
      const key = line.substring(0, colonIndex).trim();
      const value = line.substring(colonIndex + 1).trim().replace(/^["']|["']$/g, '');
      frontmatter[key] = value;
    }
  }

  // Everything after the closing fence (and its line break) is the body.
  return { frontmatter, body: content.slice(match[0].length) };
}
file diff --git a/website/src/theme/SearchBar/AIChatInSearch.tsx b/website/src/theme/SearchBar/AIChatInSearch.tsx new file mode 100644 index 00000000000..6c536b7d8f0 --- /dev/null +++ b/website/src/theme/SearchBar/AIChatInSearch.tsx @@ -0,0 +1,215 @@ +import React, { useState, useRef, useEffect } from 'react'; +import axios from 'axios'; +import posthog from 'posthog-js'; +import { useColorMode } from '@docusaurus/theme-common'; +import MarkdownRenderer from '../../components/AIChat/MarkdownRenderer'; +import Feedback from '../../components/AIChat/feedback'; +import styles from './styles.module.css'; + +interface Message { + id: number; + text: string; + sender: 'user' | 'ai'; +} + +interface AIChatInSearchProps { + initialQuery: string; + onSaveConversation?: (data: SavedConversation) => void; + savedConversation?: SavedConversation | null; +} + +export interface SavedConversation { + messages: Message[]; + threadId: string | null; +} + +const API_URL = 'https://tmp-docs-ai-service.onrender.com/api/chat'; + +const SUGGESTIONS = [ + 'How do I create a NEAR account?', + 'What is a smart contract on NEAR?', + 'How do cross-contract calls work?', + 'How to build a dApp on NEAR?', +]; + +export default function AIChatInSearch({ + initialQuery, + onSaveConversation, + savedConversation, +}: AIChatInSearchProps) { + const { colorMode } = useColorMode(); + const isDarkTheme = colorMode === 'dark'; + + const [messages, setMessages] = useState( + savedConversation?.messages || [], + ); + const [inputValue, setInputValue] = useState(''); + const [isLoading, setIsLoading] = useState(false); + const [threadId, setThreadId] = useState( + savedConversation?.threadId || null, + ); + const [seconds, setSeconds] = useState(1); + + const messagesEndRef = useRef(null); + const inputRef = useRef(null); + const hasSentInitial = useRef(false); + + // Save conversation whenever messages or threadId change + useEffect(() => { + if (messages.length > 0 && onSaveConversation) { + 
onSaveConversation({ messages, threadId }); + } + }, [messages, threadId, onSaveConversation]); + + useEffect(() => { + let timer: ReturnType; + if (isLoading) { + timer = setInterval(() => setSeconds((s) => s + 1), 1000); + } else { + setSeconds(1); + } + return () => clearInterval(timer); + }, [isLoading]); + + useEffect(() => { + messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); + }, [messages]); + + useEffect(() => { + if ( + initialQuery.trim() && + !hasSentInitial.current && + !savedConversation?.messages?.length + ) { + hasSentInitial.current = true; + sendMessage(initialQuery.trim()); + } + }, [initialQuery]); + + const sendMessage = async (text: string) => { + const userMsg: Message = { id: Date.now(), text, sender: 'user' }; + setMessages((prev) => [...prev, userMsg]); + setInputValue(''); + setIsLoading(true); + + try { + const response = await axios.post( + API_URL, + { messages: text, threadId }, + { headers: { 'Content-Type': 'application/json' } }, + ); + + const aiMsg: Message = { + id: Date.now() + 1, + text: response.data.message, + sender: 'ai', + }; + setMessages((prev) => [...prev, aiMsg]); + setThreadId(response.data.threadId); + } catch { + const errMsg: Message = { + id: Date.now() + 1, + text: 'Sorry, something went wrong. Please try again.', + sender: 'ai', + }; + setMessages((prev) => [...prev, errMsg]); + } finally { + setIsLoading(false); + inputRef.current?.focus(); + } + }; + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault(); + if (inputValue.trim() && !isLoading) { + sendMessage(inputValue.trim()); + } + }; + + const handleFeedback = (choice: string) => { + posthog.capture('ai_chat_feedback', { + helpful: choice, + user_question: messages[messages.length - 2]?.text, + ai_answer: messages[messages.length - 1]?.text, + }); + }; + + const showSuggestions = messages.length === 0 && !isLoading; + + return ( +
    +
    + {showSuggestions && ( +
    +

    + ✨ Ask anything about NEAR +

    +
    + {SUGGESTIONS.map((s) => ( + + ))} +
    +
    + )} + + {messages.map((msg, idx) => ( +
    + {msg.sender === 'ai' ? ( + <> + + {idx === messages.length - 1 && !isLoading && ( +
    + +
    + )} + + ) : ( + {msg.text} + )} +
    + ))} + + {isLoading && ( +
    +
    + Thinking... ({seconds}s) +
    +
    + )} +
    +
    + +
    + setInputValue(e.target.value)} + disabled={isLoading} + /> + +
    +
    + ); +} diff --git a/website/src/theme/SearchBar/index.tsx b/website/src/theme/SearchBar/index.tsx index 022818b1828..03c0dca0312 100644 --- a/website/src/theme/SearchBar/index.tsx +++ b/website/src/theme/SearchBar/index.tsx @@ -4,6 +4,7 @@ import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; import { MeiliSearch } from 'meilisearch'; import { trackSearch, trackSearchResultClick, trackSearchNoResults } from '../../utils/searchAnalytics'; import { SearchIcon } from '../Icon/Search'; +import AIChatInSearch, { type SavedConversation } from './AIChatInSearch'; import styles from './styles.module.css'; interface SearchHit { @@ -31,15 +32,17 @@ interface SearchResult { const CATEGORIES = [ { id: 'all', label: 'All' }, - { id: 'Protocol', label: 'Protocol' }, - { id: 'Multi-Chain', label: 'Multi-Chain' }, - { id: 'AI & Agents', label: 'AI' }, - { id: 'Smart Contracts', label: 'Contracts' }, - { id: 'Web3 Apps', label: 'Web3 Apps' }, - { id: 'Primitives', label: 'Tokens & Primitives' }, - { id: 'Data Infrastructure', label: 'Data Infrastructure' }, - { id: 'Tools', label: 'Tools' }, - { id: 'API', label: 'API' }, + { id: 'protocol', label: 'Protocol' }, + { id: 'chain-abstraction', label: 'Multi-Chain' }, + { id: 'ai', label: 'AI & Agents' }, + { id: 'smart-contracts', label: 'Smart Contracts' }, + { id: 'web3-apps', label: 'Web3 Apps' }, + { id: 'primitives', label: 'Primitives' }, + { id: 'data-infrastructure', label: 'Data Infrastructure' }, + { id: 'tools', label: 'Tools' }, + { id: 'api', label: 'API' }, + { id: 'integrations', label: 'Integration Examples' }, + { id: 'aurora', label: 'Aurora' }, ]; export default function SearchBar(): JSX.Element { @@ -53,6 +56,9 @@ export default function SearchBar(): JSX.Element { const [selectedIndex, setSelectedIndex] = useState(0); const [selectedCategory, setSelectedCategory] = useState('all'); const [client, setClient] = useState(null); + const [mode, setMode] = useState<'search' | 'askDocs'>('search'); + 
const [askDocsQuery, setAskDocsQuery] = useState(''); + const [savedConversation, setSavedConversation] = useState(null); const inputRef = useRef(null); const resultsRef = useRef(null); @@ -73,6 +79,13 @@ export default function SearchBar(): JSX.Element { } }, [siteConfig]); + const closeModal = useCallback(() => { + setIsOpen(false); + setQuery(''); + setMode('search'); + setAskDocsQuery(''); + }, []); + useEffect(() => { const handleKeyDown = (e: KeyboardEvent) => { if ((e.metaKey || e.ctrlKey) && e.key === 'k') { @@ -80,13 +93,13 @@ export default function SearchBar(): JSX.Element { setIsOpen(true); } if (e.key === 'Escape') { - setIsOpen(false); + closeModal(); } }; document.addEventListener('keydown', handleKeyDown); return () => document.removeEventListener('keydown', handleKeyDown); - }, []); + }, [closeModal]); useEffect(() => { if (isOpen && inputRef.current) { @@ -144,7 +157,13 @@ export default function SearchBar(): JSX.Element { return () => clearTimeout(timer); }, [query, selectedCategory, search]); + const switchToAskDocs = () => { + setAskDocsQuery(query.trim()); + setMode('askDocs'); + }; + const handleKeyDown = (e: React.KeyboardEvent) => { + if (mode === 'askDocs') return; if (e.key === 'ArrowDown') { e.preventDefault(); setSelectedIndex(prev => Math.min(prev + 1, results.length - 1)); @@ -159,8 +178,7 @@ export default function SearchBar(): JSX.Element { const navigateToResult = (hit: SearchHit, index: number) => { trackSearchResultClick(query, index, hit.path); - setIsOpen(false); - setQuery(''); + closeModal(); history.push(hit.path); }; @@ -196,7 +214,7 @@ export default function SearchBar(): JSX.Element {