@@ -11,8 +11,11 @@ module.exports = function (context, options) {
1111 "../.docusaurus/repo-data-plugin/default/repos.json" ,
1212 ) ;
1313
14- // Load omit configuration from repo-explorer -omit-list.json
14+ // Load omit configuration from repo-data-omit-list.json
1515 let omitRepos = [ ] ;
16+ let implicitTags = [ ] ;
17+ let tagAliases = { } ;
18+ let matchNamesToExistingTags = false ;
1619
1720 try {
1821 const omitListFile = path . join ( __dirname , "repo-data-omit-list.json" ) ;
@@ -21,6 +24,21 @@ module.exports = function (context, options) {
2124 if ( omitConfig . omitRepos && Array . isArray ( omitConfig . omitRepos ) ) {
2225 omitRepos = omitConfig . omitRepos ;
2326 }
27+ if (
28+ omitConfig . implicitTags &&
29+ Array . isArray ( omitConfig . implicitTags )
30+ ) {
31+ implicitTags = omitConfig . implicitTags ;
32+ }
33+ if (
34+ omitConfig . tagAliases &&
35+ typeof omitConfig . tagAliases === "object"
36+ ) {
37+ tagAliases = omitConfig . tagAliases ;
38+ }
39+ if ( omitConfig . matchNamesToExistingTags === true ) {
40+ matchNamesToExistingTags = true ;
41+ }
2442 }
2543 } catch ( error ) {
2644 console . warn ( "Failed to load repo-data-omit-list.json:" , error . message ) ;
@@ -33,7 +51,70 @@ module.exports = function (context, options) {
3351 console . log (
3452 `Repository omit list loaded: ${ omitRepos . length } repositories will be excluded` ,
3553 ) ;
36- } // Determine if we should fetch fresh data
54+ }
55+
56+ // canonicalTags is built after repos are loaded, since we need
57+ // to include tags that actually exist across repos.
58+ let canonicalTags = null ;
59+
/**
 * Build the set of canonical tags used when inferring tags from repo names.
 *
 * The set starts with every alias target in `tagAliases`, then gains each
 * repo topic (after alias resolution) that is not listed in `implicitTags`.
 *
 * @param {Array<{topics?: string[]}>} repos - Repositories to scan. Entries
 *   whose `topics` is not an array are skipped instead of throwing.
 * @returns {Set<string>} Alias targets plus the non-implicit canonical topics.
 */
function buildCanonicalTags(repos) {
  const tags = new Set(Object.values(tagAliases));
  // Build the lookup once rather than scanning the array for every topic.
  const implicit = new Set(implicitTags);
  // Own-property check so topics such as "constructor" or "toString" are not
  // resolved to members inherited from Object.prototype.
  const hasAlias = (tag) =>
    Object.prototype.hasOwnProperty.call(tagAliases, tag);

  for (const repo of repos) {
    const topics = Array.isArray(repo && repo.topics) ? repo.topics : [];
    for (const topic of topics) {
      const canonical = (hasAlias(topic) && tagAliases[topic]) || topic;
      if (!implicit.has(canonical)) {
        tags.add(canonical);
      }
    }
  }
  return tags;
}
72+
/**
 * Split a repo name into lowercase words, handling kebab-case, snake_case,
 * dotted names, camelCase, PascalCase, and mixed conventions. Words of one
 * or two characters are dropped.
 *
 * e.g. "VoiceLauncherBlazor" -> ["voice", "launcher", "blazor"]
 *      "talon-mouse-rig"     -> ["talon", "mouse", "rig"]
 *      "talon_mgba_http"     -> ["talon", "mgba", "http"]
 *      "Web3Client"          -> ["web3", "client"]
 *
 * @param {string} name - Repository name.
 * @returns {string[]} Lowercased words longer than two characters; an empty
 *   array when `name` is not a string.
 */
function splitRepoName(name) {
  if (typeof name !== "string") return [];
  return (
    name
      // A lowercase letter or digit followed by an uppercase letter starts a
      // new word: "VoiceLauncher" -> "Voice Launcher", "Web3Client" -> "Web3 Client"
      .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
      // Split acronym from next word: "HTTPServer" -> "HTTP Server"
      .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
      // Separators: hyphen, underscore, dot, and whitespace (including the
      // spaces inserted above).
      .split(/[-_.\s]+/)
      .map((w) => w.toLowerCase())
      .filter((w) => w.length > 2)
  );
}
92+
/**
 * Infer tags from a repo's name by matching its words against the known
 * canonical tags. Only adds tags the repo doesn't already have (after alias
 * resolution) and never adds a tag listed in `implicitTags`.
 *
 * Does nothing when `matchNamesToExistingTags` is false, when
 * `canonicalTags` has not been built yet, or when the repo has no string
 * `name`. Mutates `repo.topics` in place; a repo without a `topics` array
 * only gains one if at least one tag is inferred.
 *
 * @param {{name: string, topics?: string[]}} repo - Repository to augment.
 * @returns {void}
 */
function inferTags(repo) {
  if (!matchNamesToExistingTags || !canonicalTags) return;
  if (!repo || typeof repo.name !== "string") return;

  // Own-property check so words such as "constructor" are not resolved to
  // members inherited from Object.prototype.
  const resolveAlias = (tag) =>
    (Object.prototype.hasOwnProperty.call(tagAliases, tag) &&
      tagAliases[tag]) ||
    tag;

  const hadTopics = Array.isArray(repo.topics);
  const topics = hadTopics ? repo.topics : [];
  const existingCanonical = new Set(topics.map(resolveAlias));

  for (const word of splitRepoName(repo.name)) {
    const canonical = resolveAlias(word);
    if (
      canonicalTags.has(canonical) &&
      !existingCanonical.has(canonical) &&
      !implicitTags.includes(canonical)
    ) {
      // The raw word is stored, not its canonical form.
      topics.push(word);
      existingCanonical.add(canonical);
    }
  }

  if (!hadTopics && topics.length > 0) {
    repo.topics = topics;
  }
}
116+
117+ // Determine if we should fetch fresh data
37118 const isUpdateRepos =
38119 ( process . env . npm_config_argv &&
39120 JSON . parse ( process . env . npm_config_argv ) . original . includes (
@@ -74,12 +155,17 @@ module.exports = function (context, options) {
74155 ) ;
75156 }
76157
158+ canonicalTags = buildCanonicalTags ( filteredRepos ) ;
159+ filteredRepos . forEach ( inferTags ) ;
160+
77161 return {
78162 ...cachedData ,
79163 repositories : filteredRepos ,
80164 filtered_count : filteredRepos . length ,
81165 omitted_count :
82166 ( cachedData . repositories . length || 0 ) - filteredRepos . length ,
167+ implicitTags,
168+ tagAliases,
83169 } ;
84170 }
85171 } catch ( error ) {
@@ -157,12 +243,17 @@ module.exports = function (context, options) {
157243 `After filtering: ${ filteredRepos . length } repositories (${ allRepos . length - filteredRepos . length } omitted)` ,
158244 ) ;
159245
246+ canonicalTags = buildCanonicalTags ( filteredRepos ) ;
247+ filteredRepos . forEach ( inferTags ) ;
248+
160249 return {
161250 repositories : filteredRepos ,
162251 total_count : totalCount ,
163252 filtered_count : filteredRepos . length ,
164253 omitted_count : allRepos . length - filteredRepos . length ,
165254 generated_at : new Date ( ) . toISOString ( ) ,
255+ implicitTags,
256+ tagAliases,
166257 } ;
167258 } catch ( error ) {
168259 console . error ( "Failed to fetch repository data:" , error ) ;
@@ -179,13 +270,18 @@ module.exports = function (context, options) {
179270 return ! omitRepos . includes ( fullName ) ;
180271 } ) ;
181272
273+ canonicalTags = buildCanonicalTags ( filteredRepos ) ;
274+ filteredRepos . forEach ( inferTags ) ;
275+
182276 return {
183277 ...cachedData ,
184278 repositories : filteredRepos ,
185279 filtered_count : filteredRepos . length ,
186280 omitted_count :
187281 ( cachedData . repositories . length || 0 ) - filteredRepos . length ,
188282 error : `Build-time fetch failed: ${ error . message } . Using cached data.` ,
283+ implicitTags,
284+ tagAliases,
189285 } ;
190286 }
191287 } catch ( cacheError ) {
@@ -198,6 +294,8 @@ module.exports = function (context, options) {
198294 total_count : 0 ,
199295 generated_at : new Date ( ) . toISOString ( ) ,
200296 error : error . message ,
297+ implicitTags,
298+ tagAliases,
201299 } ;
202300 }
203301 } ,
0 commit comments