Skip to content

Commit 316c695

Browse files
authored
Merge pull request #404 from rokubop/feat/explorer-tag-curation
Updates for "Repository Explorer"
2 parents c56f85a + daeaf95 commit 316c695

5 files changed

Lines changed: 289 additions & 95 deletions

File tree

.github/workflows/deploy-ghpages.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ on:
55
branches:
66
- main
77
- emily/gh-pages-deploy
8+
schedule:
9+
# Rebuild weekly (Saturday night / Sunday 00:00 UTC)
10+
- cron: "0 0 * * 0"
11+
workflow_dispatch: # Allow manual trigger
812

913
jobs:
1014
build_and_deploy:

plugins/repo-data-omit-list.json

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,37 @@
11
{
2-
"description": "List of repositories to exclude from the Repository Explorer. Add repository full names (e.g., 'owner/repo-name') to the omitRepos array.",
3-
"omitRepos": ["voqal/voqal"]
2+
"description": "Configuration for Repository Explorer exclusions and tag normalization.",
3+
"omitRepos": ["voqal/voqal", "Jailsonfs/community"],
4+
"implicitTags": [
5+
"talonvoice",
6+
"talon",
7+
"voice",
8+
"voice-recognition",
9+
"speech-recognition",
10+
"voice-commands",
11+
"voice-control"
12+
],
13+
"tagAliases": {
14+
"a11y": "accessibility",
15+
"macos-accessibility": "accessibility",
16+
"maths": "math",
17+
"python3": "python",
18+
"hci": "human-computer-interaction",
19+
"games": "game",
20+
"garrysmod": "game",
21+
"gmod": "game",
22+
"slaythespire": "game",
23+
"slaythespire-mod": "game",
24+
"gameboyadvance": "game",
25+
"mgba": "game",
26+
"blazor-server": "blazor",
27+
"blazor-components": "blazor",
28+
"awesome": "list",
29+
"awesome-list": "list",
30+
"gpt": "ai",
31+
"openai": "ai",
32+
"chatgpt": "ai",
33+
"llm": "ai",
34+
"copilot": "ai"
35+
},
36+
"matchNamesToExistingTags": true
437
}

plugins/repo-data-plugin.js

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@ module.exports = function (context, options) {
1111
"../.docusaurus/repo-data-plugin/default/repos.json",
1212
);
1313

14-
// Load omit configuration from repo-explorer-omit-list.json
14+
// Load omit configuration from repo-data-omit-list.json
1515
let omitRepos = [];
16+
let implicitTags = [];
17+
let tagAliases = {};
18+
let matchNamesToExistingTags = false;
1619

1720
try {
1821
const omitListFile = path.join(__dirname, "repo-data-omit-list.json");
@@ -21,6 +24,21 @@ module.exports = function (context, options) {
2124
if (omitConfig.omitRepos && Array.isArray(omitConfig.omitRepos)) {
2225
omitRepos = omitConfig.omitRepos;
2326
}
27+
if (
28+
omitConfig.implicitTags &&
29+
Array.isArray(omitConfig.implicitTags)
30+
) {
31+
implicitTags = omitConfig.implicitTags;
32+
}
33+
if (
34+
omitConfig.tagAliases &&
35+
typeof omitConfig.tagAliases === "object"
36+
) {
37+
tagAliases = omitConfig.tagAliases;
38+
}
39+
if (omitConfig.matchNamesToExistingTags === true) {
40+
matchNamesToExistingTags = true;
41+
}
2442
}
2543
} catch (error) {
2644
console.warn("Failed to load repo-data-omit-list.json:", error.message);
@@ -33,7 +51,70 @@ module.exports = function (context, options) {
3351
console.log(
3452
`Repository omit list loaded: ${omitRepos.length} repositories will be excluded`,
3553
);
36-
} // Determine if we should fetch fresh data
54+
}
55+
56+
// canonicalTags is built after repos are loaded, since we need
57+
// to include tags that actually exist across repos.
58+
let canonicalTags = null;
59+
60+
/**
 * Build the set of canonical tags that name-based inference may match.
 *
 * The set is seeded with every alias target in `tagAliases`, then extended
 * with each repo topic after alias resolution. Topics whose canonical form
 * is listed in `implicitTags` are not added.
 *
 * @param {Array<{topics?: string[]}>} repos - Repositories to scan.
 * @returns {Set<string>} Canonical tag names.
 */
function buildCanonicalTags(repos) {
  const hasOwn = Object.prototype.hasOwnProperty;
  // Build the implicit-tag lookup once instead of scanning the array per topic.
  const implicit = new Set(implicitTags);
  const tags = new Set(Object.values(tagAliases));

  for (const repo of repos) {
    // Entries without a topics array are skipped rather than throwing.
    if (!repo || !Array.isArray(repo.topics)) continue;

    for (const topic of repo.topics) {
      // Own-property check: a topic such as "constructor" must not resolve
      // to an inherited Object.prototype member.
      const canonical =
        (hasOwn.call(tagAliases, topic) && tagAliases[topic]) || topic;
      if (!implicit.has(canonical)) {
        tags.add(canonical);
      }
    }
  }

  return tags;
}
72+
73+
/**
 * Split a repo name into lowercase words, handling kebab-case, snake_case,
 * dotted names, camelCase, PascalCase, and mixed conventions.
 * e.g. "VoiceLauncherBlazor" -> ["voice", "launcher", "blazor"]
 *      "talon-mouse-rig"     -> ["talon", "mouse", "rig"]
 *      "talon_mgba_http"     -> ["talon", "mgba", "http"]
 *
 * Words of one or two characters are dropped.
 *
 * @param {string} name - Repository name.
 * @returns {string[]} Lowercased words of length 3 or more; an empty array
 *   when `name` is not a string.
 */
function splitRepoName(name) {
  // A missing or non-string name yields no words instead of a TypeError.
  if (typeof name !== "string") return [];

  const spaced = name
    // Lower-to-upper boundary: "VoiceLauncher" -> "Voice Launcher"
    .replace(/([a-z])([A-Z])/g, "$1 $2")
    // Split an acronym from the following word: "HTTPServer" -> "HTTP Server"
    .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2");

  return spaced
    .split(/[-_.\s]+/)
    .map((word) => word.toLowerCase())
    .filter((word) => word.length > 2);
}
92+
93+
/**
 * Infer tags from a repo's name by matching its words against the known
 * canonical tags. Only adds tags the repo doesn't already have (after alias
 * resolution), and never adds a tag listed in `implicitTags`.
 *
 * Mutates `repo.topics` in place. The word as it appears in the name is
 * pushed, not its canonical form. Does nothing when
 * `matchNamesToExistingTags` is off or `canonicalTags` has not been built.
 *
 * @param {{name?: string, topics?: string[]}} repo - Repository record.
 * @returns {void}
 */
function inferTags(repo) {
  if (!matchNamesToExistingTags || !canonicalTags) return;
  // Without a string name there is nothing to infer from.
  if (!repo || typeof repo.name !== "string") return;
  // Give the repo an empty topics list when it has none, so inferred tags
  // can be appended instead of throwing.
  if (!Array.isArray(repo.topics)) {
    repo.topics = [];
  }

  const hasOwn = Object.prototype.hasOwnProperty;
  // Own-property check: words such as "constructor" must not resolve to an
  // inherited Object.prototype member.
  const resolveAlias = (tag) =>
    (hasOwn.call(tagAliases, tag) && tagAliases[tag]) || tag;

  const existingCanonical = new Set(repo.topics.map(resolveAlias));

  for (const word of splitRepoName(repo.name)) {
    const canonical = resolveAlias(word);
    if (
      canonicalTags.has(canonical) &&
      !existingCanonical.has(canonical) &&
      !implicitTags.includes(canonical)
    ) {
      repo.topics.push(word);
      // Track it so a second word resolving to the same tag is not added.
      existingCanonical.add(canonical);
    }
  }
}
116+
117+
// Determine if we should fetch fresh data
37118
const isUpdateRepos =
38119
(process.env.npm_config_argv &&
39120
JSON.parse(process.env.npm_config_argv).original.includes(
@@ -74,12 +155,17 @@ module.exports = function (context, options) {
74155
);
75156
}
76157

158+
canonicalTags = buildCanonicalTags(filteredRepos);
159+
filteredRepos.forEach(inferTags);
160+
77161
return {
78162
...cachedData,
79163
repositories: filteredRepos,
80164
filtered_count: filteredRepos.length,
81165
omitted_count:
82166
(cachedData.repositories.length || 0) - filteredRepos.length,
167+
implicitTags,
168+
tagAliases,
83169
};
84170
}
85171
} catch (error) {
@@ -157,12 +243,17 @@ module.exports = function (context, options) {
157243
`After filtering: ${filteredRepos.length} repositories (${allRepos.length - filteredRepos.length} omitted)`,
158244
);
159245

246+
canonicalTags = buildCanonicalTags(filteredRepos);
247+
filteredRepos.forEach(inferTags);
248+
160249
return {
161250
repositories: filteredRepos,
162251
total_count: totalCount,
163252
filtered_count: filteredRepos.length,
164253
omitted_count: allRepos.length - filteredRepos.length,
165254
generated_at: new Date().toISOString(),
255+
implicitTags,
256+
tagAliases,
166257
};
167258
} catch (error) {
168259
console.error("Failed to fetch repository data:", error);
@@ -179,13 +270,18 @@ module.exports = function (context, options) {
179270
return !omitRepos.includes(fullName);
180271
});
181272

273+
canonicalTags = buildCanonicalTags(filteredRepos);
274+
filteredRepos.forEach(inferTags);
275+
182276
return {
183277
...cachedData,
184278
repositories: filteredRepos,
185279
filtered_count: filteredRepos.length,
186280
omitted_count:
187281
(cachedData.repositories.length || 0) - filteredRepos.length,
188282
error: `Build-time fetch failed: ${error.message}. Using cached data.`,
283+
implicitTags,
284+
tagAliases,
189285
};
190286
}
191287
} catch (cacheError) {
@@ -198,6 +294,8 @@ module.exports = function (context, options) {
198294
total_count: 0,
199295
generated_at: new Date().toISOString(),
200296
error: error.message,
297+
implicitTags,
298+
tagAliases,
201299
};
202300
}
203301
},

0 commit comments

Comments
 (0)