Skip to content

Commit b7c042f

Browse files
committed
Merge branch 'master' into variables-conversion
2 parents ee38850 + 7716ba7 commit b7c042f

35 files changed

+3299
-1160
lines changed

.NO_AI/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
AI AND LLM ARE FORBIDDEN
2+
========================
3+
4+
This project doesn't accept code or documentation generated
5+
by LLMs or any other forms of generative AI.
6+
7+
Please don't submit pull requests based on generative AI
8+
output, and don't use AI auto-completion when you write
9+
pull requests.
10+
11+
Additionally, don't use LLMs to create pull requests or issues.
12+
Write them yourself, in English, even if you think your English
13+
is bad. Trust us, we prefer your writing to the LLM's voice.
14+
15+
Note: Exceptions may apply for individual ParadoxGameConverters
16+
projects at the discretion of team leaders - consult with them first.
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
#!/usr/bin/env python3
2+
3+
import html
4+
import re
5+
import sys
6+
import urllib.error
7+
import urllib.request
8+
from datetime import datetime, timezone
9+
from html.parser import HTMLParser
10+
11+
12+
class TextExtractor(HTMLParser):
    """Collect the visible text of an HTML document, dropping script/style bodies."""

    # Tags whose open/close implies a visual line break around their content.
    _BLOCK_TAGS = frozenset({
        "p", "div", "tr", "section", "article", "header",
        "ul", "ol", "h1", "h2", "h3", "h4", "h5", "h6",
    })
    # Containers whose text must never reach the output.
    _IGNORED_TAGS = frozenset({"script", "style"})

    def __init__(self):
        super().__init__()
        self._chunks = []
        # Nesting depth inside script/style; text is dropped while > 0.
        self._ignore_depth = 0

    def handle_starttag(self, tag, attrs):
        if tag in self._IGNORED_TAGS:
            self._ignore_depth += 1
        elif self._ignore_depth == 0:
            if tag == "br":
                self._chunks.append("\n")
            elif tag == "li":
                self._chunks.append("\n- ")
            elif tag in self._BLOCK_TAGS:
                self._chunks.append("\n")
            elif tag in ("td", "th"):
                self._chunks.append(" ")

    def handle_endtag(self, tag):
        if tag in self._IGNORED_TAGS:
            # Clamp at zero so stray closing tags cannot go negative.
            self._ignore_depth = max(0, self._ignore_depth - 1)
        elif self._ignore_depth == 0 and tag in self._BLOCK_TAGS:
            self._chunks.append("\n")

    def handle_data(self, data):
        if not self._ignore_depth:
            self._chunks.append(data)

    def text(self):
        """Return the accumulated text with HTML entities unescaped."""
        return html.unescape("".join(self._chunks))
52+
53+
54+
def fetch_page(base_url, headers, page_number):
    """Download one changelog page and return its body as text.

    base_url -- changelog URL without the paging query string.
    headers -- HTTP request headers (user agent, language).
    page_number -- 1-based page index appended as the ``p`` query parameter.

    Undecodable bytes are replaced rather than raising.
    """
    url = f"{base_url}?p={page_number}"
    req = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(req, timeout=30) as resp:
        payload = resp.read()
    return payload.decode("utf-8", "replace")
61+
62+
63+
def html_to_text(fragment):
    """Render an HTML fragment as plain text and strip leftover BBCode markup."""
    extractor = TextExtractor()
    extractor.feed(fragment)
    extractor.close()
    plain = extractor.text()

    # Normalize every line-ending style to "\n" first.
    plain = plain.replace("\r\n", "\n").replace("\r", "\n")

    # Steam changelogs may embed BBCode; drop formatting tags, keep link
    # targets, then squeeze trailing spaces and runs of blank lines.
    cleanups = (
        (r"(?i)\[/?(?:b|i|u|s|quote|code|list|olist|h\d)\]", ""),
        (r"(?is)\[url=([^\]]+)\](.*?)\[/url\]", r"\2 (\1)"),
        (r"(?is)\[img\].*?\[/img\]", ""),
        (r"[ \t]+\n", "\n"),
        (r"\n{3,}", "\n\n"),
    )
    for pattern, replacement in cleanups:
        plain = re.sub(pattern, replacement, plain)

    return plain.strip()
75+
76+
77+
def parse_timestamp(date_text, raw_html, raw_search_start):
    """Resolve a changelog header date to a Unix timestamp.

    Prefers the exact epoch value Steam embeds in a ``data-*`` attribute near
    the rendered date; falls back to parsing the display string.

    date_text -- date as rendered on the page, e.g. "21 Mar @ 1:19pm".
    raw_html -- full page HTML, searched for embedded epoch attributes.
    raw_search_start -- offset in raw_html where the search begins.

    Returns (timestamp, next_search_offset).
    """
    raw_index = raw_html.find(date_text, raw_search_start)
    if raw_index != -1:
        # Steam embeds the exact epoch in a data attribute close to the
        # rendered date; prefer it over parsing the fuzzy display string.
        snippet = raw_html[max(0, raw_index - 600):raw_index + 600]
        attr_match = re.search(r'data-(?:timestamp|rtime(?:_updated)?|time_updated)="(\d+)"', snippet)
        if attr_match:
            return int(attr_match.group(1)), raw_index + len(date_text)

    next_search_start = raw_index + len(date_text) if raw_index != -1 else raw_search_start

    now = datetime.now(timezone.utc)
    # Recent entries omit the year (e.g. "21 Mar @ 1:19pm"); assume the
    # current year first.
    try:
        parsed = datetime.strptime(f"{date_text} {now.year}", "%d %b @ %I:%M%p %Y")
        parsed = parsed.replace(tzinfo=timezone.utc)
        if parsed > now:
            # A "future" date means the entry is from late last year
            # (e.g. "31 Dec" scraped in January) — roll back one year.
            try:
                parsed = parsed.replace(year=now.year - 1)
            except ValueError:
                pass  # Feb 29 with no counterpart last year; keep as-is.
    except ValueError:
        # Feb 29 of a non-leap current year, or a different format entirely:
        # retry with the previous year.
        try:
            parsed = datetime.strptime(f"{date_text} {now.year - 1}", "%d %b @ %I:%M%p %Y")
            parsed = parsed.replace(tzinfo=timezone.utc)
        except ValueError:
            try:
                # Older entries carry an explicit year.
                parsed = datetime.strptime(date_text, "%d %b, %Y @ %I:%M%p")
                parsed = parsed.replace(tzinfo=timezone.utc)
            except ValueError:
                # Unrecognized format: fall back to "now" so the caller's
                # cutoff comparison treats the entry as new.
                parsed = now

    return int(parsed.timestamp()), next_search_start
105+
106+
107+
def main():
    """Scrape Steam Workshop changelog entries newer than a saved timestamp.

    Usage: steam_changelog_scraper.py <workshop_id> <previous_ts>

    Prints one markdown section per new entry (newest first), or a notice
    when nothing newer than previous_ts was found. Returns the process
    exit code (0 on success, 1 on bad arguments).
    """
    if len(sys.argv) != 3:
        print("Usage: steam_changelog_scraper.py <workshop_id> <previous_ts>", file=sys.stderr)
        return 1

    workshop_id = sys.argv[1]
    previous_ts = int(sys.argv[2])

    base_url = f"https://steamcommunity.com/sharedfiles/filedetails/changelog/{workshop_id}"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/122.0 Safari/537.36"
        ),
        "Accept-Language": "en-US,en;q=0.9",
    }

    # Matches rendered changelog headers like "Update: 21 Mar @ 1:19pm by Foo".
    header_re = re.compile(
        r"Update:\s+(?P<date>\d{1,2}\s+[A-Za-z]{3}\s+@\s+\d{1,2}:\d{2}[ap]m)\s+by\s+(?P<author>.*?)(?:\n|$)"
    )

    entries = []
    try:
        for page_number in range(1, 51):  # hard cap so paging can never spin forever
            raw_html = fetch_page(base_url, headers, page_number)
            page_text = html_to_text(raw_html)
            found_headers = list(header_re.finditer(page_text))
            if not found_headers:
                break

            reached_cutoff = False
            raw_search_start = 0
            for index, match in enumerate(found_headers):
                # The entry body runs until the next header (or end of page).
                if index + 1 < len(found_headers):
                    body_end = found_headers[index + 1].start()
                else:
                    body_end = len(page_text)
                body = page_text[match.end():body_end].strip()

                # Trim page chrome that follows the last entry on a page.
                body = re.split(
                    r"\n(?:Showing\s+\d+-\d+\s+of\s+\d+\s+entries|Additional Links)\b",
                    body,
                    maxsplit=1,
                )[0].strip()

                entry_ts, raw_search_start = parse_timestamp(match.group("date"), raw_html, raw_search_start)
                if entry_ts <= previous_ts:
                    # Entries are newest-first; everything after this is old.
                    reached_cutoff = True
                    break

                if not body:
                    body = "No changelog details were provided for this update."

                entry_date = datetime.fromtimestamp(entry_ts, tz=timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
                entries.append(f"### {entry_date}\n\n{body}")

            if reached_cutoff:
                break
    except (urllib.error.URLError, TimeoutError, ValueError, OSError):
        # Best-effort scraper: on network/parse trouble report "nothing
        # found" rather than crash the caller.
        entries = []

    if entries:
        print("\n\n".join(entries))
    else:
        print("No changelog entries were found on Steam since the previous saved timestamp.")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())

.github/workflows/ck3-tiger.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,7 @@ filter = {
423423
text = "missing english localization key utaki_religion_adherent_plural"
424424
text = "missing english localization key utaki_desc"
425425
text = "missing english localization key ngaiism_high_god_name_alternate"
426+
text = "missing english localization key AI_HOUSE_RELATION_ALL_MEMBERS_REASON"
426427
text = "diarchy parameter diarchy_type_is_grand_secretariat not defined in common/diarchies/diarchy_types/"
427428
text = "missing english localization key [adventurer|E]" # this is actually a ck3-tiger false positive
428429
text = "missing english localization key [administrative_government|E]" # this is actually a ck3-tiger false positive

0 commit comments

Comments
 (0)