Skip to content

Commit 2e0c151

Browse files
authored
Merge pull request #30 from rusq/i29
use full browser headers
2 parents 4dff437 + 78b475d commit 2e0c151

3 files changed

Lines changed: 180 additions & 5 deletions

File tree

aklapi.go

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,16 @@ var (
1010
)
1111

1212
// userAgent is sent with all outgoing HTTP requests. The Auckland Council
13-
// website CDN (Fastly) returns 406 for requests that identify as Go's default
14-
// http client, so we send a browser-compatible value instead.
15-
const userAgent = "Mozilla/5.0 (compatible; aklapi/1.0)"
13+
// collection page returns 406 unless requests resemble a modern browser.
14+
const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
15+
16+
const (
17+
defaultAccept = "application/json, text/html, */*"
18+
browserAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
19+
browserAcceptLanguage = "en-NZ,en;q=0.9,en-US;q=0.8"
20+
secCHUA = "\"Chromium\";v=\"135\", \"Not.A/Brand\";v=\"8\""
21+
	secCHUAPlatform = "\"Windows\"" // must match the Windows UA string; mismatched client hints can still trip CDN bot checks
22+
)
1623

1724
// browserTransport is an http.RoundTripper that adds browser-like headers to
1825
// every request before forwarding it to the underlying transport.
@@ -27,7 +34,30 @@ func (t *browserTransport) RoundTrip(req *http.Request) (*http.Response, error)
2734
r.Header.Set("User-Agent", userAgent)
2835
}
2936
if r.Header.Get("Accept") == "" {
30-
r.Header.Set("Accept", "application/json, text/html, */*")
37+
r.Header.Set("Accept", defaultAccept)
3138
}
32-
return t.wrapped.RoundTrip(r)
39+
if r.Header.Get("Accept-Language") == "" {
40+
r.Header.Set("Accept-Language", browserAcceptLanguage)
41+
}
42+
43+
wrapped := t.wrapped
44+
if wrapped == nil {
45+
wrapped = http.DefaultTransport
46+
}
47+
return wrapped.RoundTrip(r)
48+
}
49+
50+
// setBrowserDocumentHeaders applies the navigation-style headers required by
51+
// the collection HTML page.
52+
func setBrowserDocumentHeaders(r *http.Request) {
53+
r.Header.Set("Accept", browserAccept)
54+
r.Header.Set("Cache-Control", "max-age=0")
55+
r.Header.Set("Upgrade-Insecure-Requests", "1")
56+
r.Header.Set("Sec-Fetch-Site", "none")
57+
r.Header.Set("Sec-Fetch-Mode", "navigate")
58+
r.Header.Set("Sec-Fetch-User", "?1")
59+
r.Header.Set("Sec-Fetch-Dest", "document")
60+
r.Header.Set("Sec-CH-UA", secCHUA)
61+
r.Header.Set("Sec-CH-UA-Mobile", "?0")
62+
r.Header.Set("Sec-CH-UA-Platform", secCHUAPlatform)
3363
}

rubbish.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,14 @@ func fetchandparse(ctx context.Context, addressID string) (*CollectionDayDetailR
120120
if err != nil {
121121
return nil, err
122122
}
123+
setBrowserDocumentHeaders(req)
123124
resp, err := collectionHTTPClient.Do(req)
124125
if err != nil {
125126
return nil, err
126127
}
127128
defer resp.Body.Close()
128129
if resp.StatusCode != http.StatusOK {
130+
slog.WarnContext(ctx, "collection request failed", "status", resp.StatusCode, "url", req.URL.String())
129131
return nil, fmt.Errorf("collection API returned status code: %d", resp.StatusCode)
130132
}
131133
return parse(resp.Body)

rubbish_test.go

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,149 @@ func TestFetchAndParse_StatusCodeError(t *testing.T) {
186186
}
187187
}
188188

189+
func TestCollectionRequestHeaders(t *testing.T) {
190+
t.Cleanup(func() {
191+
NoCache = false
192+
})
193+
NoCache = true
194+
195+
t.Run("fetchandparse uses browser headers", func(t *testing.T) {
196+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
197+
if got := r.Header.Get("User-Agent"); got != userAgent {
198+
http.Error(w, "missing browser user-agent", http.StatusNotAcceptable)
199+
return
200+
}
201+
if got := r.Header.Get("Accept"); got != browserAccept {
202+
http.Error(w, "missing browser accept", http.StatusNotAcceptable)
203+
return
204+
}
205+
if got := r.Header.Get("Accept-Language"); got != browserAcceptLanguage {
206+
http.Error(w, "missing browser accept-language", http.StatusNotAcceptable)
207+
return
208+
}
209+
if got := r.Header.Get("Cache-Control"); got != "max-age=0" {
210+
http.Error(w, "missing cache-control", http.StatusNotAcceptable)
211+
return
212+
}
213+
if got := r.Header.Get("Upgrade-Insecure-Requests"); got != "1" {
214+
http.Error(w, "missing upgrade-insecure-requests", http.StatusNotAcceptable)
215+
return
216+
}
217+
if got := r.Header.Get("Sec-Fetch-Site"); got != "none" {
218+
http.Error(w, "missing sec-fetch-site", http.StatusNotAcceptable)
219+
return
220+
}
221+
if got := r.Header.Get("Sec-Fetch-Mode"); got != "navigate" {
222+
http.Error(w, "missing sec-fetch-mode", http.StatusNotAcceptable)
223+
return
224+
}
225+
if got := r.Header.Get("Sec-Fetch-User"); got != "?1" {
226+
http.Error(w, "missing sec-fetch-user", http.StatusNotAcceptable)
227+
return
228+
}
229+
if got := r.Header.Get("Sec-Fetch-Dest"); got != "document" {
230+
http.Error(w, "missing sec-fetch-dest", http.StatusNotAcceptable)
231+
return
232+
}
233+
if got := r.Header.Get("Sec-CH-UA"); got != secCHUA {
234+
http.Error(w, "missing sec-ch-ua", http.StatusNotAcceptable)
235+
return
236+
}
237+
if got := r.Header.Get("Sec-CH-UA-Mobile"); got != "?0" {
238+
http.Error(w, "missing sec-ch-ua-mobile", http.StatusNotAcceptable)
239+
return
240+
}
241+
if got := r.Header.Get("Sec-CH-UA-Platform"); got != secCHUAPlatform {
242+
http.Error(w, "missing sec-ch-ua-platform", http.StatusNotAcceptable)
243+
return
244+
}
245+
w.Write([]byte(taRsd1LuandaDrive))
246+
}))
247+
t.Cleanup(srv.Close)
248+
249+
oldURI := collectionDayURI
250+
oldClient := collectionHTTPClient
251+
t.Cleanup(func() {
252+
collectionDayURI = oldURI
253+
collectionHTTPClient = oldClient
254+
})
255+
256+
collectionDayURI = srv.URL + "/rubbish/%s"
257+
collectionHTTPClient = &http.Client{
258+
Timeout: time.Second,
259+
Transport: &browserTransport{wrapped: srv.Client().Transport},
260+
}
261+
262+
got, err := fetchandparse(t.Context(), "42")
263+
if err != nil {
264+
t.Fatalf("fetchandparse() error = %v", err)
265+
}
266+
if got == nil {
267+
t.Fatal("fetchandparse() returned nil result")
268+
}
269+
})
270+
271+
t.Run("collection day detail recovers from 406 with browser headers", func(t *testing.T) {
272+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
273+
switch r.URL.Path {
274+
case "/addr":
275+
writeAddrJSON(w, AddrResponse{Items: []Address{*testAddr}})
276+
case "/rubbish/42":
277+
if r.Header.Get("User-Agent") == userAgent &&
278+
r.Header.Get("Accept") == browserAccept &&
279+
r.Header.Get("Accept-Language") == browserAcceptLanguage &&
280+
r.Header.Get("Cache-Control") == "max-age=0" &&
281+
r.Header.Get("Upgrade-Insecure-Requests") == "1" &&
282+
r.Header.Get("Sec-Fetch-Site") == "none" &&
283+
r.Header.Get("Sec-Fetch-Mode") == "navigate" &&
284+
r.Header.Get("Sec-Fetch-User") == "?1" &&
285+
r.Header.Get("Sec-Fetch-Dest") == "document" &&
286+
r.Header.Get("Sec-CH-UA") == secCHUA &&
287+
r.Header.Get("Sec-CH-UA-Mobile") == "?0" &&
288+
r.Header.Get("Sec-CH-UA-Platform") == secCHUAPlatform {
289+
w.Write([]byte(taRsd1LuandaDrive))
290+
return
291+
}
292+
http.Error(w, "collection API returned status code: 406", http.StatusNotAcceptable)
293+
default:
294+
http.NotFound(w, r)
295+
}
296+
}))
297+
t.Cleanup(srv.Close)
298+
299+
oldAddrURI := addrURI
300+
oldCollectionDayURI := collectionDayURI
301+
oldAddrClient := addrHTTPClient
302+
oldCollectionClient := collectionHTTPClient
303+
t.Cleanup(func() {
304+
addrURI = oldAddrURI
305+
collectionDayURI = oldCollectionDayURI
306+
addrHTTPClient = oldAddrClient
307+
collectionHTTPClient = oldCollectionClient
308+
})
309+
310+
addrURI = srv.URL + "/addr"
311+
collectionDayURI = srv.URL + "/rubbish/%s"
312+
addrHTTPClient = &http.Client{
313+
Timeout: time.Second,
314+
Transport: &browserTransport{wrapped: srv.Client().Transport},
315+
}
316+
collectionHTTPClient = &http.Client{
317+
Timeout: time.Second,
318+
Transport: &browserTransport{wrapped: srv.Client().Transport},
319+
}
320+
321+
got, err := CollectionDayDetail(t.Context(), "500 Queen Street")
322+
if err != nil {
323+
t.Fatalf("CollectionDayDetail() error = %v", err)
324+
}
325+
326+
assert.NotNil(t, got)
327+
assert.Equal(t, testAddr, got.Address)
328+
assert.Len(t, got.Collections, 3)
329+
})
330+
}
331+
189332
func TestCollectionDayDetailResult_NextRubbish(t *testing.T) {
190333
type fields struct {
191334
Collections []RubbishCollection

0 commit comments

Comments
 (0)