diff --git a/data_raw_getData_2026-01-03_8.json b/data_raw_getData_2026-01-03_8.json new file mode 100644 index 0000000..45d059f --- /dev/null +++ b/data_raw_getData_2026-01-03_8.json @@ -0,0 +1,5 @@ +{ + "error": "Error with your request, please try again (you will not be charged for this request).You should: 1) check that your URL is correctly encoded 2) try with render_js=True (5 credits per request) 3) try with premium_proxy=True see documentation: https://www.scrapingbee.com/documentation#premium_proxy (10-25 credits per request) 4) try with stealth_proxy=True see documentation: https://www.scrapingbee.com/documentation#stealth_proxy (75 credits per request)Do not hesitate to check our troubleshooting guide:https://www.scrapingbee.com/help", + "reason": "Server responded with 500", + "help": "('Received response with content-encoding: br, but failed to decode it.', error("brotli: decoder process called with data when 'can_accept_more_data()' is False"))" +} \ No newline at end of file diff --git a/src/lib/server/external/api.scraping.helpers.ts b/src/lib/server/external/api.scraping.helpers.ts index 1d94676..1cb1e39 100755 --- a/src/lib/server/external/api.scraping.helpers.ts +++ b/src/lib/server/external/api.scraping.helpers.ts @@ -1,4 +1,5 @@ import { env } from "$env/dynamic/private"; +import { chunkArray } from "$lib/server/array.chunk"; import { logger } from "$lib/server/logger"; import { getULID, sleep } from "$lib/utils"; import { baseDistributorId, constants } from "$lib/utils/constants"; @@ -19,8 +20,12 @@ function dumpDistributorsRaw(distributors: any) { fs.writeFileSync("distributors_raw.json", JSON.stringify(distributors, null, 2)); } -function dumpDealersRaw(dealers: any) { - fs.writeFileSync("dealers_raw.json", JSON.stringify(dealers, null, 2)); +function dumpDealersRaw(dealers: any, prefix: string) { + fs.writeFileSync(`dealers_raw_${prefix}.json`, JSON.stringify(dealers, null, 2)); +} + +function dumpDataRaw(data: any, prefix: string) { + fs.writeFileSync(`data_raw_${prefix}.json`, JSON.stringify(data, null, 2)); } export const testIfSessionIsValid = async (jwt: string) => { @@ -174,60 +179,74 @@ export const getDealers = async (jwt: string, distributor_ids: string[]) => { } try { - const requests = distributor_ids.map(async (did) => { - await sleep(rng(100, 10000)); + const batches = chunkArray(distributor_ids, 5); + const allResponses: Array<{ + dealers: any[]; + ok: boolean; + code: number; + message: string; + }> = []; - const targetUrl = `${constants.SCRAP_API_URL}/v1/user/dealer-list`; - const scrapingbeeUrl = new URL("https://app.scrapingbee.com/api/v1"); - scrapingbeeUrl.searchParams.set("api_key", scrapingbeeApiKey); - scrapingbeeUrl.searchParams.set("url", targetUrl); - scrapingbeeUrl.searchParams.set("forward_headers", "true"); - scrapingbeeUrl.searchParams.set("forward_headers_pure", "true"); + // Process each batch sequentially + for (const batch of batches) { + const batchRequests = batch.map(async (did, index) => { + await sleep(rng(100, 2000)); - const res = await fetch(scrapingbeeUrl.toString(), { - method: "POST", - headers: { "Spb-Authorization": jwt, "Spb-Content-Type": "application/json" }, - body: JSON.stringify({ - page: 1, - pageSize: 999999, - parentDistributor: parseInt(did), - }), - }); + const targetUrl = `${constants.SCRAP_API_URL}/v1/user/dealer-list`; + const scrapingbeeUrl = new URL("https://app.scrapingbee.com/api/v1"); + scrapingbeeUrl.searchParams.set("api_key", scrapingbeeApiKey); + scrapingbeeUrl.searchParams.set("url", targetUrl); + scrapingbeeUrl.searchParams.set("forward_headers", "true"); + scrapingbeeUrl.searchParams.set("forward_headers_pure", "true"); - const data = (await res.json()) as { - code: number; - success: boolean; - message: string; - data: { - items: any[]; - total: number; + const res = await fetch(scrapingbeeUrl.toString(), { + method: "POST", + headers: { "Spb-Authorization": jwt, "Spb-Content-Type": "application/json" }, + body: JSON.stringify({ + page: 1, + pageSize: 999999, + parentDistributor: parseInt(did), + }), + }); + + const data = (await res.json()) as { + code: number; + success: boolean; + message: string; + data: { + items: any[]; + total: number; + }; }; - }; - dumpDealersRaw(data); + dumpDealersRaw(data, `batch_${index}_${did}`); + + if (data.code !== 200 || !data.success) { + return { + dealers: [], + ok: false, + code: data.code, + message: data.message, + }; + } + const dealers = data.data.items.map((item) => item.dealer); - if (data.code !== 200 || !data.success) { return { - dealers: [], - ok: false, + dealers, + ok: res.status === 200 && data.success, code: data.code, message: data.message, }; - } - const dealers = data.data.items.map((item) => item.dealer); + }); - return { - dealers, - ok: res.status === 200 && data.success, - code: data.code, - message: data.message, - }; - }); + // Wait for all requests in this batch to complete before moving to next batch + const batchResponses = await Promise.all(batchRequests); + allResponses.push(...batchResponses); + } - const responses = await Promise.all(requests); const dealers: LooseApiUser[] = []; const errors: { message: string }[] = []; - for (const res of responses) { + for (const res of allResponses) { if (res.code !== 200 || !res.ok) { errors.push({ message: res.message }); continue; @@ -345,16 +364,29 @@ export const getData = async ( return { ok: false, message: "ScrapingBee API key not configured", data: [] }; } + logger.info(`[getData] Fetching draw data from API for ${chosenDate} ${drawId}`); + const targetUrl = `${constants.SCRAP_API_URL}/v1/book/list2`; const scrapingbeeUrl = new URL("https://app.scrapingbee.com/api/v1"); scrapingbeeUrl.searchParams.set("api_key", scrapingbeeApiKey); scrapingbeeUrl.searchParams.set("url", targetUrl); + scrapingbeeUrl.searchParams.set("block_resources", "true"); scrapingbeeUrl.searchParams.set("forward_headers", "true"); scrapingbeeUrl.searchParams.set("forward_headers_pure", "true"); + scrapingbeeUrl.searchParams.set("transparent_status_code", "true"); + + const forwardHeaders = Object.fromEntries( + Object.entries(constants.SCRAP_API_BASE_HEADERS).map(([key, value]) => [`Spb-${key}`, value]), + ); const res = await fetch(scrapingbeeUrl.toString(), { method: "POST", - headers: { "Spb-Authorization": jwt }, + headers: { + "Spb-Authorization": jwt, + "Spb-Content-Type": "application/json", + ...forwardHeaders, + "Spb-Accept-Encoding": "gzip, deflate, br, zstd", + }, body: JSON.stringify({ userType: 3, userIds, @@ -373,10 +405,19 @@ export const getData = async ( message: string; data: { book: BookingEntry; user: any }[]; }; + + res.headers.forEach((value, key) => { + logger.debug(`[getData] response headers - ${key}: ${value}`); + }); + let decoded = (await res.json()) as { data: J }; - const json = (decoded.data.success ? decoded.data : decoded) as any as J; + + dumpDataRaw(decoded, `getData_${chosenDate}_${drawId}`); + + const json = (decoded.data && decoded.data.success ? decoded.data : decoded) as any as J; + if (json.code !== 200 || !json.success || !json.data) { - logger.warn(`[getData] Error: ${json.message}`); + logger.warn(`[getData] Error: ${JSON.stringify(json)}`); return { ok: false, message: json.message, data: [] }; }