And so it begins: migrate pyapi/main.py from ScraperAPI to ScrapingBee
pyapi/main.py | 141 lines changed
@@ -1,8 +1,9 @@
+import json
 import logging
 import os
 import time
 from typing import Dict, List, Optional
-from urllib.parse import urlencode
+from urllib.parse import urlencode, quote
 
 import aiohttp
 from dotenv import load_dotenv
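The newly imported json module is what lets the rewritten response wrapper decode bodies itself rather than delegating to aiohttp. A minimal sketch of that decode path (the payload bytes here are invented for illustration):

    import json

    # Stand-in for a pre-read response body; the real bytes come from response.read()
    content = b'{"status": "ok"}'
    text = content.decode('utf-8')   # mirrors SimpleResponse.text()
    payload = json.loads(text)       # mirrors SimpleResponse.json()
    print(payload["status"])         # -> ok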
@@ -24,9 +25,9 @@ app = FastAPI()
 
 logger.info("FastAPI Proxy Server initialized")
 
-SCRAPERAPI_API_KEY = os.getenv("SCRAPERAPI_API_KEY")
-if not SCRAPERAPI_API_KEY:
-    raise ValueError("SCRAPERAPI_API_KEY is not set")
+SCRAPINGBEE_API_KEY = os.getenv("SCRAPINGBEE_API_KEY")
+if not SCRAPINGBEE_API_KEY:
+    raise ValueError("SCRAPINGBEE_API_KEY is not set")
 
 
 CONSTANTS = {
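The key check above fails fast at module load. Assuming the key lives in a local .env file picked up by load_dotenv() (file contents invented here), the flow looks like:

    # .env (hypothetical)
    # SCRAPINGBEE_API_KEY=sk-your-key

    import os
    from dotenv import load_dotenv

    load_dotenv()                           # populates os.environ from .env
    key = os.getenv("SCRAPINGBEE_API_KEY")  # None if the variable is absent
    if not key:
        # Raising at import time stops the server before it can serve requests
        raise ValueError("SCRAPINGBEE_API_KEY is not set")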
@@ -36,7 +37,7 @@ CONSTANTS = {
     "LAST_FETCHED_KEY": "LAST_FETCHED",
     "SCRAP_API_URL": "https://gamebooking24.com/lottery-api",
     "SCRAP_API_SESSION_KEY": "SRAJWT",
-    "SCRAPERAPI_BASE_URL": "http://api.scraperapi.com",
+    "SCRAPINGBEE_BASE_URL": "https://app.scrapingbee.com/api/v1",
     "SCRAP_API_BASE_HEADERS": {
         "Host": "gamebooking24.com",
         "Sec-Ch-Ua": '"Not/A)Brand";v="8", "Chromium";v="126"',
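With SCRAPINGBEE_BASE_URL in CONSTANTS, every proxied call becomes a query against ScrapingBee's endpoint with the target URL passed as an encoded parameter. A rough sketch of the final URL shape (key and target path invented):

    from urllib.parse import urlencode

    params = {
        "api_key": "sk-example",                              # placeholder key
        "url": "https://gamebooking24.com/lottery-api/foo",   # hypothetical target
        "render_js": "true",
    }
    # aiohttp performs the same encoding internally when given params=
    print("https://app.scrapingbee.com/api/v1?" + urlencode(params))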
@@ -95,105 +96,129 @@ def build_headers(
 async def make_get_request(
     url: str, params: Optional[Dict] = None, headers: Optional[Dict] = None
 ):
-    """Make a GET request using ScraperAPI"""
-    if SCRAPERAPI_API_KEY == "<TODO: get and put the key in here>":
-        raise HTTPException(status_code=500, detail="ScraperAPI API key not configured")
-
-    # Build the ScraperAPI request params
-    scraperapi_params = {
-        "api_key": SCRAPERAPI_API_KEY,
-        "url": url,
-        "render": "true",
-    }
-
+    """Make a GET request using ScrapingBee"""
     # Add query params to the target URL if provided
     if params:
         url_with_params = f"{url}?{urlencode(params)}"
-        scraperapi_params["url"] = url_with_params
     else:
         url_with_params = url
 
-    # Make the request to ScraperAPI using aiohttp
+    logger.debug(f"[ScrapingBee GET] Target URL: {url_with_params}")
+
+    # Build the ScrapingBee request params
+    # Note: aiohttp will automatically URL-encode the params, including the 'url' value
+    scrapingbee_params = {
+        "api_key": SCRAPINGBEE_API_KEY,
+        "url": url_with_params,
+        "render_js": "true",
+        "block_resources": "false",
+        "transparent_status_code": "true",  # Pass through the actual status code from target site
+    }
+
+    # Forward headers to target site if provided (for Authorization, etc.)
+    if headers and "Authorization" in headers:
+        scrapingbee_params["forward_headers"] = "true"
+
+    # Make the request to ScrapingBee using aiohttp
+    # Note: Don't pass custom headers to ScrapingBee - they're for the target site
+    # If needed, use ScrapingBee's forward_headers parameter instead
     async with aiohttp.ClientSession() as session:
         async with session.get(
-            CONSTANTS["SCRAPERAPI_BASE_URL"],
-            params=scraperapi_params,
-            headers=headers,
+            CONSTANTS["SCRAPINGBEE_BASE_URL"],
+            params=scrapingbee_params,
             timeout=aiohttp.ClientTimeout(total=60),
         ) as response:
-            # Create a simple response-like object
-            class AsyncResponse:
-                def __init__(self, aiohttp_response):
-                    self._response = aiohttp_response
-                    self.status_code = aiohttp_response.status
-                    self.headers = aiohttp_response.headers
+            # Read content before context manager exits
+            content = await response.read()
+
+            # Log error responses for debugging
+            if response.status != 200:
+                try:
+                    error_text = content.decode('utf-8')[:500]
+                    logger.error(f"[ScrapingBee GET] Status {response.status}, Response: {error_text}")
+                except:
+                    logger.error(f"[ScrapingBee GET] Status {response.status}, Response (non-text): {len(content)} bytes")
+
+            # Create a simple response object with the data
+            class SimpleResponse:
+                def __init__(self, status, headers, content_bytes):
+                    self.status_code = status
+                    self.headers = headers
+                    self._content = content_bytes
                     self._text = None
                     self._json = None
-                    self._content = None
 
                 async def text(self):
                     if self._text is None:
-                        self._text = await self._response.text()
+                        self._text = self._content.decode('utf-8')
                     return self._text
 
                 async def json(self):
                     if self._json is None:
-                        self._json = await self._response.json()
+                        self._json = json.loads(await self.text())
                     return self._json
 
                 async def content(self):
-                    if self._content is None:
-                        self._content = await self._response.read()
                     return self._content
 
-            return AsyncResponse(response)
+            return SimpleResponse(response.status, response.headers, content)
 
 
 async def make_post_request(url: str, data: dict, headers: Optional[Dict] = None):
-    """Make a POST request using ScraperAPI"""
-    if SCRAPERAPI_API_KEY == "<TODO: get and put the key in here>":
-        raise HTTPException(status_code=500, detail="ScraperAPI API key not configured")
+    """Make a POST request using ScrapingBee"""
 
-    # Build the ScraperAPI request params
-    scraperapi_params = {
-        "api_key": SCRAPERAPI_API_KEY,
+    # Build the ScrapingBee request params
+    scrapingbee_params = {
+        "api_key": SCRAPINGBEE_API_KEY,
         "url": url,
-        "render": "true",
+        "render_js": "true",
+        "block_resources": "false",
     }
 
-    # Make the POST request to ScraperAPI using aiohttp
+    # ScrapingBee POST requests: pass JSON body as a parameter
+    scrapingbee_params["body"] = json.dumps(data)
+
+    # Forward headers to target site if provided
+    # Note: ScrapingBee's forward_headers forwards common headers automatically
+    # For custom headers like Authorization, we may need to use cookies parameter
+    if headers and "Authorization" in headers:
+        scrapingbee_params["forward_headers"] = "true"
+        # TODO: May need to pass Authorization via cookies if forward_headers doesn't work
+
+    # Make the POST request to ScrapingBee using aiohttp
+    # ScrapingBee HTML API uses GET even for POST requests - the body is passed as a param
     async with aiohttp.ClientSession() as session:
-        async with session.post(
-            CONSTANTS["SCRAPERAPI_BASE_URL"],
-            params=scraperapi_params,
-            json=data,  # Use json= for JSON payloads (sets Content-Type automatically)
-            headers=headers,
+        async with session.get(
+            CONSTANTS["SCRAPINGBEE_BASE_URL"],
+            params=scrapingbee_params,
            timeout=aiohttp.ClientTimeout(total=60),
         ) as response:
-            # Create a simple response-like object
-            class AsyncResponse:
-                def __init__(self, aiohttp_response):
-                    self._response = aiohttp_response
-                    self.status_code = aiohttp_response.status
-                    self.headers = aiohttp_response.headers
+            # Read content before context manager exits
+            content = await response.read()
+
+            # Create a simple response object with the data
+            class SimpleResponse:
+                def __init__(self, status, headers, content_bytes):
+                    self.status_code = status
+                    self.headers = headers
+                    self._content = content_bytes
                     self._text = None
                     self._json = None
-                    self._content = None
 
                 async def text(self):
                     if self._text is None:
-                        self._text = await self._response.text()
+                        self._text = self._content.decode('utf-8')
                     return self._text
 
                 async def json(self):
                     if self._json is None:
-                        self._json = await self._response.json()
+                        self._json = json.loads(await self.text())
                     return self._json
 
                 async def content(self):
-                    if self._content is None:
-                        self._content = await self._response.read()
                     return self._content
 
-            return AsyncResponse(response)
+            return SimpleResponse(response.status, response.headers, content)
 
 
 # Pydantic models for request bodies
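One behavioral fix buried in this rewrite: the old AsyncResponse kept a reference to the live aiohttp response and only awaited .text()/.json() later, after the async with block had already released the connection, so those deferred reads could fail. SimpleResponse instead snapshots the bytes while the session is still open. A minimal sketch of the difference, assuming any reachable URL:

    import aiohttp
    import asyncio

    async def broken(url: str):
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                pass                      # body never read inside the block
        return await response.read()      # connection already released; this read errors

    async def fixed(url: str):
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                return await response.read()  # snapshot while the connection is open

    # asyncio.run(fixed("https://example.com"))  # placeholder URL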
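For context, a hypothetical caller of the two helpers (endpoint paths and payload are made up; only the function signatures and CONSTANTS keys come from the diff):

    import asyncio

    async def demo():
        # GET: query params are folded into the target URL before proxying
        resp = await make_get_request(
            CONSTANTS["SCRAP_API_URL"] + "/results",   # hypothetical endpoint
            params={"draw": "latest"},                 # hypothetical query
        )
        if resp.status_code == 200:
            print(await resp.json())    # decoded from the pre-read bytes

        # POST: the JSON body rides along as ScrapingBee's 'body' parameter
        resp = await make_post_request(
            CONSTANTS["SCRAP_API_URL"] + "/login",     # hypothetical endpoint
            data={"user": "u", "pass": "p"},           # hypothetical payload
        )
        print(resp.status_code)

    # asyncio.run(demo())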