Description:
I'm experiencing an issue with nodriver when trying to intercept and retrieve network response bodies using the Chrome DevTools Protocol (CDP). In my script, I register a handler for the Network.ResponseReceived event and then call Network.getResponseBody with the corresponding request ID. Although the handler correctly detects that a response matching my criteria has been received, the subsequent call to Network.getResponseBody never returns the expected data (or returns empty data), even though the response is known to contain valid JSON.
Steps to Reproduce:
1. Initialize the nodriver browser.
2. Enable network monitoring with the Network.enable command.
3. Register a response handler for the Network.ResponseReceived event that filters for responses whose URL contains "operationName=Marketplace&variables".
4. For each matching response, call Network.getResponseBody with the event's request ID (a condensed sketch of steps 2-4 follows this list).
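To make the intended flow concrete, here is a condensed sketch of steps 2-4. It assumes nodriver's generated cdp.network wrappers (the same calls the full snippet further down uses); the helper name sniff and the bare print are just for illustration:

import base64
import nodriver

async def sniff(tab):
    # tab is a nodriver tab/page object returned by browser.get(...)

    # Step 2: enable network monitoring for this tab.
    await tab.send(nodriver.cdp.network.enable())

    # Step 3: handler invoked for every Network.ResponseReceived event.
    async def on_response(evt: nodriver.cdp.network.ResponseReceived):
        if "operationName=Marketplace&variables" not in evt.response.url:
            return
        # Step 4: ask CDP for the body of the matching request.
        body, is_base64 = await tab.send(
            nodriver.cdp.network.get_response_body(request_id=evt.request_id)
        )
        if is_base64:
            body = base64.b64decode(body).decode("utf-8")
        print(body)  # expected: the JSON payload of the Marketplace response

    tab.add_handler(nodriver.cdp.network.ResponseReceived, on_response)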
Expected Behavior:
Network.getResponseBody should return the full response body (decoded if necessary) so that the JSON data can be processed.

Actual Behavior:
Network.getResponseBody does not return the expected data.

What I've Tried:
- Alternative Workarounds:
- Enabling Interception Early: making sure Network.enable is called early and that the response handler is registered before the requests are sent.
- Delaying and Retrying: waiting after the event fires and retrying the call (a rough sketch follows this list).
- Researching Similar Issues:
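Roughly, the delay-and-retry idea was the following (the helper name, retry count, and sleep interval below are illustrative, not my exact values):

import asyncio
import nodriver

async def get_body_with_retry(tab, request_id, attempts=5, delay=1.0):
    # Retry Network.getResponseBody in case the body is not buffered yet
    # when Network.ResponseReceived fires.
    last_err = None
    for _ in range(attempts):
        try:
            body, is_base64 = await tab.send(
                nodriver.cdp.network.get_response_body(request_id=request_id)
            )
            if body:
                return body, is_base64
        except Exception as e:
            last_err = e
        await asyncio.sleep(delay)
    raise RuntimeError(f"No response body after {attempts} attempts: {last_err}")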
Environment:
Reproducible Code Snippet:
import asyncio
import logging
import re
import os
import base64
from collections import defaultdict
import nodriver # Ensure you have nodriver installed
class ESPNBetFetcher:
def __init__(self, leagues, max_concurrent=20):
self.logger = logging.getLogger("espnbet_fetcher")
self.logger.setLevel(logging.INFO)
log_path = os.path.join(os.path.dirname(__file__), 'espnbet_fetch.log')
os.makedirs(os.path.dirname(log_path), exist_ok=True)
handler = logging.FileHandler(log_path, encoding='utf-8')
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
self.logger.handlers.clear()
self.logger.addHandler(handler)
self.leagues = leagues
self.max_concurrent = max_concurrent
self.browser = None
async def startup(self):
self.logger.info("Starting nodriver browser...")
self.browser = await nodriver.start()
if not self.browser:
raise RuntimeError("Failed to initialize browser")
self.logger.info("Browser started successfully")
async def cleanup(self):
if self.browser:
self.logger.info("Cleaning up browser...")
try:
self.browser.stop() # nodriver's stop() is not async
except Exception as e:
self.logger.error(f"Error during cleanup: {e}")
self.browser = None
self.logger.info("Browser cleaned up successfully")
async def _driver_worker(self):
"""Load the NBA page and extract game IDs."""
game_ids = []
for league in self.leagues:
if league == "nba":
                url = "..."  # NBA games page URL (omitted from this snippet)
self.logger.info(f"Fetching games for NBA from {url}")
page = await self.browser.get(url, new_tab=True)
await asyncio.sleep(2) # Allow page to load
try:
await page.find("section[data-testid='marketplace-shelf-']", timeout=30000)
body_el = await page.query_selector("body")
if not body_el:
self.logger.error("Could not find body element")
continue
js_function = "(elem) => { return elem.innerHTML; }"
content = await body_el.apply(js_function)
# Example regex; adjust if necessary based on page content.
matches = re.findall(r"default\|([a-f0-9-]+)\|([a-f0-9-]+)", content, re.IGNORECASE)
for game_id, _ in matches:
game_ids.append((league, game_id))
self.logger.info(f"Found {len(matches)} games for NBA")
except Exception as e:
self.logger.error(f"Error extracting games from NBA page: {e}")
finally:
await page.close()
return game_ids
async def _fetch_odds_for_game(self, league, game_id, worker_id):
"""For a given game, open the player props page and intercept the network response."""
        url = (
            "..."  # site base URL omitted from this snippet
            f"/event/{game_id}/section/player_props"
        )
try:
# Open new tab for the game page
page = await self.browser.get(url, new_tab=True)
self.logger.info(f"Worker {worker_id}: Opened page {url}")
# Enable network monitoring
            await page.send(nodriver.cdp.network.enable())
async def handle_response(evt):
if "operationName=Marketplace&variables" in evt.response.url:
self.logger.info(f"Worker {worker_id}: Intercepted response: {evt.response.url}")
try:
                        # nodriver's generated CDP wrapper returns (body, base64_encoded)
                        body, is_base64 = await page.send(
                            nodriver.cdp.network.get_response_body(request_id=evt.request_id)
                        )
                        if is_base64:
                            body = base64.b64decode(body).decode("utf-8")
                        self.logger.info(f"Worker {worker_id}: Response body: {body}")
except Exception as e:
self.logger.error(f"Worker {worker_id}: Error retrieving response body: {e}")
# Register the response handler
            page.add_handler(nodriver.cdp.network.ResponseReceived, handle_response)
# Wait long enough to allow the API request to occur and be intercepted
await asyncio.sleep(60)
await page.close()
except Exception as e:
self.logger.error(f"Worker {worker_id}: Error fetching odds for game {game_id}: {e}")
async def scrape(self):
await self.startup()
try:
game_ids = await self._driver_worker()
if not game_ids:
self.logger.error("No game IDs found")
return
tasks = []
worker_id = 0
for league, game_id in game_ids:
tasks.append(asyncio.create_task(self._fetch_odds_for_game(league, game_id, worker_id)))
worker_id += 1
await asyncio.gather(*tasks)
finally:
await self.cleanup()
async def main():
fetcher = ESPNBetFetcher(["nba"])
await fetcher.scrape()
if __name__ == "__main__":
asyncio.run(main())
Additional Comments:
Any insight into why Network.getResponseBody might be failing or returning empty data would be greatly appreciated. Thanks for your help!