Problem when scraping the 100 Movie titles.
Fabian Joseph
fabianjoseph063 at gmail.com
Thu Sep 22 03:36:43 EDT 2022
# Try this; the data is stored in JSON format within the page itself:
import json
import requests
from bs4 import BeautifulSoup
url = "https://www.empireonline.com/movies/features/best-movies-2/"
# Bound the request so a stalled server cannot hang the script, and fail
# loudly on an HTTP error status instead of trying to parse an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
# The JSON payload is the first child of the element with id "__NEXT_DATA__".
next_data_tag = soup.select_one("#__NEXT_DATA__")
if next_data_tag is None:
    # select_one returns None when nothing matches; report that clearly
    # rather than failing with an AttributeError on the next line.
    raise RuntimeError("No #__NEXT_DATA__ element found at " + url)
data = json.loads(next_data_tag.contents[0])
# uncomment this to print all data:
#print(json.dumps(data, indent=4))
def find_articles(data):
    """Recursively yield image names from a nested JSON-like structure.

    Walks dicts and lists depth-first. For every dict entry whose key
    starts with ``"ImageMeta:"`` the value's ``['image']['name']`` is
    yielded, and that value is not searched any further. All other dict
    values and all list items are searched recursively. Anything that is
    neither a dict nor a list (strings, numbers, ``None``) yields nothing.

    Args:
        data: The decoded JSON value to search.

    Yields:
        The ``['image']['name']`` value of each ``"ImageMeta:"`` entry, in
        traversal order.

    Raises:
        KeyError: If an ``"ImageMeta:"`` entry lacks ``'image'`` or
            ``'name'``.
    """
    if isinstance(data, dict):
        for k, v in data.items():
            if k.startswith("ImageMeta:"):
                # A matching entry is a leaf for this search: emit its
                # name and do not descend into it.
                yield v['image']['name']
            else:
                yield from find_articles(v)
    elif isinstance(data, list):
        for i in data:
            yield from find_articles(i)
# Print each image name found in the page's embedded JSON, one per line.
for a in find_articles(data):
    print(a)
More information about the Python-list
mailing list