From 6a36518bbca33e21fcc7fc7fb66cda0b29390537 Mon Sep 17 00:00:00 2001
From: f0x1
Date: Fri, 14 Jan 2022 19:41:02 +0100
Subject: [PATCH] second loop

---
Review notes (ignored by git-am; not part of the commit message):
- Line breaks and indentation were reconstructed from a copy of this patch
  that had been collapsed onto a single line. 4-space indentation is assumed
  from the block structure; confirm against the tracked file before applying.
- The added loop body was revised: requests.get() now has a timeout, every
  element lookup is guarded against None, and 'title'/'number' hold the
  element text instead of bs4 Tag objects.
- The "From <sha>" and "index" hashes are those of the original commit and
  do not describe the revised post-image.

 ZeitOnlineRequests.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/ZeitOnlineRequests.py b/ZeitOnlineRequests.py
index f783b60..a1144ef 100644
--- a/ZeitOnlineRequests.py
+++ b/ZeitOnlineRequests.py
@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
-import os, requests, shutil
+import requests
 from bs4 import BeautifulSoup
-from time import time
-from multiprocessing.pool import ThreadPool
 
 base_url = 'https://www.zeit.de/serie/die-kaenguru-comics?p='
 
@@ -14,5 +12,26 @@ for page in range(1, 15):
     soup = BeautifulSoup(r.text, 'html.parser')
     articles = soup.find_all('article')
     for article in articles:
-        comic_title = article.a.contents
-        print(comic_title)
\ No newline at end of file
+        # Teasers without a usable link cannot be followed; skip them.
+        link = article.a
+        href = link.get('href') if link is not None else None
+        if not href:
+            continue
+        # A timeout keeps one stalled request from hanging the whole crawl.
+        target_page = requests.get(href, timeout=30)
+        target_soup = BeautifulSoup(target_page.text, 'html.parser')
+        # find() returns None when an element is missing, so each lookup is
+        # guarded instead of letting one odd page abort the run.
+        title = target_soup.find('span', class_="article-heading__title")
+        date = target_soup.find('time', class_="metadata__date")
+        kicker = target_soup.find('span', class_="article-heading__kicker")
+        media = target_soup.find(class_="scrollable-media-container")
+        image = media.img if media is not None else None
+        comic = {
+            # Store the text, not the bs4 Tag, so every value is a plain string.
+            'title': title.get_text(strip=True) if title is not None else None,
+            'time': date.get('datetime') if date is not None else None,
+            'number': kicker.get_text(strip=True) if kicker is not None else None,
+            'image_url': image.get('src') if image is not None else None,
+        }
+        print(comic)