# -*- coding: utf-8 -*-
"""Scrape the 'Die Kaenguru-Comics' series from zeit.de.

Walks the series overview pages 1..14, follows every <article> link,
extracts title / publication datetime / episode number / image URL,
records one CSV row per comic in output.csv, and downloads each comic
image to <number>.webp in the current directory.
"""
import csv
import os

import requests
from bs4 import BeautifulSoup

BASE_URL = 'https://www.zeit.de/serie/die-kaenguru-comics?p='
FIELDNAMES = ['title', 'time', 'number', 'image_url']

# Context manager guarantees the CSV is flushed/closed even if a request fails.
with open('output.csv', 'w', newline='') as csv_file:
    # BUG FIX: the original used csv.writer(...).writerow(comic) with a dict,
    # which writes the dict KEYS ('title', 'time', ...) into every row instead
    # of the values. DictWriter writes the values in a fixed column order.
    csv_writer = csv.DictWriter(csv_file, fieldnames=FIELDNAMES, dialect='excel')
    csv_writer.writeheader()

    for page in range(1, 15):  # overview pages 1..14
        page_url = BASE_URL + str(page)
        print(page_url)
        r = requests.get(page_url)
        soup = BeautifulSoup(r.text, 'html.parser')

        for article in soup.find_all('article'):
            comic = {}
            # Each article's first <a> links to the comic's detail page.
            target_page = requests.get(article.a.get('href'))
            target_soup = BeautifulSoup(target_page.text, 'html.parser')

            comic['title'] = target_soup.find(
                'span', class_="article-heading__title").string
            comic['time'] = target_soup.find(
                'time', class_="metadata__date").get('datetime')
            # Kicker looks like "Folge 123" -> keep just the episode number.
            comic['number'] = target_soup.find(
                'span', class_="article-heading__kicker").string.replace("Folge ", '')

            # The image lives in one of two container variants depending on
            # the page layout; try both.
            media_container = (
                target_soup.find(class_="scrollable-media-container")
                or target_soup.find(class_="collapsible-image__figure")
            )
            if media_container:
                comic['image_url'] = media_container.img.get('src')

            # Row is written either way; a missing image_url becomes an empty
            # CSV field (DictWriter's restval default).
            csv_writer.writerow(comic)

            if 'image_url' not in comic:
                # BUG FIX: the original fell through to requests.get(
                # comic['image_url']) and crashed with KeyError here.
                print('media_container not found')
                continue

            image_download = requests.get(comic['image_url'])

            # BUG FIX: the original appended '_' only once and then opened
            # with 'xb', so a second collision raised FileExistsError.
            # Keep appending until the name is free.
            filename = comic['number'] + '.webp'
            while os.path.exists(filename):
                comic['number'] += '_'
                filename = comic['number'] + '.webp'

            with open(filename, 'xb') as image_file:
                image_file.write(image_download.content)
            print(comic)