|
|
@ -1,8 +1,6 @@
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import os, requests, shutil
|
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from time import time
|
|
|
|
|
|
|
|
from multiprocessing.pool import ThreadPool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
base_url = 'https://www.zeit.de/serie/die-kaenguru-comics?p='
|
|
|
|
base_url = 'https://www.zeit.de/serie/die-kaenguru-comics?p='
|
|
|
|
|
|
|
|
|
|
|
@ -14,5 +12,11 @@ for page in range(1, 15):
|
|
|
|
soup = BeautifulSoup(r.text, 'html.parser')
|
|
|
|
soup = BeautifulSoup(r.text, 'html.parser')
|
|
|
|
articles = soup.find_all('article')
|
|
|
|
articles = soup.find_all('article')
|
|
|
|
for article in articles:
|
|
|
|
for article in articles:
|
|
|
|
comic_title = article.a.contents
|
|
|
|
comic = dict()
|
|
|
|
print(comic_title)
|
|
|
|
target_page = requests.get(article.a.get('href'))
|
|
|
|
|
|
|
|
target_soup = BeautifulSoup(target_page.text, 'html.parser')
|
|
|
|
|
|
|
|
comic['title'] = target_soup.find('span', class_="article-heading__title")
|
|
|
|
|
|
|
|
comic['time'] = target_soup.find('time', class_="metadata__date").get('datetime')
|
|
|
|
|
|
|
|
comic['number'] = target_soup.find('span', class_="article-heading__kicker")
|
|
|
|
|
|
|
|
comic['image_url'] = target_soup.find(class_="scrollable-media-container").img.get('src')
|
|
|
|
|
|
|
|
print(comic)
|