Scraping Indeed.com using bs4 and requests in Python

A Python script that scrapes the first 10 job listings from Indeed.

 

from bs4 import BeautifulSoup
import requests
import pandas as pd

# Search-results page to scrape (query "Accountant", location "Berlin").
url = 'https://de.indeed.com/jobs?q=Accountant+&l=Berlin'

# Without a timeout, requests waits forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page
# into an empty result set.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'lxml')

# Each listing is looked up as a <div class="row">; query once and reuse.
rows = soup.find_all('div', {'class': 'row'})
print(len(rows))


def _text_or_none(tag):
    """Return the stripped text of *tag*, or None when *tag* is None."""
    return tag.text.strip() if tag is not None else None


data = []
for row in rows:
    # A row may lack the title anchor or the company span; record None for
    # the missing field rather than aborting the whole scrape.
    title_link = row.find('a', {'data-tn-element': 'jobTitle'})
    job_title = title_link.get('title') if title_link is not None else None
    company_name = _text_or_none(row.find('span', {'class': 'company'}))

    # Concatenate the text of every <span class="summary"> in the row.
    job_summary = ''.join(
        span.text.strip()
        for span in row.find_all('span', {'class': 'summary'})
    )

    # The location is looked up as a <div> first, then as a <span>.
    location_tag = row.find('div', {'class': 'location'})
    if location_tag is None:
        location_tag = row.find('span', {'class': 'location'})
    location = _text_or_none(location_tag)

    salary_range = _text_or_none(row.find('span', {'class': 'salaryText'}))
    summary = _text_or_none(row.find('div', {'class': 'summary'}))

    data.append({
        'job_title': job_title,
        'company_name': company_name,
        'job_summary': job_summary,
        'location': location,
        'salary_range': salary_range,
        'summary': summary,
    })

# One DataFrame row per scraped listing; columns follow the dict keys above.
df = pd.DataFrame(data)
df.head()

 


Support the project

Please feel free to support me on Patreon.