Question 1

import requests, re
from bs4 import BeautifulSoup

# Search URL for the first page; later pages append an `offset` parameter.
search_url = 'https://www.booking.com/searchresults.html?label=gen173nr-1FCAEoggI46AdIM1gEaGyIAQGYATG4ARfIAQzYAQHoAQH4AQKIAgGoAgO4AuS4sJ4GwAIB0gIkYWJlYmZiMWItNWJjMi00M2Y2LTk3MGUtMzI2ZGZmMmIyNzMz2AIF4AIB&aid=304142&dest_id=-2092174&dest_type=city&group_adults=2&req_adults=2&no_rooms=1&group_children=0&req_children=0&nflt=ht_id%3D204&rows=15'
headers = {'user-agent': 'some agent'}

data = []

soup = BeautifulSoup(requests.get(search_url, headers=headers).text)

# The first number found in the text of the div that directly precedes the
# pagination element is used as the total number of results.
num_results = int(
    re.search(
        r'\d+',
        soup.select_one('div:has(+[data-testid="pagination"])').text,
    ).group(0)
)

# Pages are requested in offset steps of 25.
for i in range(0, int(num_results / 25)):
    soup = BeautifulSoup(
        requests.get(f'{search_url}&offset={int(i * 25)}', headers=headers).text
    )
    cards = soup.select('[data-testid="property-card"]')

    # Titles are appended as text ...
    data.extend([e.select_one('[data-testid="title"]').text for e in cards])
    # ... and the review elements are appended to the SAME flat list (as
    # elements, not text), so names and reviews end up in one sequence
    # rather than in separate columns.
    data.extend(
        [e.select_one('[class="d8eab2cf7f c90c0a70d3 db63693c62"]') for e in cards]
    )

data

enter image description here

I am getting the names and reviews for all pages in a single line; I want this result in separate columns for names and reviews.

I want to get my result like this:

enter image description here

Question 2

Actually, I couldn't quite understand your question or what you want. It would be great if you could show a sample of the DataFrame you expect. In general, though, you can do it like this. For example, in this data the latitude and longitude are in the same column, and you can separate them into two columns with the split function. Don't forget to add headers.

import requests
from bs4 import BeautifulSoup as bs
from datetime import datetime

# Site root; the relative links found on the results page are appended to it.
base_url = 'https://www.booking.com'

# Search-results page whose property links are followed.
urlss = 'https://www.booking.com/searchresults.html?req_children=0&label=gen173nr-1FCAEoggI46AdIM1gEaGyIAQGYATG4ARfIAQzYAQHoAQH4AQKIAgGoAgO4AuS4sJ4GwAIB0gIkYWJlYmZiMWItNWJjMi00M2Y2LTk3MGUtMzI2ZGZmMmIyNzMz2AIF4AIB&group_children=0&dest_type=city&rows=15&aid=304142&dest_id=-2092174&nflt=ht_id%3D204&req_adults=2&no_rooms=1&group_adults=2'

# Accumulates one dict per scraped property page.
data = []
def pars(url):
    """Scrape one property page and append its fields to the global ``data`` list.

    The record holds ``description``, ``Title``, an optional ``x_lnge`` (the
    ``data-atlas-latlng`` attribute of the sidebar map element, when that
    element exists) and one ``feature<N>`` entry per matching feature div.

    If a required element or attribute is missing, the page is skipped and
    nothing is appended.

    Args:
        url: Absolute URL of the property page to fetch.
    """
    r = requests.get(url)
    soup = bs(r.text, 'html.parser')
    foor = {}
    try:
        foor['description'] = soup.find('div', id='property_description_content').text
        foor['Title'] = soup.find('h2', class_='d2fee87262 pp-header__title').text
        x = soup.find_all('div', class_='a815ec762e ab06168e66')
        div_map = soup.select_one('#hotel_sidebar_static_map')
        if div_map:
            foor['x_lnge'] = div_map['data-atlas-latlng']
        for f, feature in enumerate(x):
            foor[f'feature{f}'] = feature.text
    except (AttributeError, KeyError):
        # AttributeError: find() returned None for a required element.
        # KeyError: the map element lacks the expected attribute.
        # Best-effort scraping: skip this page rather than abort the run.
        return
    data.append(foor)
def general():
    """Fetch the search-results page and scrape every property linked from it.

    Each ``header > a`` link on the page at ``urlss`` is resolved against
    ``base_url``, printed, and passed to ``pars``.
    """
    r = requests.get(urlss)
    soup = bs(r.text, 'html.parser')
    for link in soup.select('header > a'):
        urls = base_url + link['href']
        print(urls)
        pars(urls)


# Not read anywhere in the visible code; kept from the original listing.
f = []
def export_data(data):
    """Write the scraped records to a date-stamped Excel file.

    The records are loaded into a DataFrame, duplicate rows are dropped, and
    the result is saved as ``<YYYY_MM_DD>booking.xlsx`` in the current
    directory, without the index column.

    Args:
        data: Records to export, in any form ``pandas.DataFrame`` accepts
            (here, a list of dicts).
    """
    # Imported here because the listing uses ``pd`` without importing pandas.
    import pandas as pd

    frame = pd.DataFrame(data).drop_duplicates()
    stamp = datetime.now().strftime('%Y_%m_%d')
    frame.to_excel(f'{stamp}booking.xlsx', index=False)


if __name__ == '__main__':
    general()
    export_data(data)

How to create DataFrame with columns based on scraped data?

Related Q&A

How do i change the colour of a button border tkinter

module object has no attribute Gridspec despite calling help(gridspec) revealing the Gridspec class

Python division doesnt work as expected for large numbers [duplicate]

working out an average of the values in a dictionary

getting an error when trying to import a list into a mysql table

Getting a view does not return a valid response error message on my flask chatbot [duplicate]

Django how to add data to Object from queryset

before_action ... only: how to do this in python flask? [closed]

Destroy function not destroying a frame efficiently after the first iteration in Tkinter Python

Access columns and rows of numpy.ndarray