Question 1

I am currently working on scraping data from the Lazada site using Selenium in Python: https://www.lazada.sg/products/loreal-paris-uv-perfect-even-complexion-sunscreen-spf50pa-30ml-i214861100-s325723972.html?spm=a2o42.seller.list.1.75895319pt8HKU&mp=1

However, I am only able to extract the first page of the product reviews. Does anyone know how to extract the reviews from page 2 onwards?

Here is the code (it fails with an "Element is not clickable at point" error):

from selenium import webdriver
from bs4 import BeautifulSoup as soup
import time
from selenium.webdriver.chrome.options import Options

# Product page whose reviews are scraped.
url = 'https://www.lazada.sg/products/loreal-paris-uv-perfect-even-complexion-sunscreen-spf50pa-30ml-i214861100-s325723972.html?spm=a2o42.seller.list.1.75895319pt8HKU&mp=1'

chrome_options = Options()
#chrome_options.add_argument("--headless")
driver = webdriver.Chrome(executable_path='chromedriver',chrome_options=chrome_options)
driver.get(url)
time.sleep(0.1)

# Result lists: every processed review appends one entry to each of them.
review_csv = []
product_csv = []
rating_csv = []
date_review_csv = []

# Image URL a star must show to be counted (presumably the filled-star icon).
COUNTED_STAR_SRC = "https://laz-img-cdn.alicdn.com/tfs/TB19ZvEgfDH8KJjy1XcXXcpdXXa-64-64.png"


def collect_reviews(review_items):
    """Print and record the text, SKU info, star count and date of each review.

    review_items: iterable of review WebElements (the "[class='item']" nodes).
    Results are appended to the module-level *_csv lists.
    """
    for item in review_items:
        # Review text; an empty text is recorded with a placeholder instead.
        text = item.find_element_by_css_selector("[class='content']").text
        print(text)
        if text != "" or text.strip():
            review_csv.append(text)
        else:
            review_csv.append("No comments/review is an image")

        # Product purchase (SKU info shown on the review).
        sku_info = item.find_element_by_css_selector("[class='skuInfo']").text
        print(sku_info)
        product_csv.append(sku_info)

        # Star rating: count the star images whose src matches COUNTED_STAR_SRC.
        star_images = item.find_elements_by_css_selector("[class='star']")
        star_count = sum(
            1 for image in star_images
            if image.get_attribute('src') == COUNTED_STAR_SRC
        )
        rating_csv.append(star_count)
        print(star_count)

        # Date of review.
        posted_on = item.find_element_by_css_selector("[class='title right']").text
        date_review_csv.append(posted_on)
        print(posted_on)


titles = driver.find_element_by_class_name('pdp-mod-product-badge-title').text
print(titles)
product_reviews = driver.find_elements_by_css_selector("[class='item']")
urls = []

# Page 1 of product review
collect_reviews(product_reviews)

# Page 2 of product review onwards
# NOTE(review): this is the step the question is about. The native click is
# where the reported "Element is not clickable at point" error is raised, and
# WebElement.click() returns None, so page2_product_reviews would not be an
# iterable list of review elements even if the click succeeded.
page2_product_reviews = driver.find_element_by_xpath('//*[@id="module_product_review"]/div/div[3]/div[2]/div/div/button[2]').click()
collect_reviews(page2_product_reviews)
driver.close()

Thank you in advance!

Question 2

To paginate, use an infinite `while` loop. On each pass, scrape the reviews on the current page, then check whether the `next-pagination-item` button has the **disabled** attribute: if it does, break out of the loop; otherwise click the next button.

Code:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

# Image URL a star must show to be counted (presumably the filled-star icon).
STAR_IMAGE_URL = "https://laz-img-cdn.alicdn.com/tfs/TB19ZvEgfDH8KJjy1XcXXcpdXXa-64-64.png"

# Result lists: every processed review appends one entry to each of them.
review_csv = []
product_csv = []
rating_csv = []
date_review_csv = []

driver = webdriver.Chrome(executable_path='chromedriver')
try:
    driver.get("https://www.lazada.sg/products/loreal-paris-uv-perfect-even-complexion-sunscreen-spf50pa-30ml-i214861100-s325723972.html?spm=a2o42.seller.list.1.758953196tH2Mn&mp=1")
    titles = driver.find_element(By.CLASS_NAME, 'pdp-mod-product-badge-title').text
    print(titles)

    while True:
        # Wait until the review items of the current page are visible.
        WebDriverWait(driver, 10).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.item"))
        )
        product_reviews = driver.find_elements(By.CSS_SELECTOR, "[class='item']")

        for product in product_reviews:
            # Review text; blank or whitespace-only text gets a placeholder.
            review = product.find_element(By.CSS_SELECTOR, "[class='content']").text
            print(review)
            if review.strip():
                review_csv.append(review)
            else:
                review_csv.append("No comments/review is an image")

            # Product purchase (SKU info shown on the review).
            product_purchase = product.find_element(By.CSS_SELECTOR, "[class='skuInfo']").text
            print(product_purchase)
            product_csv.append(product_purchase)

            # Star rating: count the star images whose src matches STAR_IMAGE_URL.
            star_ratings = product.find_elements(By.CSS_SELECTOR, "[class='star']")
            star_rate = sum(
                1 for rating in star_ratings
                if rating.get_attribute('src') == STAR_IMAGE_URL
            )
            rating_csv.append(star_rate)
            print(star_rate)

            # Date of review.
            date = product.find_element(By.CSS_SELECTOR, "[class='title right']").text
            date_review_csv.append(date)
            print(date)

        # A disabled "next" button means this was the last page.
        if driver.find_elements(By.CSS_SELECTOR, "button.next-pagination-item.next[disabled]"):
            break

        button_next = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "button.next-pagination-item.next"))
        )
        # Click through JavaScript rather than WebElement.click(); the native
        # click is what raised "Element is not clickable at point" in the question.
        driver.execute_script("arguments[0].click();", button_next)
        print("next page")
        # Fixed pause before reading the next page (presumably to let it load).
        time.sleep(2)
finally:
    # Always end the browser session, even when a lookup or wait above fails.
    driver.quit()

print(review_csv)
print(product_csv)
print(rating_csv)
print(date_review_csv)

The lists are printed like this:

['Fast delivery, send within 3 days, in bubble envelope. Product expiry date : 0522', 'received in good condition. have not try it yet', 'No comments/review is an image', 'No comments/review is an image', 'No comments/review is an image', 'No comments/review is an image', 'No comments/review is an image', 'No comments/review is an image']

['Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30']

[5, 5, 5, 4, 4, 5, 5, 5]

['24 Oct 2019', '17 Nov 2019', '21 Nov 2019', '25 Oct 2019', '29 Aug 2019', '24 Apr 2019', '19 Jan 2019', '11 Nov 2018']

How to scrape all product reviews from Lazada in Python

Related Q&A

How to compare 2 successive row values in a resultset object using python

Getting all possible combination for [1,0] with length 3 [0,0,0] to [1,1,1]

Compare values under multiple conditions of one column in Python

Python: Tkinter :Dynamically Create Label

TypeError: str object is not callable when trying to click datepicker

Stanford parser with NLTK produces empty output

How do you return a list of the matched item in string with regex? [duplicate]

Indentation Error [closed]

open csv file in python to customize dictionary [duplicate]

How does UserPassesTestMixin in django work?