How to scrape all product reviews from Lazada in Python

2024/10/5 15:19:44

I am currently working on scraping data from the Lazada site using Selenium in Python: https://www.lazada.sg/products/loreal-paris-uv-perfect-even-complexion-sunscreen-spf50pa-30ml-i214861100-s325723972.html?spm=a2o42.seller.list.1.75895319pt8HKU&mp=1

However, I am only able to extract the first page of the product reviews. Does anyone know how to extract the reviews from page 2 onwards?

Here is the code (it currently fails with the error "Element is not clickable at point"):

from selenium import webdriver
from bs4 import BeautifulSoup as soup
import time
from selenium.webdriver.chrome.options import Options

# Product page whose reviews are being scraped.
url = 'https://www.lazada.sg/products/loreal-paris-uv-perfect-even-complexion-sunscreen-spf50pa-30ml-i214861100-s325723972.html?spm=a2o42.seller.list.1.75895319pt8HKU&mp=1'

chrome_options = Options()
# Headless mode is left disabled:
# chrome_options.add_argument("--headless")
driver = webdriver.Chrome(executable_path='chromedriver', chrome_options=chrome_options)
driver.get(url)
time.sleep(0.1)

# One entry per review is appended to each of these lists.
review_csv = []
product_csv = []
rating_csv = []
date_review_csv = []

titles = driver.find_element_by_class_name('pdp-mod-product-badge-title').text
print(titles)

product_reviews = driver.find_elements_by_css_selector("[class='item']")
urls = []

# ---- Page 1 of the product reviews ----
for product in product_reviews:
    # Review text; an empty string gets a placeholder instead.
    review = product.find_element_by_css_selector("[class='content']").text
    if review != "" or review.strip():
        print(review)
        review_csv.append(review)
    else:
        print(review)
        review_csv.append("No comments/review is an image")

    # Purchased variant ("skuInfo") shown under the review.
    product_purchase = product.find_element_by_css_selector("[class='skuInfo']").text
    print(product_purchase)
    product_csv.append(product_purchase)

    # Star rating: count the star images whose src equals the reference URL.
    star_ratings = product.find_elements_by_css_selector("[class='star']")
    stars = "https://laz-img-cdn.alicdn.com/tfs/TB19ZvEgfDH8KJjy1XcXXcpdXXa-64-64.png"
    star_rate = 0
    for rating in star_ratings:
        if rating.get_attribute('src') == stars:
            star_rate += 1
    rating_csv.append(star_rate)
    print(star_rate)

    # Date of the review.
    date = product.find_element_by_css_selector("[class='title right']").text
    date_review_csv.append(date)
    print(date)

# ---- Page 2 of the product reviews onwards ----
# NOTE(review): this click is presumably where the reported
# "Element is not clickable at point" error is raised. Also, click()
# returns None, so page2_product_reviews never holds review elements and
# the loop below cannot iterate over it.
page2_product_reviews = driver.find_element_by_xpath('//*[@id="module_product_review"]/div/div[3]/div[2]/div/div/button[2]').click()
for product in page2_product_reviews:
    # Review text; an empty string gets a placeholder instead.
    review = product.find_element_by_css_selector("[class='content']").text
    if review != "" or review.strip():
        print(review)
        review_csv.append(review)
    else:
        print(review)
        review_csv.append("No comments/review is an image")

    # Purchased variant ("skuInfo") shown under the review.
    product_purchase = product.find_element_by_css_selector("[class='skuInfo']").text
    print(product_purchase)
    product_csv.append(product_purchase)

    # Star rating: count the star images whose src equals the reference URL.
    star_ratings = product.find_elements_by_css_selector("[class='star']")
    stars = "https://laz-img-cdn.alicdn.com/tfs/TB19ZvEgfDH8KJjy1XcXXcpdXXa-64-64.png"
    star_rate = 0
    for rating in star_ratings:
        if rating.get_attribute('src') == stars:
            star_rate += 1
    rating_csv.append(star_rate)
    print(star_rate)

    # Date of the review.
    date = product.find_element_by_css_selector("[class='title right']").text
    date_review_csv.append(date)
    print(date)

driver.close()

Thank you in advance!

Answer

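To handle pagination, use an infinite `while` loop. On each pass, scrape the reviews on the current page, then check whether the `next-pagination-item` button has the **disabled** attribute: if it does, break out of the loop; otherwise click the next button and continue.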

Code:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

PRODUCT_URL = "https://www.lazada.sg/products/loreal-paris-uv-perfect-even-complexion-sunscreen-spf50pa-30ml-i214861100-s325723972.html?spm=a2o42.seller.list.1.758953196tH2Mn&mp=1"
# A star image counts towards the rating when its src equals this URL.
FILLED_STAR_SRC = "https://laz-img-cdn.alicdn.com/tfs/TB19ZvEgfDH8KJjy1XcXXcpdXXa-64-64.png"
NEXT_BUTTON_SELECTOR = "button.next-pagination-item.next"

# Parallel lists: index i in each list describes the same review.
review_csv = []
product_csv = []
rating_csv = []
date_review_csv = []

# No executable_path argument: 'chromedriver' on PATH is already the default
# in Selenium 3, and the keyword was removed in later Selenium 4 releases.
driver = webdriver.Chrome()
try:
    driver.get(PRODUCT_URL)
    titles = driver.find_element(By.CLASS_NAME, 'pdp-mod-product-badge-title').text
    print(titles)

    while True:
        # Wait until the review items of the current page are visible.
        WebDriverWait(driver, 10).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.item"))
        )
        product_reviews = driver.find_elements(By.CSS_SELECTOR, "[class='item']")

        for product in product_reviews:
            # Review text; empty or whitespace-only text gets a placeholder.
            review = product.find_element(By.CSS_SELECTOR, "[class='content']").text
            print(review)
            if review.strip():
                review_csv.append(review)
            else:
                review_csv.append("No comments/review is an image")

            # Purchased variant. find_elements returns an empty list instead
            # of raising, so a review without this node still gets an entry
            # and the four lists stay aligned.
            sku_nodes = product.find_elements(By.CSS_SELECTOR, "[class='skuInfo']")
            product_purchase = sku_nodes[0].text if sku_nodes else ""
            print(product_purchase)
            product_csv.append(product_purchase)

            # Star rating: number of star images showing the filled-star URL.
            star_ratings = product.find_elements(By.CSS_SELECTOR, "[class='star']")
            star_rate = sum(
                1 for rating in star_ratings
                if rating.get_attribute('src') == FILLED_STAR_SRC
            )
            rating_csv.append(star_rate)
            print(star_rate)

            # Date of the review.
            date = product.find_element(By.CSS_SELECTOR, "[class='title right']").text
            date_review_csv.append(date)
            print(date)

        # A disabled "next" button means this was the last page.
        if driver.find_elements(By.CSS_SELECTOR, NEXT_BUTTON_SELECTOR + "[disabled]"):
            break

        button_next = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, NEXT_BUTTON_SELECTOR))
        )
        # Click through JavaScript rather than a native click; the question
        # reports "Element is not clickable at point" for the native click.
        driver.execute_script("arguments[0].click();", button_next)
        print("next page")
        # Give the next page of reviews time to load before re-reading them.
        time.sleep(2)
finally:
    # quit() ends the whole browser session even when a lookup or wait raised.
    driver.quit()

print(review_csv)
print(product_csv)
print(rating_csv)
print(date_review_csv)

List printed like:

['Fast delivery, send within 3 days, in bubble envelope. Product expiry date : 0522', 'received in good condition. have not try it yet', 'No comments/review is an image', 'No comments/review is an image', 'No comments/review is an image', 'No comments/review is an image', 'No comments/review is an image', 'No comments/review is an image']

['Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30', 'Volume (ml):30']

[5, 5, 5, 4, 4, 5, 5, 5]

['24 Oct 2019', '17 Nov 2019', '21 Nov 2019', '25 Oct 2019', '29 Aug 2019', '24 Apr 2019', '19 Jan 2019', '11 Nov 2018']
https://en.xdnf.cn/q/120178.html

Related Q&A

How to compare 2 successive row values in a resultset object using python

I have a table issue_logs:id | issue_id | from_status | to_status | up_date | remarks ----+----------+-------------+-----------+----------------------------------+----------…

Getting all possible combination for [1,0] with length 3 [0,0,0] to [1,1,1]

from itertools import combinationsdef n_length_combo(arr, n):# using set to deal# with duplicates return list(combinations(arr, n))# Driver Function if __name__ == "__main__":arr = 01n = 3pri…

Compare values under multiple conditions of one column in Python

I have the following data:data = {"index": [1, 2, 3, 4, 5],"name": ["A", "A", "B", "B", "B"],"type": [s1, s2, s1, s2, s3]…

Python: Tkinter :Dynamically Create Label

I am trying to create Label Dynamically , I am getting invalid Syntax. Can you please help me what i am missing or any alternativecrsr = cnxn.execute(query)row_num=2column_num=0Variable_Number=1for row…

TypeError: str object is not callable when trying to click datepicker

The relevant HTML<div id="datepickerbox" class="ym-gbox-left"><div class="datepick_label"><div id="datepicker" class="hasDatepicker">…

Stanford parser with NLTK produces empty output

I am trying to use the Stanford parser in a small application written in Python with the NLTK interface. I tried the code given below.Everything seems to work right, no errors, Java is launched but I s…

How do you return a list of the matched item in string with regex? [duplicate]

This question already has answers here:Regular expression to match a dot [duplicate](8 answers)Closed 3 years ago.I made this simple functions that searches for emails in the source code of a page , th…

Indentation Error [closed]

This question is unlikely to help any future visitors; it is only relevant to a small geographic area, a specific moment in time, or an extraordinarily narrow situation that is not generally applicable…

open csv file in python to customize dictionary [duplicate]

This question already has answers here:Creating a dictionary from a CSV file(4 answers)Closed 9 years ago.I would like to know to load this csv file:Epitope,ID,Frequency,AssayAVNIVGYSNAQGVDY,123431,27.…

How does UserPassesTestMixin in django work?

views.pyclass ProfileEdit(UserPassesTestMixin, UpdateView):model = Userform_class = ProfileFormtemplate_name="profile/profile_new.html"def test_func(self):x = self.request.user.idprint (x)y =…