I want to extract web elements from the table 'MANUFACTURING AT A GLANCE' on the given website. However, the name of one row contains a ' (single quote), and this interferes with the quoting in my XPath expression. How do I overcome this issue? The same code works for the other rows.
import requests
from lxml import html, etree

ism_pmi_url = 'https://www.instituteforsupplymanagement.org/ismreport/mfgrob.cfm?SSO=1'
page = requests.get(ism_pmi_url)
tree = html.fromstring(page.content)

# The row label contains an apostrophe. Embedding it directly in a
# single-quoted Python string terminates the string early (a syntax error),
# and XPath 1.0 string literals have no escape mechanism of their own.
# Passing the label as an XPath variable ($label) avoids quoting entirely.
PMI_CustomerInventories = tree.xpath(
    '//strong[text()=$label]/../../following-sibling::td/p/text()',
    label="Customers' Inventories",
)
# First text node of the cells that follow the labelled cell.
PMI_CustomerInventories_Curr_Val = PMI_CustomerInventories[0]
This is my approach to avoid your problem.
It may not be exactly what you need, but it could help you get the idea.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Scrape the second table inside #home_feature_container into a list of rows.

Instead of matching a row by its label (which may contain quotes), walk every
row of the table by position and collect the cell texts.
"""

import re
import requests
import lxml.html
from pprint import pprint


def load_lxml(response):
    """Parse the text of an HTTP response into an lxml HTML tree."""
    return lxml.html.fromstring(response.text)


url = 'https://www.instituteforsupplymanagement.org/ismreport/mfgrob.cfm?SSO=1'
response = requests.get(url)
root = load_lxml(response)

headers = []
data = []
for index, row in enumerate(root.xpath('//*[@id="home_feature_container"]/div/div/div/span/table[2]/tbody/tr')):
    rows = []
    for cindex, column in enumerate(row.xpath('./th//text() | ./td//text()')):
        # The second text node of every row is skipped.
        if cindex == 1:
            continue
        column = column.strip()
        # Row 0 is ignored completely; whitespace-only text nodes are dropped.
        if index == 0 or not column:
            continue
        elif index == 1:
            # Row 1 supplies the column headings.
            headers.append(column)
        else:
            rows.append(column)
    # Keep only rows that yielded exactly six non-empty values.
    if len(rows) == 6:
        data.append(rows)

# Put the headings in front of the collected rows.
data.insert(0, headers)
pprint(data)
Result:
[['Series Index','Feb','Series Index','Jan','Percentage','Point','Change','Direction','Rate of Change','Trend* (Months)'],['65.1', '60.4', '+4.7', 'Growing', 'Faster', '6'],['62.9', '61.4', '+1.5', 'Growing', 'Faster', '6'],['54.2', '56.1', '-1.9', 'Growing', 'Slower', '5'],['54.8', '53.6', '+1.2', 'Slowing', 'Faster', '10'],['51.5', '48.5', '+3.0', 'Growing', 'From Contracting', '1'],['47.5', '48.5', '-1.0', 'Too Low', 'Faster', '5'],['68.0', '69.0', '-1.0', 'Increasing', 'Slower', '12'],['57.0', '49.5', '+7.5', 'Growing', 'From Contracting', '1'],['55.0', '54.5', '+0.5', 'Growing', 'Faster', '12'],['54.0', '50.0', '+4.0', 'Growing', 'From Unchanged', '1']]
[Finished in 2.9s]