All Posts Tagged “xpath”

Parsing HTML or XML with lxml and querying it via XPath

import lxml
import lxml.etree  # imported explicitly: the XML example calls lxml.etree.fromstring
import lxml.html

# Three independent examples. Each one runs the same XPath query, which
# selects the `src` attribute of every <img> element in the document.
# `doc` and `image_urls` are rebound by each example, so only the last
# result stays bound once the whole script has run.

# HTML: parse markup that is already held in a string.
html_text = 'some html string'  # placeholder: substitute real HTML markup
doc = lxml.html.fromstring(html_text)
image_urls = doc.xpath('//img/@src')

# HTML: let lxml load and parse the document from a URL.
# NOTE(review): this relies on the parser being able to fetch the URL
# itself -- confirm that the installed lxml/libxml2 build supports it.
url = 'http://www.gotceleb.com/rosie-huntington-whiteley-vogue-brazil-magazine-april-2013-2013-03-29.html'
doc = lxml.html.parse(url)
image_urls = doc.xpath('//img/@src')

# XML: parse a document that is already held in a string.
xml_text = 'some xml string'  # placeholder: substitute a well-formed XML document
doc = lxml.etree.fromstring(xml_text)
image_urls = doc.xpath('//img/@src')