lxml: Parse HTML or XML with XPath in Python

lxml is the most feature-rich and easy-to-use library for processing XML and HTML in Python.

import urllib.request

import lxml
import lxml.etree
import lxml.html

# HTML
# Parse HTML markup that is already in memory as a string, then run an XPath
# query that selects the `src` attribute of every <img> element.
html_text = 'some html string'  # placeholder: substitute the real markup
doc = lxml.html.fromstring(html_text)
image_urls = doc.xpath('//img/@src')  # one entry per matching attribute

# Parse HTML fetched from a URL and collect the `src` of every <img> element.
#
# The download goes through urllib rather than passing the URL straight to
# lxml: lxml delegates URL loading to libxml2's minimal HTTP client, which
# does not support HTTPS and is not available in every libxml2 build.
url = 'http://www.gotceleb.com/rosie-huntington-whiteley-vogue-brazil-magazine-april-2013-2013-03-29.html'
# The timeout stops an unresponsive server from blocking the script
# indefinitely; the `with` block closes the connection once parsing is done.
with urllib.request.urlopen(url, timeout=30) as response:
    # base_url records the final (post-redirect) address on the parsed tree,
    # since a file-like object carries no URL of its own for lxml to read.
    doc = lxml.html.parse(response, base_url=response.geturl())
# `doc` is an ElementTree here (parse() returns the whole tree, not the root
# element); it supports the same .xpath() call.
image_urls = doc.xpath('//img/@src')

# XML
# Same XPath query as the HTML examples, but the input is parsed with the
# XML parser in lxml.etree instead of the HTML parser.
# NOTE: the literal below is only a placeholder and is not well-formed XML;
# replace it with a real document before running this section.
xml_text = 'some xml string'
doc = lxml.etree.fromstring(xml_text)
image_urls = doc.xpath('//img/@src')  # `src` attribute of every <img> element