# Third-party HTML/XML parsing library used by the rest of this script.
from bs4 import BeautifulSoup

# Sample HTML document used as the parsing input for the examples below.
# It contains a <head> with a <title> and several <meta> tags, and a <body>
# with one <div class="rows"> wrapping three links, followed by three <p> tags.
html1 = """
<!DOCTYPE html><html lang="en" xmlns="http://www.w3.org/1999/xhtml"><head> <meta charset="utf-8" /> <title>我的第一个网页</title> <meta name="generator" content="EverEdit" /> <meta name="author" content="" /> <meta name="keywords" content="" /> <meta name="description" content="" /></head><body> <div class="rows"> <a href="http://www.baidu.com/" target="_blank"> <div class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color1"> <span class="vfsd_a_title">百度</span> </div> </a> <a href="http://www.google.com/" target="_blank"> <div class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color3"> <span class="vfsd_a_title">Google</span> </div> </a> <a href="http://www.oschina.net/" target="_blank"> <div class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color2"> <span class="vfsd_a_title">Stack Overflow</span> </div> </a> </div> <p class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color2">你好 <span class="vfsd_a_title">CSDN</span> </p> <p class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color2"> <span class="vfsd_a_title">FaceBook</span> </p> <p class="nmn" id="nmn1"> <span class="vfsd_a_title">开源中国</span> </p></body></html>"""
# Parse the sample markup using the 'lxml' parser backend
# (the lxml package must be installed for this parser name to resolve).
soup = BeautifulSoup(html1, 'lxml')

# Printing a Tag renders its markup, so the <title> tags are included.
print(soup.title)
# Output:
# <title>我的第一个网页</title>
# .string yields only the text held by the <title> tag, without the markup.
print(soup.title.string)
# Output:
# 我的第一个网页
# Printing soup.head renders the entire <head> element, including its
# <meta> and <title> children - not just the title text.
print(soup.head)
# Output (abbreviated here; exact serialization may vary with parser version):
# <head> <meta charset="utf-8"/> <title>我的第一个网页</title> ... </head>
# soup.div resolves to the first <div> in the document (class "rows");
# .children iterates over its direct children only, text nodes included.
for index, child in enumerate(soup.div.children):
    print(index, child)
# Output: one "index child" line per direct child. For this markup the
# children are expected to be whitespace-only text nodes alternating with
# the three <a> elements (百度, Google, Stack Overflow), each <a> printed
# with its full nested markup.