以猫眼电影为例
1. 我们要获取猫眼电影榜单上的电影信息:影片名称、主演,以及观看和购票链接。获取后的效果如下图所示。
2. 不难看出,我们只需要观察网页源代码,找到关键信息所在的位置,再用正则表达式截取想要的内容,就可以得到相应的信息。具体的代码及解释如下:
Option Explicit
'Fetch the Maoyan movie board page and write, for every film found, its
'title (column 1), lead actors (column 2) and detail-page link (column 3)
'to the worksheet. Row 1 receives the column headers; all existing cell
'content is cleared first.
'
'The page is parsed by splitting the HTML on the opening of the
'<p class="name"> / <p class="star"> elements and taking the first regex
'match inside each following segment. Segments without a match leave the
'corresponding cell empty instead of raising a runtime error.
Sub getdy()
    'The URLs in the previous revision carried a proxy prefix in front of the
    'real host; these are the direct site addresses.
    Const BOARD_URL As String = "http://maoyan.com/board"
    Const FILM_URL_PREFIX As String = "http://maoyan.com/films/"
    Const NAME_MARKER As String = "<p class=""name"
    Const STAR_MARKER As String = "<p class=""star"

    Cells.Clear

    'Regular expression object; only the first match per segment is needed.
    Dim ret As Object
    Set ret = CreateObject("VBScript.RegExp")
    ret.Global = False

    'Synchronous request: send returns only after the response is complete,
    'so no readyState polling is required.
    Dim ht As Object
    Set ht = CreateObject("MSXML2.XMLHTTP")
    ht.Open "get", BOARD_URL, False
    ht.send
    If ht.Status <> 200 Then
        MsgBox "请求猫眼榜单失败,HTTP 状态码:" & ht.Status, vbExclamation
        Exit Sub
    End If

    Dim html As String
    html = ht.responseText

    Cells(1, 1) = "影片名称"
    Cells(1, 2) = "主演"
    Cells(1, 3) = "链接"

    Dim s() As String
    Dim i As Long
    Dim filmId As String
    Dim starText As String

    'Title and film id both live in the segment that follows the name marker.
    'Element 0 is the text before the first marker and is skipped.
    s = Split(html, NAME_MARKER)
    For i = 1 To UBound(s)
        'Title: first run of CJK characters in the segment.
        ret.Pattern = "[\u4e00-\u9fa5]+"
        Cells(i + 1, 1) = getdy_FirstMatch(ret, s(i))

        'Film id: first run of digits in the segment (taken from the link).
        ret.Pattern = "\d+"
        filmId = getdy_FirstMatch(ret, s(i))
        If Len(filmId) > 0 Then
            Cells(i + 1, 3) = FILM_URL_PREFIX & filmId
        End If
    Next

    'Lead actors: CJK characters followed by any non-whitespace characters.
    ret.Pattern = "([\u4e00-\u9fa5]+\S+)"
    s = Split(html, STAR_MARKER)
    For i = 1 To UBound(s)
        starText = getdy_FirstMatch(ret, s(i))
        'Strip the label. The page may use a full-width colon (U+FF1A)
        'rather than an ASCII one - TODO confirm; both forms are removed.
        starText = Replace(starText, "主演" & ChrW(&HFF1A), "")
        starText = Replace(starText, "主演:", "")
        Cells(i + 1, 2) = starText
    Next
End Sub

'Return the text of the first match of re in text, or "" when nothing matches.
Private Function getdy_FirstMatch(ByVal re As Object, ByVal text As String) As String
    Dim found As Object
    Set found = re.Execute(text)
    If found.Count > 0 Then
        getdy_FirstMatch = found(0).Value
    End If
End Function