웹페이지 HTML 긁어오기 - With CDO.Message
<% Option Explicit
'####### Helper routines (regex wrappers and scraping helpers)
' Pattern search: reports whether test_str contains at least one match
' for the regular expression patrn. Matching ignores letter case.
' Returns True when a match exists, False otherwise.
Function RegExpTest(patrn, test_str)
    Dim matcher
    Set matcher = New RegExp
    With matcher
        .IgnoreCase = True
        .Pattern = patrn
        RegExpTest = .Test(test_str)
    End With
    Set matcher = Nothing
End Function
' Runs the regular expression patrn against test_str and returns the
' resulting Matches collection. The search is case-insensitive and global
' (every match in the string is collected, not just the first).
Function RegExpExec(patrn, test_str)
    Dim finder
    Set finder = New RegExp
    With finder
        .Global = True
        .IgnoreCase = True
        .Pattern = patrn
        Set RegExpExec = .Execute(test_str)
    End With
    Set finder = Nothing
End Function
' Replaces every match of the regular expression patrn inside used_str
' with repl_str and returns the resulting string. The search is
' case-insensitive and global (all occurrences are replaced).
Function RegExpReplace(patrn, used_str, repl_str)
    Dim replacer
    Set replacer = New RegExp
    With replacer
        .Global = True
        .IgnoreCase = True
        .Pattern = patrn
        RegExpReplace = .Replace(used_str, repl_str)
    End With
    Set replacer = Nothing
End Function
' Collects the matches of patrn in using_str into the page-level array
' site_arr and stores the number of collected entries in the page-level
' variable search_cnt.
'
'   patrn     - regular expression; each full match becomes one entry
'   using_str - text to search (the fetched HTML)
'
' At most as many matches as site_arr can hold are kept; extra matches are
' ignored. Entries of site_arr beyond search_cnt are left untouched, so
' callers must only read indexes 0 .. search_cnt - 1.
Sub get_site_array(patrn, using_str)
    ' Local loop index: the original reused the page-level "i", silently
    ' overwriting the caller's counter.
    Dim found, capacity, idx
    Set found = RegExpExec(patrn, using_str)

    ' Derive the cap from the array itself instead of hard-coding 10, so
    ' the routine stays correct if site_arr is ever resized.
    capacity = UBound(site_arr) + 1

    search_cnt = found.Count
    If search_cnt > capacity Then search_cnt = capacity

    For idx = 0 To search_cnt - 1
        site_arr(idx) = found.Item(idx).Value
    Next
    Set found = Nothing
End Sub
' Extracts the first http link target from an HTML fragment.
'
'   used_str - HTML fragment expected to contain href="http://..."
'
' Returns the URL WITHOUT the leading "http://" (callers prepend it again),
' or an empty string when the fragment contains no such link.
Function get_url(used_str)
    Dim patrn, hits
    get_url = ""

    ' Capture everything after the scheme up to the closing quote.
    ' [^"\s]+ stops at the first quote, so adjacent attributes written
    ' without a separating space are not swallowed (the greedy \S+ did).
    patrn = "href=""http://([^""\s]+)"""
    Set hits = RegExpExec(patrn, used_str)

    ' Guard: Item(0) on an empty collection raises a runtime error.
    ' Using the capture group also removes the prefix regardless of letter
    ' case, matching the case-insensitive search.
    If hits.Count > 0 Then get_url = hits.Item(0).SubMatches(0)

    Set hits = Nothing
End Function
' Returns the text found between two delimiters in used_str.
'
'   used_str - text to search
'   patrn_1  - opening delimiter
'   patrn_2  - closing delimiter
'
' The first match of  patrn_1 & ".+" & patrn_2  is taken (".+" is greedy and
' does not cross line breaks), then both delimiters are removed from it.
' Returns an empty string when nothing matches.
'
' NOTE: the delimiters are used as regular expressions for the search but as
' literal, case-sensitive text when they are stripped from the match, so
' results are only clean for delimiters without regex metacharacters.
Function get_valid_str(used_str, patrn_1, patrn_2)
    Dim hits, patrn
    get_valid_str = ""

    patrn = patrn_1 & ".+" & patrn_2
    Set hits = RegExpExec(patrn, used_str)

    ' Guard: Item(0) on an empty collection raises a runtime error.
    If hits.Count > 0 Then
        get_valid_str = Replace(Replace(hits.Item(0).Value, patrn_1, ""), patrn_2, "")
    End If

    Set hits = Nothing
End Function
' Fetches the Naver "siteweb" search result page for the page-level
' search_word and returns its HTML body as a string.
' Uses the page-level objCDO variable as the CDO.Message holder and
' releases it before returning.
Function get_html_web()
    Dim target_url
    target_url = "http://dirsearch.naver.com/search.naver?where=siteweb&query=" & Server.UrlEncode(search_word) & "&hw=1"

    Set objCDO = Server.CreateObject("CDO.Message")
    With objCDO
        ' 31 is passed as the CreateMHTMLBody flags argument.
        .CreateMHTMLBody target_url, 31
        get_html_web = .HTMLBody
    End With
    Set objCDO = Nothing
End Function
'############# End of helper routines; main script follows
' ---------------------------------------------------------------------------
' Main script: fetch the Naver directory search page for the requested
' keyword, extract up to 10 results and print them as links.
'
' Page-level state (also read/written by the helper routines):
'   search_word  - keyword from the query string
'   get_html     - HTML of the fetched result page
'   site_arr     - raw HTML fragment of each result
'   site_url     - link target of each result, without "http://"
'   site_summary - display text of each result
'   search_cnt   - number of valid entries in the arrays above
' ---------------------------------------------------------------------------
Dim objCDO, get_html, search_word, pattern_1, pattern_2, i, search_cnt, site_arr(9), site_summary(9), site_url(9)
search_word = Request.QueryString("search_word")

' Fetch the "site" result page. A network failure or timeout raises a runtime
' error inside CDO; trap it so the page falls through to the
' "no result or timeout" message instead of aborting.
get_html = ""
On Error Resume Next
Set objCDO = Server.CreateObject("CDO.Message")
objCDO.CreateMHTMLBody "http://dirsearch.naver.com/search.naver?where=site&query=" & Server.UrlEncode(search_word),31
get_html = objCDO.HTMLBody
Set objCDO = Nothing
If Err.Number <> 0 Then
    get_html = ""
    Err.Clear
End If
On Error GoTo 0

pattern_1 = "<div[^>]*>\s*(?:<a)[\s\S]*?(?:</div>)" ' pattern version by "Han"
'"<div id=menulink>[\sWS]+?<a[\s\S]+?>[\s\S]+?</a>[\s\S]+?</div>" ==> version by "Cheonsangtaeja"
'"<div id=menulink>[\s\S]+?(</div>)" ==> original pattern

If InStr(get_html, "adultcheck.php") > 0 Then
    ' Adult keyword: blank the sibling frames, alert the user and go back.
    Response.Write "<Script Language=JavaScript>parent.document.all['yahoo_frm'].src=""about:blank"";parent.document.all['daum_frm'].src=""about:blank"";alert(""성인키워드"");history.go(-1);</Script>"
    Response.End
ElseIf RegExpTest(pattern_1, get_html) Then ' directory ("site") results exist
    Call get_site_array(pattern_1, get_html)
    For i = 0 To search_cnt - 1
        ' If Not RegExpTest("<font size=2 color=gray>", site_arr(i)) Then   (disabled filter)
        site_url(i) = get_url(site_arr(i))
        ' Strip the anchor markup so only the descriptive text remains.
        site_summary(i) = RegExpReplace("<a.+site""\)'>", site_arr(i), "")
        site_summary(i) = RegExpReplace("<a.+</a>", site_summary(i), "")
        site_summary(i) = Replace(Replace(site_summary(i), "<div id=menulink>", ""), "</div>", "")
        ' NOTE(review): site_summary/site_url come from third-party HTML and are
        ' written unescaped; the summary is intentionally emitted as markup.
        Response.Write "<img src=""/images/search_icon_01.gif""><a href=""http://" & site_url(i) & """ target=""naver"">" & site_summary(i) & "<br><font color=""#779252"">http://"
        ' Shorten only the displayed URL; the href above keeps the full one.
        If Len(site_url(i)) > 70 Then site_url(i) = Left(site_url(i), 70) & ".."
        ' Close the anchor that was opened above (it was left unclosed before).
        Response.Write site_url(i) & "</font></a><p>"
        ' End If
    Next
Else
    ' No directory results: fall back to the "siteweb" search.
    get_html = ""
    On Error Resume Next
    get_html = get_html_web
    If Err.Number <> 0 Then
        get_html = ""
        Err.Clear
    End If
    Set objCDO = Nothing ' get_html_web may have been interrupted before releasing it
    On Error GoTo 0

    pattern_2 = "<font size=""2"" id=""ln11"">[\s\S]+?(<font size=""2"" id=""ln11"">)"
    If RegExpTest(pattern_2, get_html) Then
        Call get_site_array(pattern_2, get_html)
        For i = 0 To search_cnt - 1
            site_url(i) = get_url(site_arr(i))
            site_summary(i) = RegExpReplace("<font.+siteweb""\)'>", site_arr(i), "")
            site_summary(i) = RegExpReplace("</td>[\s\S\.]+", site_summary(i), "")
            Response.Write "<img src=""/images/search_icon_01.gif""><a href=""http://" & site_url(i) & """ target=""naver"">" & site_summary(i) & "<br><font color=""#779252"">http://"
            If Len(site_url(i)) > 70 Then site_url(i) = Left(site_url(i), 70) & ".."
            Response.Write site_url(i) & "</font></a><p>"
        Next
    Else
        ' search_word is raw user input: HTML-encode it before echoing it back,
        ' otherwise the page is open to reflected cross-site scripting.
        Response.Write "검색어 <b>" & Server.HTMLEncode(search_word) & "</b> 에 대한 검색결과가 없거나 시간 초과입니다."
    End If
End If
%>
<!-- On load, set the height of the parent document's 'naver_frm' element to this page's scroll height (presumably this page is displayed inside that frame; confirm against the parent page). -->
<body leftmargin="0" marginwidth="0" marginheight="0"
onload="parent.document.all['naver_frm'].height=document.body.scrollHeight;">
<style>
/* Base typography for the result list; images are drawn without a border. */
body{font-size:9pt; line-height:150%;}
td{font-size:9pt;}
img{border:none;}
</style>
Trackback Address :: http://jjangu.pe.kr/blog/trackback/1
-
Tracked from mcsong's 나른한 illy coffee 2잔
2007/02/24 15:17
DELETE
Subject: 외부 URL 페이지 가져오기
아래 이미지의 형태를 구현하기 위한 순서이다. 1. TextBox 1개, 버튼 2개, Literal 1개로 페이지를 구성한다. 2. 비하인드 C# 코드에서 버튼에 대한 구현을 한다. 2.1 가져오기 버튼 구현: System.Net.WebClient client = new System.Net.WebClient(); System.IO.Stream streamData = client.OpenRead(tbx_url.Text); System.IO.StreamRead..



