웹마짱구's 블로그

웹페이지 HTML 긁어오기 - With CDO.Message

Windows/ASP : 2006/12/26 14:18


<% Option Explicit
'####### 서브루틴
Function RegExpTest(patrn, test_str)
    ' Pattern search: reports whether test_str contains at least one match
    ' for the regular expression patrn.
    '   patrn    - regular expression pattern text
    '   test_str - string to search
    ' Returns True when a match exists, False otherwise. Matching ignores case.
    Dim matcher

    Set matcher = New RegExp

    With matcher
        .IgnoreCase = True
        .Pattern = patrn
        RegExpTest = .Test(test_str)
    End With

    Set matcher = Nothing
End Function

Function RegExpExec(patrn, test_str)
    ' Runs the regular expression patrn over test_str and returns the
    ' Matches collection produced by RegExp.Execute.
    '   patrn    - regular expression pattern text
    '   test_str - string to search
    ' The search ignores case and is global (every occurrence is collected,
    ' not just the first one).
    Dim engine

    Set engine = New RegExp
    engine.Global = True
    engine.IgnoreCase = True
    engine.Pattern = patrn

    Set RegExpExec = engine.Execute(test_str)

    Set engine = Nothing
End Function

Function RegExpReplace(patrn, used_str, repl_str)
    ' Replaces every match of the regular expression patrn inside used_str
    ' with repl_str and returns the resulting string.
    '   patrn    - regular expression pattern text
    '   used_str - string to operate on
    '   repl_str - replacement text
    ' Matching ignores case and is global (all occurrences are replaced).
    Dim replacer

    Set replacer = New RegExp

    With replacer
        .Global = True
        .IgnoreCase = True
        .Pattern = patrn
        RegExpReplace = .Replace(used_str, repl_str)
    End With

    Set replacer = Nothing
End Function

Sub get_site_array(patrn, using_str)
    ' Collects the matches of patrn in using_str into the page-level array
    ' site_arr and records how many entries were stored in the page-level
    ' variable search_cnt.
    '   patrn     - regular expression describing one result entry
    '   using_str - text (downloaded HTML) to scan
    ' At most as many matches as site_arr can hold are kept; extra matches
    ' are ignored. Entries of site_arr beyond search_cnt are left untouched.
    Dim found, capacity, idx

    Set found = RegExpExec(patrn, using_str)

    ' Derive the cap from the array itself so it cannot drift out of sync
    ' with the Dim of site_arr (site_arr(9) holds 10 entries).
    capacity = UBound(site_arr) + 1

    search_cnt = found.Count
    If search_cnt > capacity Then search_cnt = capacity

    ' A local index is used on purpose: looping on the page-level counter i
    ' would overwrite the caller's loop variable.
    For idx = 0 To search_cnt - 1
        site_arr(idx) = found.Item(idx).Value
    Next

    Set found = Nothing
End Sub

Function get_url(used_str)
    ' Extracts the first absolute http link from an HTML fragment.
    '   used_str - HTML text expected to contain href="http://..."
    ' Returns the link target WITHOUT the leading "http://" (callers prepend
    ' it again when rendering), or an empty string when the fragment contains
    ' no such href.
    Dim hits

    get_url = ""

    ' The capture group takes everything up to the closing quote. Excluding
    ' quotes and whitespace keeps the match from running into a following
    ' attribute (e.g. href="http://a.com"class="x"), which a greedy \S+ did.
    Set hits = RegExpExec("href=""http://([^""\s]+)""", used_str)

    ' Guard against an empty collection: Item(0) raises a runtime error
    ' when nothing matched.
    If hits.Count > 0 Then
        get_url = hits.Item(0).SubMatches(0)
    End If

    Set hits = Nothing
End Function

Function get_valid_str(used_str, patrn_1, patrn_2)
    ' Returns the text found between a leading and a trailing delimiter.
    '   used_str - text to search
    '   patrn_1  - leading delimiter
    '   patrn_2  - trailing delimiter
    ' The first match of patrn_1 & ".+" & patrn_2 is taken, and every literal
    ' occurrence of patrn_1 and patrn_2 is then removed from it. Returns an
    ' empty string when used_str contains no such span.
    '
    ' NOTE: the delimiters are used twice - as regular-expression fragments
    ' for matching and as literal strings for stripping. A delimiter that
    ' contains regex metacharacters (or matches only case-insensitively)
    ' can match yet stay in the result, because Replace compares literally.
    Dim hits, span

    get_valid_str = ""

    Set hits = RegExpExec(patrn_1 & ".+" & patrn_2, used_str)

    ' Item(0) raises a runtime error on an empty collection, so check first.
    If hits.Count > 0 Then
        span = hits.Item(0).Value
        get_valid_str = Replace(Replace(span, patrn_1, ""), patrn_2, "")
    End If

    Set hits = Nothing
End Function

Function get_html_web()
    ' Downloads the "siteweb" search result page for the page-level
    ' search_word and returns its HTML body as a string.
    ' Uses (and releases) the page-level objCDO variable for the CDO.Message
    ' object that performs the download.
    Dim target_url

    Set objCDO = Server.CreateObject("CDO.Message")

    target_url = "http://dirsearch.naver.com/search.naver?where=siteweb&query=" & Server.UrlEncode(search_word) & "&hw=1"

    ' 31 is the flags argument of CreateMHTMLBody
    ' (presumably cdoSuppressAll - confirm against the CDO reference).
    objCDO.CreateMHTMLBody target_url, 31
    get_html_web = objCDO.HTMLBody

    Set objCDO = Nothing
End Function
'############# 서브루틴
' ---- Page script -----------------------------------------------------------
' Looks up the keyword given in the "search_word" query-string parameter on the
' Naver directory search and writes up to 10 results as HTML.
' Page-level state: search_cnt and site_arr are filled by get_site_array;
' search_word and objCDO are also used by get_html_web.
Dim objCDO, get_html, search_word, pattern_1, pattern_2, i, search_cnt, site_arr(9), site_summary(9), site_url(9)

search_word = Request.QueryString("search_word")

' Download the "site" result page. 31 is the CreateMHTMLBody flags argument
' (presumably cdoSuppressAll - confirm against the CDO reference).
Set objCDO = Server.CreateObject("CDO.Message")

objCDO.CreateMHTMLBody "http://dirsearch.naver.com/search.naver?where=site&query=" & Server.UrlEncode(search_word),31
get_html = objCDO.HTMLBody

Set objCDO = Nothing

' One "site" result entry: a <div ...> that starts with an <a ...> link,
' up to the nearest </div>.
pattern_1 = "<div[^>]*>\s*(?:<a)[\s\S]*?(?:</div>)" ' variant contributed by "Han"
' Earlier variants, kept for reference:
'"<div id=menulink>[\sWS]+?<a[\s\S]+?>[\s\S]+?</a>[\s\S]+?</div>" ' variant by "Cheonsangtaeja"
'"<div id=menulink>[\s\S]+?(</div>)" ' original

If InStr(get_html, "adultcheck.php") > 0 Then
    ' The result page refers to the adult-check page: blank the sibling
    ' frames, show an alert and go back instead of rendering results.
    Response.Write "<Script Language=JavaScript>parent.document.all['yahoo_frm'].src=""about:blank"";parent.document.all['daum_frm'].src=""about:blank"";alert(""성인키워드"");history.go(-1);</Script>"
    Response.End
ElseIf RegExpTest(pattern_1, get_html) Then
    ' "site" results exist.
    Call get_site_array(pattern_1, get_html)

    For i = 0 To search_cnt - 1
        ' (disabled filter) If Not RegExpTest("<font size=2 color=gray>", site_arr(i)) Then
        site_url(i) = get_url(site_arr(i))

        ' Reduce the entry to its summary text: drop the opening link tag,
        ' any remaining complete anchors, and the wrapping div markup.
        site_summary(i) = RegExpReplace("<a.+site""\)'>", site_arr(i), "")
        site_summary(i) = RegExpReplace("<a.+</a>", site_summary(i), "")
        site_summary(i) = Replace(Replace(site_summary(i), "<div id=menulink>", ""), "</div>", "")

        Response.Write "<img src=""/images/search_icon_01.gif""><a href=""http://" & site_url(i) & """ target=""naver"">" & site_summary(i) & "<br><font color=""#779252"">http://"

        ' The link keeps the full URL (already written above); only the
        ' displayed copy is shortened to 70 characters.
        If Len(site_url(i)) > 70 Then site_url(i) = Left(site_url(i), 70) & ".."

        Response.Write site_url(i) & "</font><p>"
        ' (disabled filter) End If
    Next
Else
    ' No "site" results: fall back to the "siteweb" result page.
    get_html = get_html_web

    ' One "siteweb" entry: from one <font size="2" id="ln11"> marker up to
    ' and including the next one.
    pattern_2 = "<font size=""2"" id=""ln11"">[\s\S]+?(<font size=""2"" id=""ln11"">)"

    If RegExpTest(pattern_2, get_html) Then

        Call get_site_array(pattern_2, get_html)

        For i = 0 To search_cnt - 1
            site_url(i) = get_url(site_arr(i))

            ' Keep only the text between the opening link tag and the
            ' first </td>.
            site_summary(i) = RegExpReplace("<font.+siteweb""\)'>", site_arr(i), "")
            site_summary(i) = RegExpReplace("</td>[\s\S\.]+", site_summary(i), "")

            Response.Write "<img src=""/images/search_icon_01.gif""><a href=""http://" & site_url(i) & """ target=""naver"">" & site_summary(i) & "<br><font color=""#779252"">http://"

            If Len(site_url(i)) > 70 Then site_url(i) = Left(site_url(i), 70) & ".."

            Response.Write site_url(i) & "</font><p>"
        Next
    Else
        ' search_word comes straight from the query string, so it must be
        ' HTML-encoded before being echoed; writing it raw allows script
        ' injection (reflected XSS).
        Response.Write "검색어 <b>" & Server.HTMLEncode(search_word) & "</b> 에 대한 검색결과가 없거나 시간 초과입니다."
    End If
End If
%>
<!-- Page shell: on load, sets the height of the parent's 'naver_frm' element to this document's scroll height. -->
<body leftmargin="0" marginwidth="0" marginheight="0"
onload="parent.document.all['naver_frm'].height=document.body.scrollHeight;">
<style>
/* Base text size and spacing for the result list; images are drawn without a border. */
body{font-size:9pt; line-height:150%;}
td{font-size:9pt;}
img{border:none;}
</style>

top

Windows/ASP : 2006/12/26 14:18 Trackback ( 1 ) : Comment. : view 2399

Trackback Address :: http://jjangu.pe.kr/blog/trackback/1

  1. Tracked from mcsong's 나른한 illy coffee 2잔 2007/02/24 15:17 DELETE

    Subject: 외부 URL 페이지 가져오기

    아래 이미지의 형태를 구현하기 위한 순서이다. 1. TextBox 1개, 버튼 2개, Literal 1개로 페이지를 구성한다. 2. 비하인드 C# 코드에서 버튼에 대한 구현을 한다. 2.1 가져오기 버튼 구현: System.Net.WebClient client = new System.Net.WebClient(); System.IO.Stream streamData = client.OpenRead(tbx_url.Text); System.IO.StreamRead..

Write a comment.




: 1 : ... 518 : 519 : 520 : 521 : 522 :