사용법

' Usage example: pass the HTML held in content through StripHTML and
' store the returned string back into the same variable.
content = StripHTML(content)

' Sanitizes an HTML string and returns the cleaned result.
'
' Processing steps, in order:
'   1. Collapse runs of spaces and remove "=" / ";" followed by a CRLF.
'   2. Delete <head>, <script>, <style>, <object> and <link> elements
'      together with their content.
'   3. Neutralize a fixed set of inline event handlers and javascript: hrefs
'      by prefixing them with "x".
'   4. Drop all attributes from <span> and <div> tags.
'   5. Delete <input> tags.
'   6. Delete every remaining tag that is not on the allow-list
'      (img/image, a, div, span, table, tr, td, li, p).
'
' Parameters:
'   oSource - the HTML text to clean. Null or Empty yields "".
'
' Returns:
'   The sanitized string.
'
' NOTE(review): only onclick/onmouseover/onmouseout/onchange are neutralized,
' and attributes on allowed tags other than span/div are kept as-is. Other
' handlers (e.g. onerror on an image tag) pass through unchanged - confirm
' this is acceptable before relying on this function for untrusted input.
Function StripHTML(oSource)

    Dim Result_Text

    ' Nothing to clean; avoid handing Null/Empty to the regex engine.
    If IsNull(oSource) Or IsEmpty(oSource) Then
        StripHTML = ""
        Exit Function
    End If

    ' Collapse any run of two or more spaces into a single space.
    Result_Text = ReplaceText(oSource, " ( )+", " ")

    ' Remove "=" and ";" when immediately followed by a line break.
    ' The second call previously used the misspelled name "vblrcf", which
    ' evaluates to Empty and therefore stripped EVERY ";" from the text.
    ' NOTE(review): "=" & CRLF looks like a quoted-printable soft line break;
    ' the purpose of the ";" & CRLF rule is not evident here - confirm.
    Result_Text = Replace(Result_Text, "=" & vbCrLf, "")
    Result_Text = Replace(Result_Text, ";" & vbCrLf, "")

    ' Remove the head element: normalize the tags first, then delete the
    ' element and everything inside it. \b keeps <header> from being
    ' rewritten to <head>; the lazy quantifier stops at the first </head>.
    Result_Text = ReplaceText(Result_Text, "<( )*head\b([^>])*>", "<head>")
    Result_Text = ReplaceText(Result_Text, "(<( )*(/)( )*head( )*>)", "</head>")
    Result_Text = ReplaceText(Result_Text, "(<head>)[\s\S]*?(</head>)", "")

    ' Remove all script elements and their content.
    Result_Text = ReplaceText(Result_Text, "<( )*script([^>])*?>", "<script>")
    Result_Text = ReplaceText(Result_Text, "(<( )*(/)( )*?script( )*>)", "</script>")
    Result_Text = ReplaceText(Result_Text, "(<script>)[\s\S]*?(</script>)", "")

    ' Remove all style elements and their content.
    Result_Text = ReplaceText(Result_Text, "<( )*style([^>])*?>", "<style>")
    Result_Text = ReplaceText(Result_Text, "(<( )*(/)( )*?style( )*>)", "</style>")
    Result_Text = ReplaceText(Result_Text, "(<style>)[\s\S]*?(</style>)", "")

    ' Remove all object elements and their content.
    Result_Text = ReplaceText(Result_Text, "<( )*object([^>])*?>", "<object>")
    Result_Text = ReplaceText(Result_Text, "(<( )*(/)( )*?object( )*>)", "</object>")
    Result_Text = ReplaceText(Result_Text, "(<object>)[\s\S]*?(</object>)", "")

    ' Remove link elements. Lazy matching so that two separate
    ' <link>...</link> pairs do not swallow the content between them.
    ' A bare <link> without a closing tag is removed by the allow-list
    ' pass at the end of this function.
    Result_Text = ReplaceText(Result_Text, "<( )*link([^>])*>", "<link>")
    Result_Text = ReplaceText(Result_Text, "(<( )*(/)( )*link( )*>)", "</link>")
    Result_Text = ReplaceText(Result_Text, "(<link>)[\s\S]*?(</link>)", "")

    ' Neutralize inline JavaScript by renaming the attribute / URL scheme.
    Result_Text = ReplaceText(Result_Text, "onclick=", "xonclick=")
    Result_Text = ReplaceText(Result_Text, "onmouseover=", "xonmouseover=")
    Result_Text = ReplaceText(Result_Text, "onmouseout=", "xonmouseout=")
    Result_Text = ReplaceText(Result_Text, "onchange=", "xonchange=")
    Result_Text = ReplaceText(Result_Text, "href=""javascript", "href=""xjavascript")

    ' Strip every attribute from span and div tags.
    Result_Text = ReplaceText(Result_Text, "<( )*span([^>])*?>", "<span>")
    Result_Text = ReplaceText(Result_Text, "(<( )*(/)( )*?span( )*>)", "</span>")

    Result_Text = ReplaceText(Result_Text, "<( )*div([^>])*?>", "<div>")
    Result_Text = ReplaceText(Result_Text, "(<( )*(/)( )*?div( )*>)", "</div>")

    ' Delete input tags.
    Result_Text = ReplaceText(Result_Text, "<( )*input([^>])*?>", "")

    ' Delete every tag (opening or closing) whose name is not on the
    ' allow-list. The previous pattern wrapped the list in [^...], which is
    ' a character class rather than an alternation, so most disallowed tags
    ' were never removed. A negative lookahead expresses the intended rule;
    ' \b makes "p" match <p> but not <pre>, "a" match <a> but not <abbr>.
    Result_Text = ReplaceText(Result_Text, _
        "<(?!( )*/?( )*(img|image|a|div|span|table|tr|td|li|p)\b)[^>]*>", "")

    StripHTML = Result_Text
End Function

' Regex-based replace helper.
'
' Parameters:
'   str1    - the text to search.
'   patrn   - regular expression pattern (VBScript RegExp syntax).
'   replStr - replacement text applied to each match.
'
' Returns:
'   str1 with every match of patrn replaced by replStr. Matching is
'   case-insensitive and global (all occurrences, not only the first).
Function ReplaceText(str1, patrn, replStr)
    Dim matcher
    Set matcher = New RegExp

    matcher.Global = True
    matcher.IgnoreCase = True
    matcher.Pattern = patrn

    ReplaceText = matcher.Replace(str1, replStr)
End Function

2012/02/07 09:58 2012/02/07 09:58

Trackback Address :: https://youngsam.net/trackback/1753