| 日本語形態素分析エンジンMeCabを使って,WebBrowserの表示にルビを振ります。 クロスドメインのiframe以外は,iframeの中も再帰的にテキストノードを検索してMeCabで分析し,ルビエレメントノードに置き換えます。(クロスドメインのiframeにアクセスしようとした際の例外(UnauthorizedAccessException)は握りつぶしています。) マネージコードだけでは,テキストノードを取得できないので,mshtmlを参照設定して使用しています。COMオブジェクトの開放はやっているつもりですが,完全ではないかもしれません。 |
まず,Microsoft HTML Object Libraryを参照設定してください。
次に,下のコードをフォームのコードにコピー・貼り付けをすれば動作します。Option Explicit On
Option Strict On
Imports System.IO
Imports System.Runtime.InteropServices
Imports System.Text.RegularExpressions
Imports Microsoft.Win32
Imports mshtml
Public Class Form1
Private WithEvents WebBrowser As New WebBrowser With {.Dock = DockStyle.Fill}
Private MeCab As New MeCab("--node-format=[#%M:%f[7]:%f[0]#] --unk-format=%M --eos-format=\0")
Public Enum NODETYPE As UShort
ELEMENT_NODE = 1
TEXT_NODE = 3
PROCESSING_INSTRUCTION_NODE = 7
COMMENT_NODE = 8
DOCUMENT_NODE = 9
DOCUMENT_TYPE_NODE = 10
DOCUMENT_FRAGMENT_NODE = 11
End Enum
Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
BrowserEmulation.CreateRenderingModeRegkey(BrowserEmulation.Emulation.IE11Edge)
Me.WebBrowser.ScriptErrorsSuppressed = True
Me.Controls.Add(Me.WebBrowser)
Me.WebBrowser.GoHome()
End Sub
Private Sub Form1_FormClosing(sender As Object, e As FormClosingEventArgs) Handles Me.FormClosing
BrowserEmulation.DeleteRenderingModeRegkey()
MeCab.Dispose()
End Sub
Private Sub WebBrowser_DocumentCompleted(sender As Object, e As WebBrowserDocumentCompletedEventArgs) Handles WebBrowser.DocumentCompleted
Dim domElement As IHTMLDOMNode = DirectCast(Me.WebBrowser.Document.Body.DomElement, IHTMLDOMNode)
AttachRuby(domElement)
Marshal.FinalReleaseComObject(domElement)
End Sub
Private Sub AttachRuby(ByVal node As IHTMLDOMNode)
Select Case node.nodeName.ToLower
Case "ruby", "select"
Return
End Select
Dim document As IHTMLDocument2 = Nothing
Dim childNodes As IHTMLDOMChildrenCollection = Nothing
Try
document = DirectCast(DirectCast(node, IHTMLDOMNode2).ownerDocument, IHTMLDocument2)
childNodes = DirectCast(node.childNodes, IHTMLDOMChildrenCollection)
For Each childNode As IHTMLDOMNode In childNodes
Try
If childNode.nodeType = NODETYPE.TEXT_NODE Then
Dim childNodeValue As String = DirectCast(childNode.nodeValue, String)
If Regex.IsMatch(childNodeValue, "[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FA]") Then
Dim result As String = MeCab.Parse(childNodeValue)
result = Regex.Replace(result, "\[#(.+?):(.+?):(.+?)\#]", New MatchEvaluator(AddressOf RegexMatchEvaluator))
Dim newElement As IHTMLElement = Nothing
Try
newElement = document.createElement(String.Empty)
newElement.innerHTML = result
childNode.replaceNode(DirectCast(newElement, IHTMLDOMNode))
Finally
If newElement IsNot Nothing Then
Marshal.ReleaseComObject(newElement)
End If
End Try
End If
Else
If childNode.nodeName.ToLower = "frame" Or childNode.nodeName.ToLower = "iframe" Then
Dim frame As IHTMLWindow2 = Nothing
Dim frameDocument As IHTMLDocument2 = Nothing
Dim frameBody As IHTMLElement = Nothing
Try
frame = DirectCast(childNode, IHTMLFrameBase2).contentWindow
frameDocument = frame.document
frameBody = frameDocument.body
AttachRuby(DirectCast(frameBody, IHTMLDOMNode))
Catch ex As UnauthorizedAccessException
Finally
If frameBody IsNot Nothing Then
Marshal.ReleaseComObject(frameBody)
End If
If frameDocument IsNot Nothing Then
Marshal.ReleaseComObject(frameDocument)
End If
If frame IsNot Nothing Then
Marshal.ReleaseComObject(frame)
End If
End Try
Else
If childNode.hasChildNodes Then
AttachRuby(childNode)
End If
End If
End If
Finally
If childNode IsNot Nothing Then
Marshal.ReleaseComObject(childNode)
End If
End Try
Next
Finally
If childNodes IsNot Nothing Then
Marshal.ReleaseComObject(childNodes)
End If
If document IsNot Nothing Then
Marshal.ReleaseComObject(document)
End If
End Try
End Sub
Private Function RegexMatchEvaluator(ByVal M As Match) As String
Dim kanji As String = M.Groups(1).Value
Dim kana As String = StrConv(M.Groups(2).Value, VbStrConv.Hiragana)
Dim result As String = kanji
If Regex.IsMatch(kanji, "\p{IsCJKUnifiedIdeographs}") Then
result = "<ruby>" & kanji & "<rt>" & kana & "</rt></ruby>"
End If
Return result
End Function
End Class
Public Class BrowserEmulation
Public Enum Emulation
IE11Edge = 11001
IE11 = 11000
IE10Std = 10001
IE10 = 10000
IE9Std = 9999
IE9 = 9000
IE8Std = 8888
IE8 = 8000
IE7 = 7000
End Enum
Private Const FEATURE_BROWSER_EMULATION As String = "Software\Microsoft\Internet Explorer\Main\FeatureControl\FEATURE_BROWSER_EMULATION"
Public Shared Sub CreateRenderingModeRegkey(EmulationMode As Emulation)
Using Regkey As RegistryKey = Registry.CurrentUser.CreateSubKey(FEATURE_BROWSER_EMULATION)
Regkey.SetValue(GetReleaseBuildName, EmulationMode, RegistryValueKind.DWord)
Regkey.SetValue(GetDebugBuildName, EmulationMode, RegistryValueKind.DWord)
End Using
End Sub
Public Shared Sub DeleteRenderingModeRegkey()
Using Regkey As RegistryKey = Registry.CurrentUser.CreateSubKey(FEATURE_BROWSER_EMULATION)
Regkey.DeleteValue(GetReleaseBuildName)
Regkey.DeleteValue(GetDebugBuildName)
End Using
End Sub
Private Shared Function GetReleaseBuildName() As String
Return Path.GetFileName(Application.ExecutablePath)
End Function
Private Shared Function GetDebugBuildName() As String
Return GetReleaseBuildName.Replace(".exe", ".svhost.exe")
End Function
End Class
Public Class MeCab
Implements IDisposable
<DllImport("libmecab.dll", CallingConvention:=CallingConvention.Cdecl)>
Public Shared Function mecab_new2(ByVal arg As String) As IntPtr
End Function
<DllImport("libmecab.dll", CallingConvention:=CallingConvention.Cdecl)>
Public Shared Function mecab_sparse_tostr(ByVal m As IntPtr, ByVal str As String) As IntPtr
End Function
<DllImport("libmecab.dll", CallingConvention:=CallingConvention.Cdecl)>
Public Shared Sub mecab_destroy(ByVal m As IntPtr)
End Sub
Private ptrMeCab As IntPtr
Sub New()
Me.New(String.Empty)
End Sub
Sub New(ByVal Arg As String)
ptrMeCab = mecab_new2(Arg)
End Sub
Public Function Parse(ByVal [String] As String) As String
Dim ptrResult As IntPtr = mecab_sparse_tostr(ptrMeCab, [String])
Dim strResult As String = Marshal.PtrToStringAnsi(ptrResult)
Return strResult
End Function
Public Overloads Sub Dispose() Implements IDisposable.Dispose
mecab_destroy(ptrMeCab)
GC.SuppressFinalize(Me)
End Sub
Protected Overrides Sub Finalize()
Dispose()
End Sub
End Class