This is part of the WikiTaxi article.
Script File: ss_AllPages.vbs
' ss_AllPages.vbs
'
' Loads a saved HTML file in Internet Explorer, finds every link that sits
' directly inside a cell of the table with id "fly1", and echoes one batch
' command per link that downloads <wikiSite>/wiki/Special:Export/<page> with
' wget into <outputFolder>\<cleaned link text>.xml.
'
' Arguments:
'   0  path of the HTML file to scrape (must exist)
'   1  path of the output folder (must exist)
'   2  base URL of the wiki site, without a trailing "/wiki/..."
'
' All output is written with WScript.Echo; the emitted lines are batch
' commands (wget / ping / set), so the script is presumably run under
' cscript with its output redirected into a .cmd file - confirm with caller.

Dim htmlFile, outputFolder, wikiSite
Dim ie, all, alist, a
Dim elementCount, href, startIndex, urlEnd

If WScript.Arguments.Count < 3 Then
    WScript.Echo "must specify parameter"
    WScript.Quit
End If

Set htmlFile = GetFileFromArgument( 0 )
Set outputFolder = GetFolderFromArgument( 1 )
wikiSite = WScript.Arguments.Item(2)

' Load the htmlFile and get a pointer to the "all" collection of the DOM
Set ie = CreateObject("InternetExplorer.Application")
Set all = GetIEForHTML( ie, htmlFile.Path )

'============
' Script Specific Logic

' Makes sure all tables have an ID (tables without one become fly0, fly1, ...)
FixIDs "table"

' Grab all the 'a' tags; only those inside the table called 'fly1' are used
Set alist = all.tags("a")
If alist Is Nothing Then Fail "could not get alist"
If alist.length = 0 Then Fail "alist empty"

elementCount = 0
For Each a In alist
    If UCase(a.parentElement.tagName) = "TD" Then
        ' a -> TD -> TR -> TBODY -> TABLE
        If a.parentElement.parentElement.parentElement.parentElement.id = "fly1" Then
            href = a.href
            startIndex = GetIndexAfterToken( href, "/wiki/" )

            ' Links without "/wiki/" yield index 0; Mid() raises a runtime
            ' error for a start position of 0, so such links are skipped.
            If startIndex > 0 Then
                ' "%" is doubled so it survives batch-file parsing.
                urlEnd = Replace( Mid( href, startIndex ), "%", "%%" )

                ' The URL is quoted so characters such as "&" coming from the
                ' scraped page cannot be interpreted by cmd.exe as operators.
                WScript.Echo "wget -O """ & outputFolder.Path & "\" & strClean(a.innerText) & ".xml"" """ & wikiSite & "/wiki/Special:Export/" & urlEnd & """"

                ' NOTE(review): presumably meant as a ~1 second pause between
                ' downloads (ping with a 1000 ms timeout), skipped when the
                ' pingskip variable is defined - confirm 1.1.1.1 still times out.
                WScript.Echo "if not defined pingskip ping 1.1.1.1 -n 1 -w 1000"

                elementCount = elementCount + 1
            End If
        End If
    End If
Next

WScript.Echo "set alistelementcount=" & elementCount

'============
ie.Quit


' Echoes message, closes the Internet Explorer instance if one was created,
' and terminates the script. Used for every fatal condition after IE has been
' started so that no hidden IE process is left behind.
Sub Fail( ByVal message )
    WScript.Echo message
    If IsObject( ie ) Then
        If Not ie Is Nothing Then ie.Quit
    End If
    WScript.Quit
End Sub

' Returns the Scripting.File object for the command-line argument at index arg.
' FileSystemObject.GetFile raises a runtime error if the file does not exist.
Function GetFileFromArgument( ByVal arg )
    Dim fso
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set GetFileFromArgument = fso.GetFile( WScript.Arguments.Item(arg) )
End Function

' Returns the Scripting.Folder object for the command-line argument at index arg.
' FileSystemObject.GetFolder raises a runtime error if the folder does not exist.
Function GetFolderFromArgument( ByVal arg )
    Dim fso
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set GetFolderFromArgument = fso.GetFolder( WScript.Arguments.Item(arg) )
End Function

' Gives every element with the given tag name that has no id a generated id
' of the form "fly<N>", numbering from 0 in document order. Operates on the
' script-level "all" collection.
Sub FixIDs( ByVal tagName )
    Dim tagList, table, id
    Set tagList = all.tags(tagName)
    If tagList Is Nothing Then Fail "could not get tagList:" & tagName

    id = 0
    For Each table In tagList
        If Len(table.id) = 0 Then
            table.id = "fly" & id
            id = id + 1
        End If
    Next
End Sub

' Navigates the given InternetExplorer.Application instance to path and
' returns the document's "all" collection.
'   ie    InternetExplorer.Application object
'   path  file path or URL to load
' Waits until the browser reports the document as complete instead of
' sleeping for a fixed time; gives up waiting after MAX_WAIT_MS and then
' returns whatever document is available.
Function GetIEForHTML( ByVal ie, ByVal path )
    Const READYSTATE_COMPLETE = 4
    Const POLL_MS = 100
    Const MAX_WAIT_MS = 60000
    Dim waited

    ' 1024 appears to be navTrustedForActiveX in BrowserNavConstants - TODO confirm
    ie.Navigate path, CLng(1024)

    waited = 0
    Do While ( ie.Busy Or ie.ReadyState <> READYSTATE_COMPLETE ) And waited < MAX_WAIT_MS
        WScript.Sleep POLL_MS
        waited = waited + POLL_MS
    Loop

    Set GetIEForHTML = ie.document.all
End Function

' Returns the 1-based position of the first character following the first
' occurrence of searchFor in outerText, or 0 if searchFor does not occur.
Function GetIndexAfterToken( ByVal outerText, ByVal searchFor )
    Dim index
    index = InStr( outerText, searchFor )
    If index > 0 Then
        index = index + Len( searchFor )
    End If
    GetIndexAfterToken = CLng( index )
End Function

' Returns strtoclean with every run of the listed punctuation characters
' replaced by a single "-", and consecutive "-" collapsed into one. Used to
' turn link text into a file name.
Function strClean( strtoclean )
    Dim objRegExp, outputStr
    Set objRegExp = New RegExp
    objRegExp.IgnoreCase = True
    objRegExp.Global = True

    objRegExp.Pattern = "[(?*"",\\<>&#~%{}+_.@:\/!;]+"
    outputStr = objRegExp.Replace(strtoclean, "-")

    ' Collapse any "-" runs (original hyphens next to replaced characters)
    objRegExp.Pattern = "\-+"
    outputStr = objRegExp.Replace(outputStr, "-")

    strClean = outputStr
End Function
This can also serve as an example for screen-scraping.