Hello,
I'm trying to read a text file containing hyperlinks, open the web page for each link, and save the scraped data into an Excel file.
I don't know what kind of test to use to determine that the open file has reached EOF. With the code below the system freezes, as would probably be expected if ts.ReadLine does not return an empty string at the end of the file.
The code is shown below.
I would appreciate any answer.
Thank you,
Daniel
VBSTART
' Script-level object whose .document is queried by ExtractTagRecord.
' NOTE(review): presumably an Internet Explorer automation object set by code not shown here - confirm.
Dim IE_record
/* other functions */
' Returns the content of one element, selected by tag name and position,
' from the document currently held by the script-level IE_record object.
'
' Parameters:
'   TagName - tag name passed to getElementsByTagName (e.g. "td").
'   Num     - zero-based index of the wanted element within the matches.
'   all     - 1 to return the element's outerHTML; any other value
'             returns its innerText.
'
' Returns:
'   The requested string, or Empty (the function's unassigned default)
'   when the document has no element at index Num.
Function ExtractTagRecord(TagName, Num, all)
    Dim matches
    Set matches = IE_record.document.getElementsByTagName(TagName)

    ' Check the requested index itself, not merely that the collection is
    ' non-empty: asking for item 8 on a page with 3 matches would otherwise
    ' yield a null item and an "Object required" runtime error.
    If Num >= 0 And Num < matches.length Then
        If all = 1 Then
            ExtractTagRecord = matches.Item(Num).outerHTML
        Else
            ExtractTagRecord = matches.Item(Num).innerText
        End If
    End If
End Function
' Reads input_file line by line, builds a URL from each non-blank line,
' navigates to it, extracts four table cells from the loaded page and
' writes them to one row of a new Excel workbook, which is then saved as
' output_file.
'
' Parameters:
'   input_file  - path of the text file holding one link record per line.
'   output_file - path under which the resulting workbook is saved.
'
' Relies on Navigate and ExtractTagRecord, which are defined elsewhere in
' the script.
Sub LoadAEGRMRecords(input_file, output_file)
    Dim xlApp, xlBook, xlSheet
    Dim fs, f, ts
    Dim MyString, link, link_text
    Dim tagText1, tagText2, tagText3, tagText4
    Dim k

    Set xlApp = CreateObject("Excel.Application")
    Set xlBook = xlApp.Workbooks.Add
    Set xlSheet = xlBook.Worksheets(1)
    xlApp.Visible = True

    ' Open the input file for reading (1 = ForReading, -2 = system default format).
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set f = fs.GetFile(input_file)
    Set ts = f.OpenAsTextStream(1, -2)

    k = 1 ' next worksheet row to fill

    ' Test AtEndOfStream rather than comparing the line with "": ReadLine
    ' raises "Input past end of file" at EOF instead of returning an empty
    ' string, and a blank line inside the file must not stop the loop.
    Do While Not ts.AtEndOfStream
        MyString = ts.ReadLine

        ' Skip blank lines so they neither end processing nor leave empty rows.
        If Len(Trim(MyString)) > 0 Then
            ' The relative link occupies 84 characters starting at column 50.
            link_text = Mid(MyString, 50, 84)
            link = "http://www.mj.romarhiva.ro/webarchive/" & link_text
            Call Navigate(link)

            tagText1 = ExtractTagRecord("td", 2, 0)
            tagText2 = ExtractTagRecord("td", 4, 0)
            tagText3 = ExtractTagRecord("td", 6, 0)
            tagText4 = ExtractTagRecord("td", 8, 0)
            ' extract more data from Web Page

            ' Write through the worksheet itself so the data lands on this
            ' sheet even if another sheet or workbook becomes active.
            xlSheet.Cells(k, 1).Value = tagText1
            xlSheet.Cells(k, 2).Value = tagText2
            xlSheet.Cells(k, 3).Value = tagText3
            xlSheet.Cells(k, 4).Value = tagText4
            ' save more data in Excel cells

            k = k + 1
        End If
    Loop
    ts.Close

    ' Save and close the Excel file.
    xlSheet.SaveAs output_file
    xlApp.Quit

    Set ts = Nothing
    Set f = Nothing
    Set fs = Nothing
    Set xlSheet = Nothing
    Set xlBook = Nothing
    Set xlApp = Nothing
End Sub
VBEND