Here is what I have:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
using System.IO;

namespace HTMLParseExample
{
    /// <summary>
    /// Console sample that loads "Fubar.html" from the user's documents folder and prints
    /// the values found next to a fixed set of row labels in the page's "sTable" tables.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: locates the file, extracts the values and prints one
        /// "label - value" line per requested label.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Column 0 is the label to look for (underscores stand in for spaces);
            // column 1 receives the text of the cell next to that label.
            string[,] tempArray = { { "Type", "" }, { "Make", "" }, { "Number_of_Functions", "" }, { "Timer", "" } };

            // Path.Combine picks the correct separator instead of a hard-coded "\\".
            string htmlPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments), "Fubar.html");

            if (!File.Exists(htmlPath))
            {
                Console.WriteLine("Unable to find file 'Fubar.html'");
                Console.WriteLine();
                Console.WriteLine("Pausing: ");
                Console.ReadLine();

                // Report failure through the process exit code and leave Main normally.
                Environment.ExitCode = 1;
                return;
            }

            Console.WriteLine("Found file 'Fubar.html'");

            HtmlDocument hd = new HtmlDocument();
            hd.Load(htmlPath);

            FillValues(hd, tempArray);

            // GetLength(0) is the row count; no need to divide the total length by the column count.
            for (int l = 0; l < tempArray.GetLength(0); l++)
            {
                Console.WriteLine("{0} - {1}", tempArray[l, 0], tempArray[l, 1]);
            }

            Console.WriteLine();
            Console.Write("Pausing: ");
            Console.ReadLine();
        }

        /// <summary>
        /// Walks every table with class "sTable" that is a direct child of the first
        /// div with class "col6" and copies the text of the second cell of each row
        /// into <paramref name="lookup"/> when the first cell matches a requested label.
        /// </summary>
        /// <param name="document">The parsed HTML document to search.</param>
        /// <param name="lookup">
        /// Two-column array: column 0 holds the labels (with '_' in place of ' '),
        /// column 1 is overwritten with the matching value. Labels that are never
        /// found keep whatever value they already had.
        /// </param>
        private static void FillValues(HtmlDocument document, string[,] lookup)
        {
            // SelectNodes returns null, not an empty collection, when nothing matches,
            // so every result has to be checked before it is indexed or enumerated.
            HtmlNodeCollection containers = document.DocumentNode.SelectNodes("//div[@class='col6']");
            if (containers == null)
            {
                Console.WriteLine("No div with class 'col6' found in 'Fubar.html'");
                return;
            }

            HtmlNodeCollection tables = containers[0].SelectNodes("table[@class='sTable']");
            if (tables == null)
            {
                Console.WriteLine("No table with class 'sTable' found in 'Fubar.html'");
                return;
            }

            foreach (HtmlNode table in tables)
            {
                // Accept rows both directly under the table and inside a tbody;
                // the parser does not insert a missing tbody on its own.
                HtmlNodeCollection rows = table.SelectNodes("tr|tbody/tr");
                if (rows == null)
                {
                    continue;
                }

                foreach (HtmlNode row in rows)
                {
                    HtmlNodeCollection cells = row.SelectNodes("th|td");

                    // A row needs a label cell and a value cell; skip header-only or empty rows.
                    if (cells == null || cells.Count < 2)
                    {
                        continue;
                    }

                    // Trim so that whitespace from pretty-printed markup does not defeat the match.
                    string label = cells[0].InnerText.Trim();
                    string value = cells[1].InnerText.Trim();

                    for (int i = 0; i < lookup.GetLength(0); i++)
                    {
                        if (string.Equals(lookup[i, 0].Replace("_", " "), label, StringComparison.Ordinal))
                        {
                            lookup[i, 1] = value;
                        }
                    }
                }
            }
        }
    }
}
And it works with this mockup:
<html>
<header>
<title>Fubar's Pavilion</title>
</header>
<body>
<div class="col5">
<table>
<caption>Fubar</caption>
<tbody>
<tr>
<th>FooBar1</th>
</tr>
</tbody>
</table>
</div>
<div class="col6">
<table class="sTable">
<caption>Information</caption>
<tbody>
<tr>
<th scope="row">Type</th>
<td>Broken</td>
</tr>
</tbody>
<tbody>
<tr>
<th scope="row">Make</th>
<td>Fors</td>
</tr>
</tbody>
</table>
<table class="sTable">
<caption>Functions</caption>
<tbody>
<tr>
<th scope="row">Number of Functions</th>
<td>16</td>
</tr>
</tbody>
<tbody>
<tr>
<th scope="row">Timer</th>
<td>198 minutes</td>
</tr>
</tbody>
</table>
</div>
</body>
</html>
And you should get an output of:
Found file 'Fubar.html'
Type - Broken
Make - Fors
Number_of_Functions - 16
Timer - 198 minutes
Pausing:
This will work with any number of tables, as long as each one has the attribute class="sTable" and sits directly inside the first <div class="col6">. Hope this helps. God Bless,
Macster