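This code was adapted from a previous discussion authored by DarthObiwan. The main change is that I moved the removal of a non-whitelisted node to the end of the recursion cycle, so its children are processed first. I also call RemoveChild with keepGrandChildren set to true, so that the removed node's children are preserved.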
/// <summary>
/// Recursively strips, from <paramref name="pNode"/> and all of its descendants,
/// every attribute and every node whose name is not listed in
/// <paramref name="pWhiteList"/>.
/// </summary>
/// <remarks>
/// Attribute names and node names are checked against the same list.
/// A node that is not white-listed is detached from its parent, but its
/// (already filtered) children are kept and handed over to that parent.
/// NOTE(review): HtmlAgilityPack presumably reports text and comment nodes under
/// pseudo-names such as "#text"; if so, those names must be white-listed for the
/// text to survive - confirm against callers.
/// </remarks>
/// <param name="pNode">Node to clean; it is modified in place.</param>
/// <param name="pWhiteList">Names of the attributes and nodes that are allowed to remain.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="pNode"/> or <paramref name="pWhiteList"/> is null.
/// </exception>
public void RemoveNotInWhiteList(HtmlNode pNode, IEnumerable<string> pWhiteList)
{
    if (pNode == null)
    {
        throw new System.ArgumentNullException(nameof(pNode));
    }

    if (pWhiteList == null)
    {
        throw new System.ArgumentNullException(nameof(pWhiteList));
    }

    // Materialize a lazy sequence once so it is not re-enumerated for every
    // attribute and node. The recursive calls below receive this collection
    // and therefore reuse it instead of copying again.
    ICollection<string> allowed =
        pWhiteList as ICollection<string> ?? new HashSet<string>(pWhiteList);

    // Snapshot the attributes to drop before removing them: removing while
    // enumerating the live collection would invalidate the enumeration.
    List<HtmlAttribute> rejectedAttributes = pNode.Attributes
        .Where(att => !allowed.Contains(att.Name))
        .ToList();
    foreach (HtmlAttribute attribute in rejectedAttributes)
    {
        attribute.Remove();
    }

    // Snapshot the children as well: the recursive call may detach a child
    // from this node and splice that child's own children in its place.
    foreach (HtmlNode child in pNode.ChildNodes.ToList())
    {
        RemoveNotInWhiteList(child, allowed);
    }

    // Deliberately done last, after the whole subtree has been processed,
    // so that the children promoted to the parent are already filtered.
    if (allowed.Contains(pNode.Name))
    {
        return;
    }

    HtmlNode parent = pNode.ParentNode;
    if (parent != null)
    {
        // A node without a parent has nothing to be detached from; skipping it
        // avoids a NullReferenceException while its subtree is still cleaned.
        parent.RemoveChild(pNode, true); // true = keep the grandchildren
    }
}