I have C# code that reads an HTML file and returns its content as a string.
One thing that I need to do is parse the HTML string, look for all <embed> tags, get the value of the "src" attribute, and then replace the entire <embed> tag with the content of the file that the "src" attribute points to.
I am trying to use the HtmlAgilityPack to allow me to parse the html code.
The only thing that I have not been able to figure out is how to replace the <embed> tag with another string and then return the new string, with no <embed> tags left in it, to the user.
Here is what I have done
/// <summary>
/// Replaces every &lt;embed&gt; tag whose "src" attribute ends with ".html"
/// with the contents of the file that attribute points to.
/// </summary>
/// <param name="content">The HTML markup to process; may be null.</param>
/// <returns>
/// null when <paramref name="content"/> is null; the original string, untouched,
/// when no tag was replaced; otherwise the re-serialized markup in which each
/// qualifying &lt;embed&gt; tag has been swapped for its file's contents.
/// </returns>
protected string ParseContent(string content)
{
    if (content == null)
    {
        return null;
    }

    //Create a new document parser object and load the content
    HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(content);

    //Snapshot the embed tags up front: Descendants() is lazily evaluated and
    //the loop below mutates the tree it would otherwise still be walking.
    List<HtmlNode> embedNodes = document.DocumentNode.Descendants("embed").ToList();

    bool replacedAny = false;

    foreach (HtmlNode embedNode in embedNodes)
    {
        //Make sure there is a source
        if (!embedNode.Attributes.Contains("src"))
        {
            continue;
        }

        //Only ".html" sources are expanded (ordinal, case-sensitive match)
        string source = embedNode.Attributes["src"].Value;
        if (source == null || !source.EndsWith(".html", System.StringComparison.Ordinal))
        {
            continue;
        }

        //GetContent returns null when the file does not exist; in that case
        //the <embed> tag is left in place rather than being replaced by nothing.
        string newContent = GetContent(source);
        if (newContent == null)
        {
            continue;
        }

        //A text node is written out verbatim when the document is serialized,
        //so the file's markup ends up exactly where the <embed> tag used to be.
        HtmlNode replacement = document.CreateTextNode(newContent);
        embedNode.ParentNode.ReplaceChild(replacement, embedNode);
        replacedAny = true;
    }

    //Re-serialize only when something changed so that untouched input is
    //returned exactly as it was received.
    if (replacedAny)
    {
        return document.DocumentNode.OuterHtml;
    }

    return content;
}
/// <summary>
/// Reads the whole text of the file at <paramref name="path"/>.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>The file's text, or null when no file exists at that path.</returns>
protected string GetContent(string path)
{
    //Only attempt the read when the file is actually there
    return System.IO.File.Exists(path)
        ? System.IO.File.ReadAllText(path)
        : null;
}
How can I replace the <embed> tag with a string?