XmlTextReader is ignoring DtdProcessing flag

554 Views Asked by At

I am trying to load xml content using XmlTextReader but for some reason, XmlTextReader is ignoring DtdProcessing flag while processing Xml. DtdProcessing flag is working fine if I use XmlReader instead. Problem with XmlReader is that it automatically normalize \r\n\ to \n which I don't want in my output string.

Here is my code snippet:

// Reproducer: load an XML document that declares an internal DTD entity (&ouml;)
// into an XmlDocument through an XmlTextReader, then print the resulting markup.
XmlDocument xmlDocument = new XmlDocument();

// Verbatim string: the line breaks and indentation inside <content> are part of the data.
string contents = @"<?xml version='1.0' encoding='ISO-8859-1' standalone='yes'?>
    <!DOCTYPE content [<!ENTITY ouml '&#246;'>]>
    <content>Test &ouml; Test

    Test</content>";

// NOTE(review): the bytes are produced as UTF-8 while the XML declaration says
// ISO-8859-1. The literal above contains only ASCII characters, so both encodings
// yield the same bytes here, but confirm the mismatch is intended.
byte[] byteArray = Encoding.UTF8.GetBytes(contents);
MemoryStream stream = new MemoryStream(byteArray);

// Disabled alternative kept for comparison: XmlReader.Create with DtdProcessing.Parse.
//XmlReaderSettings settings = new XmlReaderSettings();
//settings.DtdProcessing = DtdProcessing.Parse;
//settings.IgnoreWhitespace = false;
//XmlReader reader = XmlReader.Create(stream, settings);
//xmlDocument.Load(reader);

XmlTextReader reader = new XmlTextReader(stream);
reader.DtdProcessing = DtdProcessing.Parse;
// NOTE(review): XmlTextReader also exposes an EntityHandling property that this
// snippet leaves at its default; presumably that setting (rather than DtdProcessing)
// decides whether &ouml; is expanded - confirm against the XmlTextReader docs.
xmlDocument.Load(reader);

// Print the serialized document, including the DOCTYPE declaration.
Console.WriteLine(xmlDocument.OuterXml);

Output I am getting from above processing:

"<?xml version=\"1.0\" encoding=\"ISO-8859-1\" standalone=\"yes\"?><!DOCTYPE content[<!ENTITY ouml '&#246;'>]><content>Test &ouml; Test\r\n\r\n    Test</content>"

Instead I want output string with the DTD processed:

"<?xml version=\"1.0\" encoding=\"ISO-8859-1\" standalone=\"yes\"?><!DOCTYPE content[<!ENTITY ouml '&#246;'>]><content>Test ö Test\r\n\r\n    Test</content>"
1

There are 1 best solutions below

0
jdweng On

Code would look something like this

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Xml;
using System.Xml.Linq;
using System.IO;


namespace ConsoleApplication31
{
    /// <summary>
    /// Console sample that loads an XML document containing an internal DTD entity
    /// and prints it with the entity expanded, keeping the original line breaks.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses the sample XML with an <see cref="XmlTextReader"/> configured to
        /// process the DTD and expand general entities, loads it into an
        /// <see cref="XmlDocument"/>, and writes the resulting markup to the console.
        /// Any <see cref="XmlException"/> raised while parsing is reported by message.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            try
            {
                string contents = @"<?xml version='1.0' encoding='ISO-8859-1' standalone='yes'?>
                <!DOCTYPE content [<!ENTITY ouml '&#246;'>]>
                <content>Test &ouml; Test
                Test</content>";

                XmlDocument xmlDocument = new XmlDocument();

                // Reading from a TextReader means the text is already decoded, so the
                // encoding named in the XML declaration cannot disagree with the bytes.
                using (XmlTextReader reader = new XmlTextReader(new StringReader(contents)))
                {
                    // Allow the internal DTD subset (the <!ENTITY ouml ...> declaration) to be parsed.
                    reader.DtdProcessing = DtdProcessing.Parse;

                    // XmlTextReader defaults to EntityHandling.ExpandCharEntities, which hands
                    // general entities such as &ouml; to XmlDocument as EntityReference nodes.
                    // ExpandEntities makes the reader substitute the entity text in place.
                    reader.EntityHandling = EntityHandling.ExpandEntities;

                    // XmlTextReader.Normalization is left at its default (false), so the
                    // \r\n sequences inside <content> are not rewritten to \n.
                    xmlDocument.Load(reader);
                }

                Console.WriteLine(xmlDocument.OuterXml);
            }
            catch (XmlException e)
            {
                // Malformed XML or an undeclared entity: report the parser's message.
                Console.WriteLine(e.Message);
            }
        }
    }

}