XML Parser for Oracle PL/SQL

1- Introduction

This document is based on:
  • Oracle 9i (10g,11g, 12c)

2- XML Parse API

Oracle provides you two basic API to parse XML:
  • DOM (Document Object Model)
  • XSLT & XPath

3- XML Data Sources

You can parse an XML document with a data source from:
  1. XML data source is a file.
  2. Source data is text (varchar2, ..)
  3. Source data is CLOB

4- Parsing XML from Text or CLOB

For example, parse a simple XML (source text):
Parse_Xml_Example
Create Or Replace Procedure Parse_Xml_Example As
   p              Dbms_Xmlparser.Parser;
   v_Doc          Dbms_Xmldom.Domdocument;
   v_Root_Element Dbms_Xmldom.Domelement;
   v_Child_Nodes  Dbms_Xmldom.Domnodelist;
   v_Child_Node   Dbms_Xmldom.Domnode;
   v_Text_Node    Dbms_Xmldom.Domnode;
   v_Emp_Nodes    Dbms_Xmldom.Domnodelist;
   v_Emp_Node     Dbms_Xmldom.Domnode;
   ---
   v_Xml_Data Varchar2(4000);
   v_Deptno   Varchar2(30);
   v_Dname    Varchar2(100);
   v_Location Varchar2(255);
   v_Empno    Varchar2(30);
   v_Ename    Varchar2(100);
   v_Job      Varchar2(100);
   v_Hiredate Date;
   v_Mrg      Number;
   v_Sal      Number;
   --
   v_Attr_Nodes     Dbms_Xmldom.Domnamednodemap;
   v_Attr_Node      Dbms_Xmldom.Domnode;
   v_Attribute_Name Varchar2(50);
   v_Node_Name      Varchar2(50);
   v_Node_Value     Varchar2(100);
Begin
   -- Note text contains no <?xml version="1"?>
   v_Xml_Data := '<department deptno="10" dname="ACCOUNTING" location="NEW YORK">
                   <employee empno="7782" ename="CLARK">
                      <job>MANAGER</job>
                      <mrg>7839</mrg>
                      <hiredate>6/9/1981</hiredate>
                      <sal>2450.00</sal>
                   </employee>
                   <employee empno="7839" ename="KING">
                      <job>PRESIDENT</job>
                      <mrg></mrg>
                      <hiredate>11/17/1981</hiredate>
                      <sal>5000.00</sal>
                   </employee>
                   <employee empno="7934" ename="MILLER">
                      <job>CLERK</job>
                      <mrg>7782</mrg>
                      <hiredate>1/23/1982</hiredate>
                      <sal>1300.00</sal>
                   </employee>
                </department>';
   -- Create XML Parser.
   p := Dbms_Xmlparser.Newparser;
   Dbms_Xmlparser.Setvalidationmode(p
                                   ,False);
   -- Parse XML into DOM object                       
   Dbms_Xmlparser.Parsebuffer(p
                             ,v_Xml_Data);
   -- Document              
   v_Doc := Dbms_Xmlparser.Getdocument(p);
   -- Root element (<department>)
   v_Root_Element := Dbms_Xmldom.Getdocumentelement(v_Doc);
   -- Get attribute value
   v_Deptno   := Dbms_Xmldom.Getattribute(v_Root_Element
                                         ,'deptno');
   v_Dname    := Dbms_Xmldom.Getattribute(v_Root_Element
                                         ,'dname');
   v_Location := Dbms_Xmldom.Getattribute(v_Root_Element
                                         ,'location');
   ---------                              
   Dbms_Output.Put_Line('v_Deptno=' || v_Deptno);
   Dbms_Output.Put_Line('v_Dname=' || v_Dname);
   Dbms_Output.Put_Line('v_Location=' || v_Location);
   --------                               
   -- Node list (employee) of v_Root_Element (Dbms_xmldom.Domnodelist)
   v_Emp_Nodes := Dbms_Xmldom.Getelementsbytagname(v_Root_Element
                                                  ,'employee');
   For j In 0 .. Dbms_Xmldom.Getlength(v_Emp_Nodes) Loop
      v_Emp_Node := Dbms_Xmldom.Item(v_Emp_Nodes
                                    ,j);
      -- Attribute List (Dbms_xmldom.Domnamednodemap)                           
      v_Attr_Nodes := Dbms_Xmldom.Getattributes(v_Emp_Node);
      --
      If (Dbms_Xmldom.Isnull(v_Attr_Nodes) = False) Then
         For i In 0 .. Dbms_Xmldom.Getlength(v_Attr_Nodes) - 1 Loop
            v_Attr_Node := Dbms_Xmldom.Item(v_Attr_Nodes
                                           ,i);
            v_Node_Name := Dbms_Xmldom.Getnodename(v_Attr_Node);
            --
            If v_Node_Name = 'empno' Then
               v_Empno := Dbms_Xmldom.Getnodevalue(v_Attr_Node);
            Elsif v_Node_Name = 'ename' Then
               v_Ename := Dbms_Xmldom.Getnodevalue(v_Attr_Node);
            End If;
         End Loop;
         Dbms_Output.Put_Line('v_Empno=' || v_Empno);
         Dbms_Output.Put_Line('v_Ename=' || v_Ename);
      End If;
      ----
      -- Child nodes of employee node.
      --
      v_Child_Nodes := Dbms_Xmldom.Getchildnodes(v_Emp_Node);
      --
      For i In 0 .. Dbms_Xmldom.Getlength(v_Child_Nodes) - 1 Loop
         -- <job>,<mrg>,<hiredate>,<sal>
         v_Child_Node := Dbms_Xmldom.Item(v_Child_Nodes
                                         ,i);
         v_Node_Name  := Dbms_Xmldom.Getnodename(v_Child_Node);
         v_Text_Node  := Dbms_Xmldom.Getfirstchild(v_Child_Node);
         v_Node_Value := Dbms_Xmldom.Getnodevalue(v_Text_Node);
         --
         If v_Node_Name = 'job' Then
            v_Job := v_Node_Value;
         Elsif v_Node_Name = 'mrg' Then
            v_Mrg := To_Number(v_Node_Value);
         Elsif v_Node_Name = 'hiredate' Then
            v_Hiredate := To_Date(v_Node_Value
                                 ,'MM/dd/yyyy');
         Elsif v_Node_Name = 'sal' Then
            v_Sal := To_Number(v_Node_Value);
         End If;
      End Loop;
      --
      Dbms_Output.Put_Line('v_Job=' || v_Job);
      Dbms_Output.Put_Line('v_Mrg=' || v_Mrg);
      Dbms_Output.Put_Line('v_Hiredate=' || v_Hiredate);
      Dbms_Output.Put_Line('v_Sal=' || v_Sal);
   End Loop;
End;

 
Running procedure:
begin
 -- Call the procedure
 parse_xml_example;
end;
You can also parse XML from a CLOB.
Declare
   p               Dbms_Xmlparser.Parser;
   v_Xml_Clob      Clob;
   v_Doc           Dbms_Xmldom.Domdocument;
   v_Root_Element  Dbms_Xmldom.Domelement;
   v_Child_Nodes   Dbms_Xmldom.Domnodelist;
   v_Greeting_Node Dbms_Xmldom.Domnode;
   v_Text_Node     Dbms_Xmldom.Domnode;
   v_Text          Varchar2(100);
Begin
   -- CLOB data
   v_Xml_Clob := '<data><greeting>Hello</greeting></data>';
   -- Create XML Parser.
   p := Dbms_Xmlparser.Newparser;
   -- Parse XML into DOM object                      
   Dbms_Xmlparser.Parseclob(p
                           ,v_Xml_Clob);
   -- XML Document                 
   v_Doc := Dbms_Xmlparser.Getdocument(p);
   -- Root element 
   v_Root_Element := Dbms_Xmldom.Getdocumentelement(v_Doc);
   -- Child nodes 'greeting'.
   v_Child_Nodes := Dbms_Xmldom.Getchildrenbytagname(v_Root_Element
                                                    ,'greeting');
   -- First node in list
   v_Greeting_Node := Dbms_Xmldom.Item(v_Child_Nodes
                                      ,0);
   v_Text_Node     := Dbms_Xmldom.Getfirstchild(v_Greeting_Node);
   -- Hello
   v_Text := Dbms_Xmldom.Getnodevalue(v_Text_Node);
   --
   Dbms_Output.Put_Line('Greeting:' || v_Text);
End;

5- Parsing XML file

Firstly you need to create a virtual directory, and assign permissions to users on that directory.
-- Create DBA directory.

Create Directory MY_XML_DIR as 'C:/TEMP';

-- Grant read & write to user scott.

Grant Read,Write on Directory MY_XML_DIR to scott;
The following example parse a xml file:
C:/TEMP/company.xml
<company id="111" companyName="Microsoft">
<websites>
  <website>http://microsoft.com</website>
  <website>http://msn.com</website>
  <website>http://hotmail.com</website>
</websites>
<address>
  <street>1 Microsoft Way</street>
  <city>Redmond</city>
</address>
</company>
Parse_Xml_File_Example
Create Or Replace Procedure Parse_Xml_File_Example As
   v_Bfile    Bfile;
   v_Xml_Clob Clob;
   ---
   p              Dbms_Xmlparser.Parser;
   v_Doc          Dbms_Xmldom.Domdocument;
   v_Root_Element Dbms_Xmldom.Domelement;
   v_Child_Nodes  Dbms_Xmldom.Domnodelist;
   v_Child_Node   Dbms_Xmldom.Domnode;
   v_Text_Node    Dbms_Xmldom.Domnode;
   v_Text         Varchar2(100);
   ----
   v_Dest_Offset  Integer := 1;
   v_Src_Offset   Integer := 1;
   v_Lang_Context Number := Dbms_Lob.Default_Lang_Ctx;
   v_Warning      Integer;
   --
   v_Value Varchar2(255);
Begin
   -- Object representing XML file.
   v_Bfile := Bfilename('MY_XML_DIR'
                       ,'company.xml');
   -- Create Empty CLOB
   -- Tạo dữ liệu CLOB rỗng
   Dbms_Lob.Createtemporary(v_Xml_Clob
                           ,Cache => False);
   -- Open file                     
   Dbms_Lob.Open(v_Bfile
                ,Dbms_Lob.Lob_Readonly);
   -- Load file to CLOB
   Dbms_Lob.Loadclobfromfile(v_Xml_Clob -- Dest_Lob IN OUT  
                            ,v_Bfile -- Src_Lob In   
                            ,Dbms_Lob.Getlength(v_Bfile) -- Amount In  
                            ,v_Dest_Offset -- Dest_Offset IN OUT
                            ,v_Src_Offset -- Src_Offset In Out
                            ,Dbms_Lob.Default_Csid -- Bfile_Csid In  
                            ,v_Lang_Context -- Lang_Context In Out
                            ,v_Warning -- Warning OUT  
                             );
   -- After read, close it.
   Dbms_Lob.Close(v_Bfile);
   --
   -- Create XML Parser.
   p := Dbms_Xmlparser.Newparser;
   --
   -- Parse XML into DOM object                      
   Dbms_Xmlparser.Parseclob(p
                           ,v_Xml_Clob);
   -- Document object.                   
   v_Doc := Dbms_Xmlparser.Getdocument(p);
   -- Root element (<company>)
   v_Root_Element := Dbms_Xmldom.Getdocumentelement(v_Doc);
   --
   v_Value := Dbms_Xmldom.Getattribute(v_Root_Element
                                      ,'id');
   Dbms_Output.Put_Line('id=' || v_Value);
   v_Value := Dbms_Xmldom.Getattribute(v_Root_Element
                                      ,'companyName');
   Dbms_Output.Put_Line('companyName=' || v_Value);
   --- return Dbms_Xmldom.Domnodelist
   v_Child_Nodes := Dbms_Xmldom.Getchildrenbytagname(v_Root_Element
                                                    ,'*');
   For i In 0 .. Dbms_Xmldom.Getlength(v_Child_Nodes) - 1 Loop
      v_Child_Node := Dbms_Xmldom.Item(v_Child_Nodes
                                      ,i);
      --
      If Dbms_Xmldom.Getnodename(v_Child_Node) = 'websites' Then
         Dbms_Output.Put_Line('Found websites');
         -- ...
      Elsif Dbms_Xmldom.Getnodename(v_Child_Node) = 'address' Then
         Dbms_Output.Put_Line('Found address');
         -- ...
      End If;
   End Loop;
Exception
   When Others Then
      Dbms_Output.Put_Line(Sqlerrm);
      Dbms_Lob.Freetemporary(v_Xml_Clob);
      Dbms_Xmlparser.Freeparser(p);
      Dbms_Xmldom.Freedocument(v_Doc);
End;
 
Running procedure:
begin
  -- Call the procedure
  parse_xml_file_example;
end;

6- Parsing XML with Dbms_Xslprocessor

Oracle provides you Dbms_Xslprocessor package, helps you to access to XML data quick and easy. Consider the following example:
Declare
   v_Xml_Clob Clob;
   ---
   p                Dbms_Xmlparser.Parser;
   v_Doc            Dbms_Xmldom.Domdocument;
   v_Root_Element   Dbms_Xmldom.Domelement;
   v_Child_Nodes    Dbms_Xmldom.Domnodelist;
   v_Current_Node   Dbms_Xmldom.Domnode;
   v_Websites_Nodes Dbms_Xmldom.Domnodelist;
   --
   v_Id           Number;
   v_Company_Name Varchar2(255);
   v_Street       Varchar2(255);
   v_City         Varchar2(50);
   v_Note         Varchar2(255);
Begin
   v_Xml_Clob := '<companies xmlns:my-ns="http://somedomain.com/abc">
                    <company id="111" companyName="Microsoft">
                       <websites>
                          <website>http://microsoft.com</website>
                          <website>http://msn.com</website>
                          <website>http://hotmail.com</website>
                       </websites>
                       <address>
                          <street>1 Microsoft Way</street>
                          <city>Redmond</city>
                       </address>
                       <my-ns:note>Microsoft Note</my-ns:note>
                    </company>
                    <company id="100" companyName="Apple">
                       <websites>
                          <website>http://applet.com</website>
                       </websites>
                       <address>
                          <street>1 Infinite Loop</street>
                          <city>Cupertino</city>
                       </address>
                       <my-ns:note>Apple Note</my-ns:note>
                    </company>
                 </companies>';
   --
   -- Create XML Parser.
   p := Dbms_Xmlparser.Newparser;
   --
   -- Parse XML into DOM object                        
   Dbms_Xmlparser.Parseclob(p
                           ,v_Xml_Clob);
   -- Document Element              
   v_Doc := Dbms_Xmlparser.Getdocument(p);
   -- Root element (<companies>)
   v_Root_Element := Dbms_Xmldom.Getdocumentelement(v_Doc);
   --- return Dbms_Xmldom.Domnodelist
   v_Child_Nodes := Dbms_Xmldom.Getchildrenbytagname(v_Root_Element
                                                    ,'*');
   For i In 0 .. Dbms_Xmldom.Getlength(v_Child_Nodes) - 1 Loop
      -- <company> Node.
      v_Current_Node := Dbms_Xmldom.Item(v_Child_Nodes
                                        ,i);
      Dbms_Xslprocessor.Valueof(v_Current_Node
                               ,'@id'
                               ,v_Id -- OUT
                                );
      Dbms_Xslprocessor.Valueof(v_Current_Node
                               ,'@companyName'
                               ,v_Company_Name -- OUT
                                );
      Dbms_Xslprocessor.Valueof(v_Current_Node
                               ,'address/street/text()'
                               ,v_Street -- OUT
                                );
      Dbms_Xslprocessor.Valueof(v_Current_Node
                               ,'address/city/text()'
                               ,v_City -- OUT
                                );
      -- Case element have namespace
      -- Must specify the fourth parameter.           
      Dbms_Xslprocessor.Valueof(v_Current_Node
                               ,'my-ns:note/text()'
                               ,v_Note -- OUT
                               ,'xmlns:my-ns=http://somedomain.com/abc');
      Dbms_Output.Put_Line('v_Id=' || v_Id);
      Dbms_Output.Put_Line('v_company_Name=' || v_Company_Name);
      Dbms_Output.Put_Line(' - v_street=' || v_Street);
      Dbms_Output.Put_Line(' - v_city=' || v_City);
      Dbms_Output.Put_Line(' - v_Note=' || v_Note);
      -- Selects nodes from the tree which match the given pattern
      -- return Dbms_Xmldom.Domnodelist
      v_Websites_Nodes := Dbms_Xslprocessor.Selectnodes(v_Current_Node
                                                       ,'websites/website');
      For j In 0 .. Dbms_Xmldom.Getlength(v_Websites_Nodes) - 1 Loop
         v_Current_Node := Dbms_Xmldom.Item(v_Websites_Nodes
                                           ,j);
         Dbms_Output.Put_Line(' - website=' ||
                              Dbms_Xslprocessor.Valueof(v_Current_Node
                                                       ,'text()'));
      End Loop;
   End Loop;
Exception
   When Others Then
      Dbms_Output.Put_Line(Sqlerrm);
      Dbms_Lob.Freetemporary(v_Xml_Clob);
      Dbms_Xmlparser.Freeparser(p);
      Dbms_Xmldom.Freedocument(v_Doc);
End;
 
Running example:
If your element has the namespace:
<companies xmlns:my-ns="http://somedomain.com/abc">

    <company id="111" companyName="Microsoft">

       .....

       <my-ns:note>Microsoft Note</my-ns:note>

    </company>

    ....

 </companies>
You need to add the parameter specified namespace:
Dbms_Xslprocessor.Valueof(v_Current_Node
                       ,'my-ns:note/text()'
                       ,v_Note -- OUT
                       ,'xmlns:my-ns=http://somedomain.com/abc');
Otherwise you will get the error:
ORA-31011: XML parsing failed
ORA-19202: Error occurred in XML processing
LPX-00601: Invalid token in: 'my-ns:note/text()'