# -*- coding: iso-8859-1 -*-
#Required in Python 2.2, and must be the first import
from __future__ import generators
from xml.dom import Node

def doc_order_iter(node):
    """
    Iterates over each node in document order,
    yielding each in turn, starting with the given node
    node - the starting point
           (subtree rooted at node will be iterated over document order)
    """
    #Document order returns the current node,
    #then each of its children in turn
    yield node
    for child in node.childNodes:
        #Create a generator for each child,
        #Over which to iterate
        for cn in doc_order_iter(child):
            yield cn
    return


def doc_order_iter_filter(node, filter_func):
    """
    Iterates over each node in document order,
    applying the filter function to each in turn,
    starting with the given node, and yielding each node in
    cases where the filter function computes true
    node - the starting point
           (subtree rooted at node will be iterated over document order)
    filter_func - a callable object taking a node and returning
                  true or false
    """
    if filter_func(node):
        yield node
    for child in node.childNodes:
        for cn in doc_order_iter_filter(child, filter_func):
            yield cn
    return

def get_elements_by_tag_name(node, name):
    """
    Returns an iterator (in document order) over all the element nodes
    that are descendants of the given one, and have the given tag name.
    name - the node name to check
    """
    return doc_order_iter_filter(node, lambda n: n.nodeType == \
        Node.ELEMENT_NODE and n.nodeName == name)


def get_elements_by_tag_name_ns(node, ns, local):
    """
    Returns an iterator (in document order) over all the element nodes
    that are descendants of the given one, and have the given namespace
    and local name.
    node - the starting point (subtree rooted at node will be searched)
           node will be returned if it has the right ns and local name
    ns - the namespace to check
    local - the local name to check
    """
    return doc_order_iter_filter(node, lambda n: n.nodeType == \
        Node.ELEMENT_NODE and n.namespaceURI == ns and n.localName == local)


def get_first_element_by_tag_name_ns(node, ns, local):
    """
    Returns the first element in document order with the given namespace
    and local name
    node - the starting point (subtree rooted at node will be searched)
           node will be returned if it has the right ns and local name
    ns - the namespace to check
    local - the local name to check
    """
    return get_elements_by_tag_name_ns(node, ns, local).next()


def string_value(node):
    """
    Return the XPath string value of the given node
    This basically consists of one string comprising
    all the character data in the node and its descendants
    node - the starting point
    """
    text_nodes = doc_order_iter_filter(node, lambda n: n.nodeType == Node.TEXT_NODE)
    return u''.join([ n.data for n in text_nodes ])


def text_search(node, sought):
    """
    Return a list of descendant elements which contain a given substring
    in their text
    node - the starting point (subtree rooted at node will be searched)
    sought - the substring to find
    """
    return doc_order_iter_filter(node, lambda n: n.nodeType == Node.ELEMENT_NODE and [ 1 for cn in n.childNodes if cn.nodeType == Node.TEXT_NODE and cn.data.find(sought) != -1 ] )


def get_leaf_elements(node):
    """
    Return a list of elements that have no element children
    node - the starting point (subtree rooted at node will be searched)
    """
    return doc_order_iter_filter(node, lambda n: n.nodeType == Node.ELEMENT_NODE and not [ 1 for cn in n.childNodes if cn.nodeType == Node.ELEMENT_NODE ])


def rename_element(elem, new_ns, new_qname):
    new = elem.rootNode.createElementNS(new_ns, new_qname)
    for child in elem.childNodes:
        elem.removeChild(child)
        new.appendChild(child)
    elem.parentNode.replaceChild(new, elem)
    return new

#
# The following functions do not use the generator/iterator framework
#

#Mapping from node type to XPath node test function name
OTHER_NODES = {
    Node.TEXT_NODE: 'text',
    Node.COMMENT_NODE: 'comment',
    Node.PROCESSING_INSTRUCTION_NODE: 'processing-instruction'
    }

def abs_path(node):
    #based on code developed by Florian Bösch on XML-SIG
    #http://mail.python.org/pipermail/xml-sig/2004-August/010423.html
    #Significantly enhanced to use Unicode properly, support more
    #node types, use safer node type tests, etc.
    """
    Return an XPath expression that provides a unique path to
    the given node (supports elements, attributes, root nodes,
    text nodes, comments and PIs) within a document
    """
    if node.nodeType == Node.ELEMENT_NODE:
        count = 1
        #Count previous siblings with same node name
        previous = node.previousSibling
        while previous:
            if previous.localName == node.localName: count += 1
            previous = previous.previousSibling
        step = u'%s[%i]' % (node.nodeName, count)
        ancestor = node.parentNode
    elif node.nodeType == Node.ATTRIBUTE_NODE:
        step = u'@%s' % (node.nodeName)
        ancestor = node.ownerElement
    elif node.nodeType in OTHER_NODES:
        #Text nodes, comments and PIs
        count = 1
        #Count previous siblings of the same node type
        previous = node.previousSibling
        while previous:
            if previous.nodeType == node.nodeType: count += 1
            previous = previous.previousSibling
        test_func = OTHER_NODES[node.nodeType]
        step = u'%s()[%i]' % (test_func, count)
        ancestor = node.parentNode
    elif not node.parentNode:
        #Root node
        step = u''
        ancestor = node
    else:
        raise TypeError('Unsupported node type for abs_path')
    if ancestor.parentNode:
        return abs_path(ancestor) + u'/' + step
    else:
        return u'/' + step


#def insert_xml(node, position, text):
#    """
#    Parse some text and insert it as a child of an element
#    """


#
# The command line code executes a simple battery of tests
#
if __name__ == "__main__":
    #The following exercises the code as demonstrated in the article
    DOC1 = """<?xml version='1.0' encoding='iso-8859-1'?>
<verse>
  <attribution>Louis MacNeice</attribution>
  <line>The Laird o' Phelps spent Hogmanay declaring he was sober,</line>
  <line>Counted his feet to prove the fact and found he had one foot over.</line>
  <line>Mrs. Carmichael had her fifth, looked at the job with repulsion,</line>
  <line>Said to the midwife "Take it away; I'm through with overproduction."</line>
</verse>"""

    DOC2 = """<?xml version='1.0' encoding='iso-8859-1'?>
<nest>
  <bird>cheep</bird> in the
    <nest>
      <bird>cheep</bird> in the <nest>bird</nest>
    </nest>
  <nest>bird</nest> upon a <tree><nest>bird</nest></tree>
</nest>
"""

    DOC3 = """<?xml version="1.0" encoding="iso-8859-1"?>
<labels>
  <label added="2003-06-20">
    <quote>
      <!-- Mixed content -->
      <emph>Midwinter Spring</emph> is its own season&#8230;
    </quote>
    <name>Thomas Eliot</name>
    <address>
      <street>3 Prufrock Lane</street>
      <city>Stamford</city>
      <state>CT</state>
    </address>
  </label>
  <label added="2003-06-10">
    <name>Ezra Pound</name>
    <address>
      <street>45 Usura Place</street>
      <city>Hailey</city>
      <state>ID</state>
    </address>
  </label>
</labels>
    """

    from Ft.Xml.Domlette import NonvalidatingReader
    doc1 = NonvalidatingReader.parseString(DOC1, "http://dummy.ns")
    doc2 = NonvalidatingReader.parseString(DOC2, "http://dummy.ns")
    doc3 = NonvalidatingReader.parseString(DOC3, "http://dummy.ns")

    print "*"*10, "Listing 3", "*"*10
    print "Get elements by tag name:"
    for node in get_elements_by_tag_name(doc1, 'line'):
        print node
    print "Get elements by tag name ns:"
    for node in get_elements_by_tag_name_ns(doc1, None, 'line'):
        print node

    from Ft.Xml.XPath import Evaluate, Context
    from Ft.Xml.Domlette import GetAllNs

    def get_elements_by_tag_name_ns_xpath(node, name):
        #I need the context node and a set of namespace mappings
        #In case they use a QName for name e.g. ns:local
        #Use the dictionary of all namespace mappings on the given node
        context = Context.Context(node, GetAllNs(node))
        #Now do an XPath evaluation of an expression that searches
        #The descendant-or-self axis for all nodes whose name
        #matches the given
        return Evaluate(".//" + name, context=context)

    print "Get elements by tag name (XPath):"
    for node in get_elements_by_tag_name_ns_xpath(doc1, 'line'):
        print node

    def get_elements_by_tag_name_ns_raw(node, ns, local):
        if node.nodeType == Node.ELEMENT_NODE and \
           node.namespaceURI == ns and node.localName == local:
            result = [node]
        else:
            result = []
        for child in node.childNodes:
            result.extend(get_elements_by_tag_name_ns_raw(child, ns, local))
        return result

    print "Get elements by tag name (Raw recursion):"
    for node in get_elements_by_tag_name_ns_raw(doc2, None, 'nest'):
        print node

    print "abs_path test"
    print repr(abs_path(doc3))
    node = doc3.documentElement
    print repr(abs_path(node))
    node = get_first_element_by_tag_name_ns(doc3, None, 'label')
    print repr(abs_path(node))
    node = list(get_elements_by_tag_name_ns(doc3, None, 'label'))[1]
    print repr(abs_path(node))
    node = node.attributes[(None, 'added')]
    print repr(abs_path(node))

    NAME = "GAMES"
    NAME = "v"
    NAME = "LINE"
    BIG_FILE_NAMES = ["1998statistics.xml", "ot.xml", "hamlet.xml"]
    for fname in BIG_FILE_NAMES:
        try:
            big_doc = NonvalidatingReader.parseUri(fname)
        except:
            print "Unable to load", fname
            continue
        element_count = len(
            get_elements_by_tag_name_ns_raw(big_doc, None, NAME)
            )
        print "Number of", NAME, "elements found:", element_count
        import time
        start = time.time()
        for i in range(10):
            for node in get_elements_by_tag_name_ns(big_doc, None, NAME):
                pass
        end = time.time()
        print "Generator getElementsByTagNameNS: %.2f"%(end-start)

        start = time.time()
        for i in range(10):
            for node in get_elements_by_tag_name_ns_xpath(big_doc, NAME):
                pass
        end = time.time()
        print "XPath getElementsByTagNameNS: %.2f"%(end-start)

        start = time.time()
        for i in range(10):
            for node in get_elements_by_tag_name_ns_raw(big_doc, None, NAME):
                pass
        end = time.time()
        print "Raw recursion getElementsByTagNameNS: %.2f"%(end-start)

