How to use the extruct.rdflibxml.utils.has_one_of_attributes function in extruct

To help you get started, we’ve selected a few extruct examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapinghub / extruct / extruct / rdflibxml / parse.py View on Github external
#
    if not has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src") :
        # nop, there is nothing to do here, just go down the tree and return...
        for n in node.childNodes :
            if n.nodeType == Node.ELEMENT_NODE : parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
        return

    #-----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object  = None
    prop_object        = None

    if has_one_of_attributes(node, "rel", "rev")  :
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource
        current_subject = state.getResource("about","src")

        # get_URI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject == None :
            if node.hasAttribute("typeof") :
                current_subject = BNode()
            else :
                current_subject = parent_object
        else :
            state.reset_list_mapping(origin = current_subject)

        # set the object resource
        current_object = state.getResource("resource", "href")
github scrapinghub / extruct / extruct / rdflibxml / parse.py View on Github external
# This may add some triples to the target graph that do not originate from RDFa parsing
    # If the function returns TRUE, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if state.options.embedded_rdf and node.nodeType == Node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state) :
        return

    #---------------------------------------------------------------------------------
    # calling the host language specific massaging of the DOM
    if state.options.host_language in host_dom_transforms and node.nodeType == Node.ELEMENT_NODE :
        for func in host_dom_transforms[state.options.host_language] : func(node, state)

    #---------------------------------------------------------------------------------
    # First, let us check whether there is anything to do at all. Ie,
    # whether there is any relevant RDFa specific attribute on the element
    #
    if not has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src", "vocab", "prefix") :
        # nop, there is nothing to do here, just go down the tree and return...
        for n in node.childNodes :
            if n.nodeType == Node.ELEMENT_NODE : parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
        return

    #-----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object  = None
    typed_resource    = None

    if has_one_of_attributes(node, "rel", "rev")  :
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource
github scrapinghub / extruct / extruct / rdflibxml / property.py View on Github external
def generate_1_1(self) :
        """Generate the property object, 1.1 version"""

        #########################################################################
        # See if the target is _not_ a literal
        irirefs      = ("resource", "href", "src")
        noiri        = ("content", "datatype", "rel", "rev")
        notypediri   = ("content", "datatype", "rel", "rev", "about", "about_pruned")
        if has_one_of_attributes(self.node, irirefs) and not has_one_of_attributes(self.node, noiri) :
            # @href/@resource/@src takes the lead here...
            object = self.state.getResource(irirefs)
        elif self.node.hasAttribute("typeof") and not has_one_of_attributes(self.node, notypediri) and self.typed_resource != None :
                # a @typeof creates a special branch in case the typed resource was set during parsing
                object = self.typed_resource
        else :
            # We have to generate a literal

            # Get, if exists, the value of @datatype
            datatype = ''
            dtset    = False
            if self.node.hasAttribute("datatype") :
                dtset = True
                dt = self.node.getAttribute("datatype")
                if dt != "" :
                    datatype = self.state.getURI("datatype")
github scrapinghub / extruct / extruct / rdflibxml / parse.py View on Github external
state.reset_list_mapping(origin = current_subject)

        # set the object resource
        current_object = state.getResource("resource", "href", "src")

        if node.hasAttribute("typeof") and not node.hasAttribute("about") :
            if current_object == None :
                current_object = BNode()
            typed_resource = current_object

        if not node.hasAttribute("inlist") and current_object != None :
            # In this case the newly defined object is, in fact, the head of the list
            # just reset the whole thing.
            state.reset_list_mapping(origin = current_object)

    elif  node.hasAttribute("property") and not has_one_of_attributes(node, "content", "datatype") :
        current_subject = header_check(parent_object)

        # this is the case when the property may take hold of @src and friends...
        if node.hasAttribute("about") :
            current_subject = state.getURI("about")
            if node.hasAttribute("typeof") : typed_resource = current_subject

        # getURI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject == None :
            current_subject = parent_object
        else :
            state.reset_list_mapping(origin = current_subject)

        if typed_resource == None and node.hasAttribute("typeof") :
            typed_resource = state.getResource("resource", "href", "src")
github scrapinghub / extruct / extruct / rdflibxml / property.py View on Github external
def generate_1_1(self) :
        """Generate the property object, 1.1 version"""

        #########################################################################
        # See if the target is _not_ a literal
        irirefs      = ("resource", "href", "src")
        noiri        = ("content", "datatype", "rel", "rev")
        notypediri   = ("content", "datatype", "rel", "rev", "about", "about_pruned")
        if has_one_of_attributes(self.node, irirefs) and not has_one_of_attributes(self.node, noiri) :
            # @href/@resource/@src takes the lead here...
            object = self.state.getResource(irirefs)
        elif self.node.hasAttribute("typeof") and not has_one_of_attributes(self.node, notypediri) and self.typed_resource != None :
                # a @typeof creates a special branch in case the typed resource was set during parsing
                object = self.typed_resource
        else :
            # We have to generate a literal

            # Get, if exists, the value of @datatype
            datatype = ''
            dtset    = False
            if self.node.hasAttribute("datatype") :
                dtset = True
                dt = self.node.getAttribute("datatype")
                if dt != "" :
                    datatype = self.state.getURI("datatype")

            # Language suppression is set in case some elements explicitly want to suppress the effect of language
            # There were discussions, for example, that the <time> element should do so. Although,
github scrapinghub / extruct / extruct / rdflibxml / transform / __init__.py View on Github external
def set_about(node) :
    """Add an empty C{@about} to C{node} when it has no subject-setting attribute.

    If the element carries C{@rel} or C{@rev}, only C{@about} and C{@src} are
    looked for; otherwise C{@href}, C{@resource}, C{@about} and C{@src} are.
    When none of the looked-for attributes is present, C{about=""} is set on
    the element.

    @param node: element to inspect; it is modified in place
    """
    if has_one_of_attributes(node, "rel", "rev") :
        # Test the element that was passed in. The previous code tested the
        # free variable 'top', which only worked because callers happened to
        # pass their own 'top' loop variable as the argument.
        if not has_one_of_attributes(node, "about", "src") :
            node.setAttribute("about","")
    else :
        if not has_one_of_attributes(node, "href", "resource", "about", "src") :
            node.setAttribute("about","")
github scrapinghub / extruct / extruct / rdflibxml / parse.py View on Github external
def header_check(p_obj) :
    """Special disposition for the HTML <head> and <body> elements.

    Returns C{p_obj} only when the host language is one of the (X)HTML
    flavours, the current node is named "head" or "body", and that node has
    none of C{@about}, C{@resource}, C{@src}, C{@href}. In every other case
    the result is C{None}.

    @param p_obj: value handed back unchanged in the special case
    @return: C{p_obj} or C{None}
    """
    # Not an (X)HTML host language: nothing special to do
    if state.options.host_language not in ( HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5 ) :
        return None
    # Only <head> and <body> get the special treatment
    if node.nodeName not in ("head", "body") :
        return None
    # An explicit subject/object attribute on the element overrides it
    if has_one_of_attributes(node, "about", "resource", "src", "href") :
        return None
    return p_obj
github scrapinghub / extruct / extruct / rdflibxml / transform / __init__.py View on Github external
    @type options: L{Options}
    @param state: top level execution state
    @type state: L{State}
    """
    def set_about(node) :
        """Add an empty C{@about} to C{node} when it has no subject-setting attribute.

        If the element carries C{@rel} or C{@rev}, only C{@about} and C{@src} are
        looked for; otherwise C{@href}, C{@resource}, C{@about} and C{@src} are.
        When none of the looked-for attributes is present, C{about=""} is set on
        the element.

        @param node: element to inspect; it is modified in place
        """
        if has_one_of_attributes(node, "rel", "rev") :
            # Test the element that was passed in. The previous code tested the
            # enclosing scope's 'top' loop variable, which is the same element
            # only because the helper is invoked as set_about(top).
            if not has_one_of_attributes(node, "about", "src") :
                node.setAttribute("about","")
        else :
            if not has_one_of_attributes(node, "href", "resource", "about", "src") :
                node.setAttribute("about","")

    from ..host import HostLanguage
    from ..utils import has_one_of_attributes

    if not has_one_of_attributes(root, "about") :
        # The situation is a bit complicated: if a @resource is present without anything else, then it sets
        # the subject, ie, should be accepted...
        if has_one_of_attributes(root, "resource", "href", "src") :
            if has_one_of_attributes(root, "rel", "rev","property") :
                root.setAttribute("about","")
        else :
            root.setAttribute("about","")

    if options.host_language in [ HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5 ] :
        if state.rdfa_version >= "1.1" :
            pass
        else :
            for top in root.getElementsByTagName("head") :
                if not has_one_of_attributes(top, "href", "resource", "about", "src") :
                    set_about(top)
            for top in root.getElementsByTagName("body") :
github scrapinghub / extruct / extruct / rdflibxml / transform / __init__.py View on Github external
def set_about(node) :
    """Add an empty C{@about} to C{node} when it has no subject-setting attribute.

    If the element carries C{@rel} or C{@rev}, only C{@about} and C{@src} are
    looked for; otherwise C{@href}, C{@resource}, C{@about} and C{@src} are.
    When none of the looked-for attributes is present, C{about=""} is set on
    the element.

    @param node: element to inspect; it is modified in place
    """
    if has_one_of_attributes(node, "rel", "rev") :
        # Test the element that was passed in. The previous code tested the
        # free variable 'top', which only worked because callers happened to
        # pass their own 'top' loop variable as the argument.
        if not has_one_of_attributes(node, "about", "src") :
            node.setAttribute("about","")
    else :
        if not has_one_of_attributes(node, "href", "resource", "about", "src") :
            node.setAttribute("about","")