How to use the extruct.rdflibxml.state.ExecutionContext function in extruct

To help you get started, we’ve selected a few extruct examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapinghub / extruct / extruct / rdflibxml / state.py View on Github external
"""
        if self.node.hasAttribute(attr) :
            val = self.node.getAttribute(attr)
        else :
            if attr in ExecutionContext._list :
                return []
            else :
                return None

        # The lookup below may raise an exception if there is no entry for attr. This, actually,
        # should not happen if the code is correct, but it does no harm having it here...
        try :
            func = ExecutionContext._resource_type[attr]
        except :
            # Actually, this should not happen...
            func = ExecutionContext._URI

        if attr in ExecutionContext._list :
            # Allows for a list
            resources = [ func(self, v.strip()) for v in val.strip().split() if v != None ]
            retval = [ r for r in resources if r != None ]
        else :
            retval = func(self, val.strip())
        return retval
    # end getURI
github scrapinghub / extruct / extruct / rdflibxml / parse.py View on Github external
    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc.)
    @type incoming_state: L{state.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: whether the caller has to complete its parent's incomplete triples
    @rtype: Boolean
    """

    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = None
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    #---------------------------------------------------------------------------------
    # Handling the role attribute is pretty much orthogonal to everything else...
    handle_role_attribute(node, graph, state)

    #---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from RDFa parsing.
    # If the function returns True, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if state.options.embedded_rdf and node.nodeType == Node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state) :
        return

    #---------------------------------------------------------------------------------
    # calling the host language specific massaging of the DOM
    if state.options.host_language in host_dom_transforms and node.nodeType == Node.ELEMENT_NODE :
github scrapinghub / extruct / extruct / rdflibxml / parse.py View on Github external
if state.options.host_language in [ HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5 ] :
            if node.nodeName == "head" or node.nodeName == "body" :
                if not has_one_of_attributes(node, "about", "resource", "src", "href") :
                    return p_obj
        else :
            return None

    def lite_check() :
        """
        Extra RDFa Lite check: warn when a C{link} element carries a C{@rel}
        attribute whose value C{CURIE_to_URI} resolves to something other than None.

        This is a closure: C{state} and C{node} are read from the enclosing
        function's scope at call time, so it must only be called once C{state}
        has been set there. Nothing happens unless C{state.options.check_lite}
        is set and the host language is one of HTML5, XHTML5 or XHTML.

        The only effect is a call to C{state.options.add_warning}; there is no
        return value.
        """
        # The check is opt-in...
        if not state.options.check_lite :
            return
        # ...and is only applied to the (X)HTML family of host languages
        if state.options.host_language not in [ HostLanguage.html5, HostLanguage.xhtml5, HostLanguage.xhtml ] :
            return

        if node.tagName == "link" and node.hasAttribute("rel") and state.term_or_curie.CURIE_to_URI(node.getAttribute("rel")) is not None :
            # The element name is spelled out in the message; it refers to the <link> element tested above
            state.options.add_warning("In RDFa Lite, attribute @rel in <link> is only used in non-RDFa way (consider using @property)", node=node)

    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = None
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    #---------------------------------------------------------------------------------
    # Extra warning check on RDFa Lite
    lite_check()

    #---------------------------------------------------------------------------------
    # Handling the role attribute is pretty much orthogonal to everything else...
    handle_role_attribute(node, graph, state)

    #---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from RDFa parsing.
    # If the function returns True, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if state.options.embedded_rdf and node.nodeType == Node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state) :
        return
github scrapinghub / extruct / extruct / rdflibxml / state.py View on Github external
try :
                # To be on the safe side:-)
                t = urlparse(uri)
                return urlunparse((t[0],t[1],t[2],t[3],t[4],""))
            except :
                return uri

        # This is, conceptually, an additional class initialization, but it must be done at run time, otherwise import errors show up
        if len(    ExecutionContext._resource_type ) == 0 :
            ExecutionContext._resource_type = {
                "href"        :    ExecutionContext._URI,
                "src"        :    ExecutionContext._URI,
                "vocab"        :   ExecutionContext._URI,

                "about"        :    ExecutionContext._CURIEorURI,
                "resource"    :    ExecutionContext._CURIEorURI,

                "rel"        :    ExecutionContext._TERMorCURIEorAbsURI,
                "rev"        :    ExecutionContext._TERMorCURIEorAbsURI,
                "datatype"    :    ExecutionContext._TERMorCURIEorAbsURI,
                "typeof"    :    ExecutionContext._TERMorCURIEorAbsURI,
                "property"    :    ExecutionContext._TERMorCURIEorAbsURI,
                "role"        :    ExecutionContext._TERMorCURIEorAbsURI,
            }
        #-----------------------------------------------------------------
        self.node = node

        #-----------------------------------------------------------------
        # Settling the base. In a generic XML, xml:base should be accepted at all levels (though this is not the
        # case in, say, XHTML...)
        # At the moment, it is invoked with a 'None' at the top level of parsing, that is
        # when the <base> element is looked for (for the HTML cases, that is)
github scrapinghub / extruct / extruct / rdflibxml / __init__.py View on Github external
for k,ns in fromg.namespaces() :
                tog.bind(k,ns)

        if graph == None :
            # Create the RDF Graph, that will contain the return triples...
            graph   = Graph()

        # this will collect the content, the 'default graph', as called in the RDFa spec
        default_graph = Graph()

        # get the DOM tree
        topElement = dom.documentElement

        # Create the initial state. This takes care of things
        # like base, top level namespace settings, etc.
        state = ExecutionContext(topElement, default_graph, base=self.required_base if self.required_base != None else "", options=self.options, rdfa_version=self.rdfa_version)

        # Perform the built-in and external transformations on the HTML tree.
        for trans in self.options.transformers + builtInTransformers :
            trans(topElement, self.options, state)

        # This may have changed if the state setting detected an explicit version information:
        self.rdfa_version = state.rdfa_version

        # The top level subject starts with the current document; this
        # is used by the recursion
        # this function is the real workhorse
        parse_one_node(topElement, default_graph, None, state, [])

        # Massage the output graph in terms of rdfa:Pattern and rdfa:copy
        handle_prototypes(default_graph)