How to use the ruia.field.BaseField class in ruia

To help you get started, we’ve selected a few ruia examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github howie6879 / ruia / ruia / field.py View on Github external
def _parse_element(self, element):
        """
        Serialize the given element back into a markup string.

        :param element: element passed to ``etree.tostring``
        :return: the serialized element, decoded from UTF-8 bytes to ``str``
        """
        serialized = etree.tostring(element, encoding="utf-8")
        return serialized.decode(encoding="utf-8")


class TextField(_LxmlElementField):
    """
    Field that extracts the text content of an element.
    """

    def _parse_element(self, element):
        """
        Join the stripped text fragments of ``element`` into one string.

        :param element: object exposing ``itertext()``
        :return: the concatenated text, or ``self.default`` when it is empty
        """
        # Each fragment is stripped on its own and joined with no separator.
        text = "".join(fragment.strip() for fragment in element.itertext())
        if not text:
            return self.default
        return text


class RegexField(BaseField):
    """
    This field is used to get raw html code by regular expression.
    RegexField uses standard library `re` inner, that is to say it has a better performance than _LxmlElementField.
    """

    def __init__(self, re_select: str, re_flags=0, default="", many: bool = False):
        """
        Remember the pattern source and compile it once up front.

        :param re_select: regular expression source text
        :param re_flags: flags forwarded to ``re.compile``
        :param default: inherit
        :param many: inherit
        """
        super().__init__(default=default, many=many)
        self._re_select = re_select
        self._re_object = re.compile(re_select, flags=re_flags)

    def _parse_match(self, match):
        """
        If there is a group dict, return the dict;
            even if there's only one value in the dict, return a dictionary;
        If there is a group in match, return the group;
            if there is only one value in the group, return the value;
github howie6879 / ruia / ruia / item.py View on Github external
def __new__(cls, name, bases, attrs):
        """
        Build the new class, moving its field declarations into ``__fields``.

        Every attribute whose value is a ``BaseField`` instance is removed
        from ``attrs`` and collected into a dict stored under the
        ``"__fields"`` key; the class is then created from the remaining
        attributes.

        :param name: name of the class being created
        :param bases: base classes of the class being created
        :param attrs: class namespace; mutated in place
        :return: the newly created class
        """
        # Build the mapping directly instead of going through a set of
        # tuples: a set loses declaration order and requires every field
        # object to be hashable. Iterate over a snapshot because ``attrs``
        # is mutated by ``pop`` while collecting.
        fields = {
            field_name: attrs.pop(field_name)
            for field_name, value in list(attrs.items())
            if isinstance(value, BaseField)
        }
        attrs["__fields"] = fields
        return type.__new__(cls, name, bases, attrs)
github howie6879 / ruia / ruia / field.py View on Github external
def __init__(self, default="", many: bool = False):
        """
        Store the options common to every field.

        lxml reference: http://lxml.de/index.html

        :param default: default value
        :param many: if there are many fields in one page
        """
        self.default, self.many = default, many

    def extract(self, *args, **kwargs):
        """
        Placeholder for the extraction logic.

        :raises NotImplementedError: always; this implementation does nothing else
        """
        message = "extract is not implemented."
        raise NotImplementedError(message)


class _LxmlElementField(BaseField):
    def __init__(
        self,
        css_select: str = None,
        xpath_select: str = None,
        default=None,
        many: bool = False,
    ):
        """
        Record the selectors for this field.

        :param css_select: CSS selector, see http://lxml.de/cssselect.html
        :param xpath_select: XPath expression, see
            http://www.w3school.com.cn/xpath/index.asp
        :param default: inherit
        :param many: inherit
        """
        super().__init__(default=default, many=many)
        self.css_select, self.xpath_select = css_select, xpath_select