Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
past_item])
def process_changed_claims(claims_differ, current_item, past_item):
    """
    Pairs each modified claim with the earlier claim that shares its snak.

    :Parameters:
        claims_differ : object
            Provides ``changed()``, an iterable of the property keys to
            inspect.
        current_item : object
            Exposes ``claims``, indexable by property key, for the current
            state.
        past_item : object
            Exposes ``claims``, indexable by property key, for the earlier
            state.

    :Returns:
        A `list` of ``(old_claim, new_claim)`` tuples.
    """
    pairs = []
    for prop in claims_differ.changed():
        previous = past_item.claims[prop]

        # Index the earlier claims by snak; a later duplicate snak replaces
        # an earlier one.
        by_snak = {}
        for old in previous:
            by_snak[old.snak] = old

        for candidate in current_item.claims[prop]:
            if candidate.snak not in by_snak:
                # No earlier claim carries this snak.
                continue
            if candidate in previous:
                # An equal claim already existed, so nothing changed.
                continue
            pairs.append((by_snak[candidate.snak], candidate))
    return pairs
# Datasource named "diff.changed_claims"; its value is produced by
# process_changed_claims from the three dependencies listed below.
changed_claims = Datasource(
    "diff.changed_claims",
    process_changed_claims,
    depends_on=[claims_differ, current_item, past_item],
)
def process_added_sources(changed_claims):
    """
    Collects the source entries that appear on the new side of each changed
    claim but whose hash is absent from the old side.

    :Parameters:
        changed_claims : iterable
            ``(old_claim, new_claim)`` pairs. Each claim exposes ``sources``,
            an iterable of mappings from a property key to a list of entries
            that each carry a ``hash`` attribute.

    :Returns:
        A `list` of the newly added source entries, in the order they were
        encountered. Empty when nothing was added.
    """
    added_sources = []
    for old_claim, new_claim in changed_claims:
        # Hashes already present on the old claim. A set keeps the
        # membership test below constant-time.
        known_hashes = set()
        for source in old_claim.sources:
            for p_number in source:
                known_hashes.update(claim.hash for claim in source[p_number])

        for source in new_claim.sources:
            for p_number in source:
                for claim in source[p_number]:
                    if claim.hash not in known_hashes:
                        added_sources.append(claim)

    # The accumulated list was previously built but never handed back to the
    # caller.
    return added_sources
from collections import namedtuple
from .datasource import Datasource
from .revision_diff import revision_diff
def process(revision_diff):
    """
    Builds one string per "insert" operation by joining the tokens that the
    operation covers in the second sequence.

    :Parameters:
        revision_diff : `tuple`
            A 3-item ``(operations, a, b)`` value. Each operation exposes
            ``name``, ``b1`` and ``b2``; ``b`` is sliced with ``b1:b2``.

    :Returns:
        A `list` of `str`, one per "insert" operation, in operation order.
    """
    operations, _, b = revision_diff
    segments = []
    for op in operations:
        if op.name != "insert":
            continue
        inserted_tokens = b[op.b1:op.b2]
        segments.append("".join(inserted_tokens))
    return segments
# Datasource named "contiguous_segments_added"; its value is produced by
# process from the revision_diff dependency.
contiguous_segments_added = Datasource(
    "contiguous_segments_added",
    process,
    depends_on=[revision_diff],
)
def __init__(self, name):
    """
    Creates the datasources that describe a user.

    :Parameters:
        name : `str`
            Prefix for every datasource name created here; each datasource
            is named ``name + ".<field>"``.
    """
    super().__init__(name)

    self.editcount = Datasource(name + ".editcount")
    "`int` : A count of edits the user has ever saved"

    # Each attribute docstring must directly follow its own assignment to be
    # attached to that attribute, so the ISO-format note sits here rather
    # than after `registration`.
    self.registration_str = Datasource(name + ".registration_str")
    "`str` : The date the user registered in ISO format"

    # Derived from registration_str via or_none(mwtypes.Timestamp).
    # NOTE(review): presumably None is passed through unchanged -- confirm
    # against or_none.
    self.registration = Datasource(
        name + ".registration", or_none(mwtypes.Timestamp),
        depends_on=[self.registration_str])
    ":class:`mwtypes.Timestamp` : The date the user registered or None"

    self.groups = Datasource(name + ".groups")
    "`list` ( `str` ) : The groups the user is a member of"

    self.emailable = Datasource(name + ".emailable")
    "`bool` : `True` if the user is emailable, `False` otherwise"

    self.gender = Datasource(name + ".gender")
    "`str` : A string representing the user's ``gender`` preference."
"""
These meta-datasources operate on :class:`revscoring.Datasource`'s that
return `dict`'s
.. autoclass:: revscoring.datasources.meta.dicts.keys
.. autoclass:: revscoring.datasources.meta.dicts.values
"""
from ..datasource import Datasource
class keys(Datasource):
"""
Generates a set of `dict` keys
:Parameters:
dict_datasource : :class:`revscoring.Datasource`
A datasource that generates a `dict`
name : `str`
A name for the new datasource.
"""
def __init__(self, dict_datasource, name=None):
    """
    Sets up a datasource that depends on `dict_datasource` and is computed
    by this object's ``process`` method.

    :Parameters:
        dict_datasource : :class:`revscoring.Datasource`
            A datasource that generates a `dict`
        name : `str`
            A name for the new datasource. The value is passed through
            ``_format_name`` together with the dependency.
    """
    resolved_name = self._format_name(name, [dict_datasource])
    super().__init__(
        resolved_name,
        self.process,
        depends_on=[dict_datasource],
    )
def process(self, d):
def __init__(self, name):
    """
    Creates the datasources that describe a user.

    :Parameters:
        name : `str`
            Prefix for every datasource name created here; each datasource
            is named ``name + ".<field>"``.
    """
    super().__init__(name)

    self.editcount = Datasource(name + ".editcount")
    "`int` : A count of edits the user has ever saved"

    # Each attribute docstring must directly follow its own assignment to be
    # attached to that attribute, so the ISO-format note sits here rather
    # than after `registration`.
    self.registration_str = Datasource(name + ".registration_str")
    "`str` : The date the user registered in ISO format"

    # Derived from registration_str via or_none(mwtypes.Timestamp).
    # NOTE(review): presumably None is passed through unchanged -- confirm
    # against or_none.
    self.registration = Datasource(
        name + ".registration", or_none(mwtypes.Timestamp),
        depends_on=[self.registration_str])
    ":class:`mwtypes.Timestamp` : The date the user registered or None"

    self.groups = Datasource(name + ".groups")
    "`list` ( `str` ) : The groups the user is a member of"

    self.emailable = Datasource(name + ".emailable")
    "`bool` : `True` if the user is emailable, `False` otherwise"

    self.gender = Datasource(name + ".gender")
    "`str` : A string representing the user's ``gender`` preference."
from collections import namedtuple
from .datasource import Datasource
from .revision_diff import revision_diff
def process(revision_diff):
    """
    Gathers every token covered by an "insert" operation into one flat list.

    :Parameters:
        revision_diff : `tuple`
            A 3-item ``(operations, a, b)`` value. Each operation exposes
            ``name``, ``b1`` and ``b2``; ``b`` is sliced with ``b1:b2``.

    :Returns:
        A `list` of the inserted tokens, in operation order.
    """
    operations, _, b = revision_diff
    inserted = []
    for op in operations:
        if op.name == "insert":
            inserted.extend(b[op.b1:op.b2])
    return inserted
# Datasource named "tokens_added"; its value is produced by process from the
# revision_diff dependency.
tokens_added = Datasource(
    "tokens_added",
    process,
    depends_on=[revision_diff],
)
user_doc.get('userid'),
user_doc.get('name'),
user_doc.get('editcount'),
registration,
user_doc.get('groups', []),
user_doc.get('implicitgroups', []),
"emailable" in user_doc,
user_doc.get('gender'),
user_doc.get('block_id'),
user_doc.get('blocked_by'),
user_doc.get('blocked_by_id'),
user_doc.get('block_reason'),
user_doc.get('block_expiry')
)
# Datasource named "user_info"; its value is produced by process from the
# user_doc dependency.
user_info = Datasource(
    "user_info",
    process,
    depends_on=[user_doc],
)
"""
These meta-datasources operate on :class:`revscoring.Datasource`'s that
return `list`'s and `tuple`'s
.. autoclass:: revscoring.datasources.meta.indexable.index
"""
from ..datasource import Datasource
class index(Datasource):
"""
Generates a datasource that returns the value that appears at `i`
:Parameters:
i : `int`
The index of a value to return
default : `mixed`
The value to return if no value exists at `i`. If not specified,
an IndexError will be raised
name : `str`
A name for the new datasource.
"""
def __init__(self, i, datasources, default=NotImplemented, name=None):
name = self._format_name(name, [i, default])
self.i = int(i)
rewrite_name : function
A function to apply to the dependent's name when re-creating it.
cache : dict(:class:`~revscoring.Feature` | :class:`~revscoring.FeatureVector` | :class:`~revscoring.Datasource`)
A map of dependents that have already been converted.
"""
new_name = rewrite_name(dependent.name)
if new_name in cache:
logger.debug("list_of_ify {0} in the cache".format(dependent.name))
return cache[new_name]
else:
logger.debug("list_of_ify is modifying {0} into a list_of".format(dependent.name))
new_dependencies = [list_of_ify(dependency, rewrite_name, cache)
for dependency in dependent.dependencies]
if isinstance(dependent, Datasource):
new_dependent = datasource_expanders.list_of(
dependent, depends_on=new_dependencies, name=new_name)
elif isinstance(dependent, FeatureVector):
new_dependent = datasource_expanders.list_of(
dependent, depends_on=new_dependencies, name=new_name)
elif isinstance(dependent, Feature):
new_dependent = feature_expanders.list_of(
dependent, depends_on=new_dependencies, name=new_name)
else:
raise TypeError("Cannot convert type {0} into a list_of"
.format(type(dependent)))
cache[new_name] = new_dependent
return cache[new_name]
include_page_creation=False,
include_page_suggested=False,
include_content=False):
super().__init__(name)
self.id = Datasource(name + ".id")
"`int` : Revision ID"
self.timestamp_str = Datasource(name + ".timestamp_str")
"`str` : Timestamp the revision was saved in ISO format"
self.timestamp = Datasource(
name + ".timestamp", or_none(mwtypes.Timestamp),
depends_on=[self.timestamp_str])
":class:`mwtypes.Timestamp`: Timestamp the revision was saved"
self.comment = Datasource(name + ".comment")
"`str` : The comment saved with the revision"
self.byte_len = Datasource(name + ".byte_length")
"`int` : The length of the revision content in bytes"
self.minor = Datasource(name + ".minor")
"`bool` : Was the revision flagged as minor?"
self.content_model = Datasource(name + ".content_model")
"`str` : Describes the format of revision content"
if include_content:
self.text = Datasource(name + ".text")
"`str` : The decoded (Unicode) text of the revision content"
if include_parent:
self.parent = Revision(
name + ".parent",
include_parent=False,
include_user_info=False,
include_page=False,