How to use the pywikibot.textlib.replaceExcept function in pywikibot

To help you get started, we’ve selected a few pywikibot examples that show popular ways `pywikibot.textlib.replaceExcept` is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: wikimedia/pywikibot, tests/textlib_tests.py (view on GitHub)
def test_regex_replace(self):
        """Test replacing with a regex."""
        self.assertEqual(textlib.replaceExcept('A123B', r'\d', r'x', [],
                                               site=self.site),
                         'AxxxB')
        self.assertEqual(textlib.replaceExcept('A123B', r'\d+', r'x', [],
                                               site=self.site),
                         'AxB')
        self.assertEqual(textlib.replaceExcept('A123B',
                                               r'A(\d)2(\d)B', r'A\1x\2B', [],
                                               site=self.site),
                         'A1x3B')
        self.assertEqual(
            textlib.replaceExcept('', r'(a?)', r'\1B', [], site=self.site),
            'B')
        self.assertEqual(
            textlib.replaceExcept('abc', r'x*', r'-', [], site=self.site),
            '-a-b-c-')
        # This is different from re.sub() as re.sub() doesn't
Source: wikimedia/pywikibot, tests/textlib_tests.py (view on GitHub)
def test_replace_with_marker(self):
        """Check where replaceExcept puts the ``marker`` string.

        Two cases are asserted, both on the input ``'AxyxB'`` with the
        marker ``'.'`` and no exceptions:

        * replacing ``'x'`` by ``'y'`` is expected to give ``'Ayyy.B'``,
          i.e. the marker directly follows the last replacement;
        * replacing ``'1'`` (which never matches) is expected to give
          ``'AxyxB.'``, i.e. the marker is appended to the unchanged text.
        """
        # (pattern to replace, expected result including the marker)
        cases = (
            ('x', 'Ayyy.B'),
            ('1', 'AxyxB.'),
        )
        for old, expected in cases:
            result = textlib.replaceExcept('AxyxB', old, 'y', [],
                                           marker='.', site=self.site)
            self.assertEqual(result, expected)
Source: wikimedia/pywikibot, tests/textlib_tests.py (view on GitHub)
'[[File:x|]]')

        self.assertEqual(
            textlib.replaceExcept(
                '[[File:x|foo|bar x]] x',
                'x', 'y', ['file'], site=self.site),
            '[[File:x|foo|bar x]] y')

        self.assertEqual(
            textlib.replaceExcept(
                '[[File:x|]][[File:x|foo]]',
                'x', 'y', ['file'], site=self.site),
            '[[File:x|]][[File:x|foo]]')

        self.assertEqual(
            textlib.replaceExcept(
                '[[NonFile:x]]',
                'x', 'y', ['file'], site=self.site),
            '[[NonFile:y]]')

        self.assertEqual(
            textlib.replaceExcept(
                '[[File:]]',
                'File:', 'NonFile:', ['file'], site=self.site),
            '[[File:]]')

        self.assertEqual(
            textlib.replaceExcept(
                '[[File:x|[[foo]].]]',
                'x', 'y', ['file'], site=self.site),
            '[[File:x|[[foo]].]]')
Source: wikimedia/pywikibot, tests/textlib_tests.py (view on GitHub)
'x', 'y', ['source'],
                                               site=self.site),
                         '<source>x')
        self.assertEqual(textlib.replaceExcept(
            'x',
            'x', 'y', ['source'], site=self.site),
            'x')
        self.assertEqual(
            textlib.replaceExcept('x',
                                  'x', 'y', ['source'], site=self.site),
            'x')
        self.assertEqual(textlib.replaceExcept('x',
                                               'x', 'y', ['includeonly'],
                                               site=self.site),
                         'x')
        self.assertEqual(textlib.replaceExcept('x', 'x', 'y',
                                               ['ref'], site=self.site),
                         'x')
        self.assertEqual(textlib.replaceExcept('A',
                                               'x', 'y',
                                               ['ref'], site=self.site),
                         'A')
        self.assertEqual(textlib.replaceExcept(' xA ', 'x', 'y',
                                               ['startspace'], site=self.site),
                         ' xA ')
        self.assertEqual(textlib.replaceExcept(':xA ', 'x', 'y',
                                               ['startcolon'], site=self.site),
                         ':xA ')
        self.assertEqual(textlib.replaceExcept('x<table></table>', 'x', 'y',
                                               ['table'], site=self.site),
                         'x<table></table>')
        self.assertEqual(textlib.replaceExcept('x [http://www.sample.com x]',
Source: wikimedia/pywikibot, pywikibot/cosmetic_changes.py (view on GitHub)
# FIXME: use textlib.NON_LATIN_DIGITS
        # valid digits
        digits = {
            'ckb': u'٠١٢٣٤٥٦٧٨٩',
            'fa': u'۰۱۲۳۴۵۶۷۸۹',
        }
        faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
        new = digits.pop(self.site.code)
        # This only works if there are only two items in digits dict
        old = digits[list(digits.keys())[0]]
        # not to let bot edits in latin content
        exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
                                     % {'fa': faChrs}))
        text = textlib.replaceExcept(text, ',', '،', exceptions, site=self.site)
        if self.site.code == 'ckb':
            text = textlib.replaceExcept(text,
                                         '\u0647([.\u060c_&lt;\\]\\s])',
                                         '\u06d5\\1', exceptions,
                                         site=self.site)
            text = textlib.replaceExcept(text, 'ه\u200c', 'ە', exceptions,
                                         site=self.site)
            text = textlib.replaceExcept(text, 'ه', 'ھ', exceptions,
                                         site=self.site)
        text = textlib.replaceExcept(text, 'ك', 'ک', exceptions,
                                     site=self.site)
        text = textlib.replaceExcept(text, '[ىي]', 'ی', exceptions,
                                     site=self.site)

        return text

        # FIXME: split this function into two.
        # replace persian/arabic digits
Source: wikimedia/pywikibot, scripts/imagetransfer.py (view on GitHub)
"""
        sourceSite = sourceImagePage.site
        url = sourceImagePage.fileUrl().encode('utf-8')
        pywikibot.output('URL should be: ' + url)
        # localize the text that should be printed on image description page
        try:
            description = sourceImagePage.get()
            # try to translate license templates
            if (sourceSite.sitename,
                    self.targetSite.sitename) in licenseTemplates:
                for old, new in licenseTemplates[
                        (sourceSite.sitename,
                         self.targetSite.sitename)].items():
                    new = '{{%s}}' % new
                    old = re.compile('{{%s}}' % old)
                    description = textlib.replaceExcept(description, old, new,
                                                        ['comment', 'math',
                                                         'nowiki', 'pre'])

            description = i18n.twtranslate(self.targetSite,
                                           'imagetransfer-file_page_message',
                                           {'site': sourceSite,
                                            'description': description})
            description += '\n\n'
            description += sourceImagePage.getFileVersionHistoryTable()
            # add interwiki link
            if sourceSite.family == self.targetSite.family:
                description += '\n\n{0}'.format(sourceImagePage)
        except pywikibot.NoPage:
            description = ''
            pywikibot.output(
                'Image does not exist or description page is empty.')
Source: wikimedia/pywikibot, pywikibot/cosmetic_changes.py (view on GitHub)
# valid digits
        digits = {
            'ckb': '٠١٢٣٤٥٦٧٨٩',
            'fa': '۰۱۲۳۴۵۶۷۸۹',
        }
        faChrs = 'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
        new = digits.pop(self.site.code)
        # This only works if there are only two items in digits dict
        old = digits[list(digits.keys())[0]]
        # not to let bot edits in latin content
        exceptions.append(re.compile('[^%(fa)s] *?\"*? *?, *?[^%(fa)s]'
                                     % {'fa': faChrs}))
        text = textlib.replaceExcept(text, ',', '،', exceptions,
                                     site=self.site)
        if self.site.code == 'ckb':
            text = textlib.replaceExcept(text,
                                         '\u0647([.\u060c_&lt;\\]\\s])',
                                         '\u06d5\\1', exceptions,
                                         site=self.site)
            text = textlib.replaceExcept(text, 'ه\u200c', 'ە', exceptions,
                                         site=self.site)
            text = textlib.replaceExcept(text, 'ه', 'ھ', exceptions,
                                         site=self.site)
        text = textlib.replaceExcept(text, 'ك', 'ک', exceptions,
                                     site=self.site)
        text = textlib.replaceExcept(text, '[ىي]', 'ی', exceptions,
                                     site=self.site)

        return text

        # FIXME: split this function into two.
        # replace persian/arabic digits
Source: wikimedia/pywikibot, pywikibot/cosmetic_changes.py (view on GitHub)
def fixTypo(self, text):
        """Fix the notation of some units in the given text.

        Three replacements are run through ``textlib.replaceExcept`` in
        this order:

        1. ``ccm`` following a digit (optionally separated by whitespace
           and/or ``&nbsp;``) becomes ``&nbsp;cm³``.
        2. ``º`` or ``°`` between a digit and ``C``/``F`` becomes
           ``&nbsp;°`` (digit and letter are kept).
        3. any remaining ``º`` directly before ``C``/``F`` becomes ``°``.

        The first replacement uses only the named exceptions below; for
        the last two a compiled ``«.*?»`` pattern is passed as one more
        exception.

        :param text: the text to work on
        :return: the text after all three replacements
        """
        skipped = ['nowiki', 'comment', 'math', 'pre', 'source',
                   'startspace', 'gallery', 'hyperlink', 'interwiki',
                   'link']

        # change <number> ccm -> <number> cm³
        text = textlib.replaceExcept(
            text, r'(\d)\s*(?:&nbsp;)?ccm', r'\1&nbsp;cm³', skipped,
            site=self.site)

        # Solve wrong Nº sign with °C or °F
        # additional exception requested on fr-wiki for this stuff;
        # it must only be added after the ccm replacement above has run
        skipped.append(re.compile('«.*?»', re.UNICODE))

        # (pattern, replacement) pairs, applied in the listed order
        degree_fixes = (
            (r'(\d)\s*(?:&nbsp;)?[º°]([CF])', r'\1&nbsp;°\2'),
            ('º([CF])', '°' + r'\1'),
        )
        for wrong, right in degree_fixes:
            text = textlib.replaceExcept(text, wrong, right, skipped,
                                         site=self.site)
        return text
github wikimedia / pywikibot / pywikibot / cosmetic_changes.py View on Github external
text,
            r'\[\[(?Phttps?://[^\]]+?)\]\]?',
            r'[\g]', exceptions, site=self.site)
        # external link and description separated by a pipe, with
        # whitespace in front of the pipe, so that it is clear that
        # the dash is not a legitimate part of the URL.
        text = textlib.replaceExcept(
            text,
            r'\[(?Phttps?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
            r'[\g \g<label>]', exceptions)
        # dash in external link, where the correct end of the URL can
        # be detected from the file extension. It is very unlikely that
        # this will cause mistakes.
        extensions = [r'\.{0}'.format(ext)
                      for ext in ['pdf', 'html?', 'php', 'aspx?', 'jsp']]
        text = textlib.replaceExcept(
            text,
            r'\[(?Phttps?://[^\|\] ]+?(' + '|'.join(extensions) + r')) *'
            r'\| *(?P<label>[^\|\]]+?)\]',
            r'[\g \g<label>]', exceptions)
        return text
</label></label></label></label>
Source: wikimedia/pywikibot, pywikibot/cosmetic_changes.py (view on GitHub)
# FIXME: split this function into two.
        # replace persian/arabic digits
        # deactivated due to bug T57185
        for i in range(0, 10):
            text = textlib.replaceExcept(text, old[i], new[i], exceptions)
        # do not change digits in class, style and table params
        pattern = re.compile(r'\w+=(".+?"|\d+)', re.UNICODE)
        exceptions.append(pattern)
        # do not change digits inside html-tags
        pattern = re.compile('&lt;[/]*?[^', re.UNICODE)
        exceptions.append(pattern)
        exceptions.append('table')  # exclude tables for now
        # replace digits
        for i in range(0, 10):
            text = textlib.replaceExcept(text, str(i), new[i], exceptions)
        return text