How to use wikiextractor - 10 common examples

To help you get started, we’ve selected a few wikiextractor examples based on popular ways the package is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: jodaiber / Annotated-WikiExtractor / annotated_wikiextractor / annotated_wikiextractor.py (View on GitHub)
def main():
    script_name = os.path.basename(sys.argv[0])

    try:
        long_opts = ['help', 'usage', 'compress', 'bytes=', 'output=', 'keep-anchors']
        opts, args = getopt.gnu_getopt(sys.argv[1:], 'kcb:o:', long_opts)
    except getopt.GetoptError:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(1)

    compress = False
    file_size = 500 * 1024
    output_dir = '.'

    for opt, arg in opts:
        if opt == '--help':
            show_help()
            sys.exit()
        elif opt == '--usage':
            wikiextractor.show_usage(sys.stdout, script_name)
            sys.exit()
        elif opt in ('-k', '--keep-anchors'):
            keep_anchors = True
GitHub: jodaiber / Annotated-WikiExtractor / annotated_wikiextractor / annotated_wikiextractor.py (View on GitHub)
file_size = int(arg[:-1]) * 1024 * 1024
                else:
                    file_size = int(arg)
                if file_size < 200 * 1024: raise ValueError()
            except ValueError:
                wikiextractor.show_size_error(script_name, arg)
                sys.exit(2)
        elif opt in ('-o', '--output'):
            if os.path.isdir(arg):
                output_dir = arg
            else:
                wikiextractor.show_file_error(script_name, arg)
                sys.exit(3)

    if len(args) > 0:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(4)

    wiki_extractor = AnnotatedWikiExtractor()
    output_splitter = wikiextractor.OutputSplitter(compress, file_size, output_dir)
    process_data(sys.stdin, wiki_extractor, output_splitter)

    output_splitter.close()
GitHub: jodaiber / Annotated-WikiExtractor / annotated_wikiextractor / annotated_wikiextractor.py (View on GitHub)
opts, args = getopt.gnu_getopt(sys.argv[1:], 'kcb:o:', long_opts)
    except getopt.GetoptError:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(1)

    compress = False
    file_size = 500 * 1024
    output_dir = '.'

    for opt, arg in opts:
        if opt == '--help':
            show_help()
            sys.exit()
        elif opt == '--usage':
            wikiextractor.show_usage(sys.stdout, script_name)
            sys.exit()
        elif opt in ('-k', '--keep-anchors'):
            keep_anchors = True
        elif opt in ('-c', '--compress'):
            compress = True
        elif opt in ('-b', '--bytes'):
            try:
                if arg[-1] in 'kK':
                    file_size = int(arg[:-1]) * 1024
                elif arg[-1] in 'mM':
                    file_size = int(arg[:-1]) * 1024 * 1024
                else:
                    file_size = int(arg)
                if file_size < 200 * 1024: raise ValueError()
            except ValueError:
                wikiextractor.show_size_error(script_name, arg)
GitHub: nournia / wikifier / wikiextractor / annotated_wikiextractor.py (View on GitHub)
opts, args = getopt.gnu_getopt(sys.argv[1:], 'kcb:o:', long_opts)
    except getopt.GetoptError:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(1)

    compress = False
    file_size = 500 * 1024
    output_dir = '.'

    for opt, arg in opts:
        if opt == '--help':
            show_help()
            sys.exit()
        elif opt == '--usage':
            wikiextractor.show_usage(sys.stdout, script_name)
            sys.exit()
        elif opt in ('-k', '--keep-anchors'):
            keep_anchors = True
        elif opt in ('-c', '--compress'):
            compress = True
        elif opt in ('-b', '--bytes'):
            try:
                if arg[-1] in 'kK':
                    file_size = int(arg[:-1]) * 1024
                elif arg[-1] in 'mM':
                    file_size = int(arg[:-1]) * 1024 * 1024
                else:
                    file_size = int(arg)
                if file_size < 200 * 1024: raise ValueError()
            except ValueError:
                wikiextractor.show_size_error(script_name, arg)
GitHub: nournia / wikifier / wikiextractor / annotated_wikiextractor.py (View on GitHub)
file_size = int(arg[:-1]) * 1024 * 1024
                else:
                    file_size = int(arg)
                if file_size < 200 * 1024: raise ValueError()
            except ValueError:
                wikiextractor.show_size_error(script_name, arg)
                sys.exit(2)
        elif opt in ('-o', '--output'):
            if os.path.isdir(arg):
                output_dir = arg
            else:
                wikiextractor.show_file_error(script_name, arg)
                sys.exit(3)

    if len(args) > 0:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(4)

    wiki_extractor = AnnotatedWikiExtractor()
    output_splitter = wikiextractor.OutputSplitter(compress, file_size, output_dir)
    process_data(sys.stdin, wiki_extractor, output_splitter)

    output_splitter.close()
GitHub: nournia / wikifier / wikiextractor / annotated_wikiextractor.py (View on GitHub)
def main():
    script_name = os.path.basename(sys.argv[0])

    try:
        long_opts = ['help', 'usage', 'compress', 'bytes=', 'output=', 'keep-anchors']
        opts, args = getopt.gnu_getopt(sys.argv[1:], 'kcb:o:', long_opts)
    except getopt.GetoptError:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(1)

    compress = False
    file_size = 500 * 1024
    output_dir = '.'

    for opt, arg in opts:
        if opt == '--help':
            show_help()
            sys.exit()
        elif opt == '--usage':
            wikiextractor.show_usage(sys.stdout, script_name)
            sys.exit()
        elif opt in ('-k', '--keep-anchors'):
            keep_anchors = True
GitHub: nournia / wikifier / wikiextractor / annotated_wikiextractor.py (View on GitHub)
wikiextractor.show_size_error(script_name, arg)
                sys.exit(2)
        elif opt in ('-o', '--output'):
            if os.path.isdir(arg):
                output_dir = arg
            else:
                wikiextractor.show_file_error(script_name, arg)
                sys.exit(3)

    if len(args) > 0:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(4)

    wiki_extractor = AnnotatedWikiExtractor()
    output_splitter = wikiextractor.OutputSplitter(compress, file_size, output_dir)
    process_data(sys.stdin, wiki_extractor, output_splitter)

    output_splitter.close()
GitHub: jodaiber / Annotated-WikiExtractor / annotated_wikiextractor / annotated_wikiextractor.py (View on GitHub)
wikiextractor.show_size_error(script_name, arg)
                sys.exit(2)
        elif opt in ('-o', '--output'):
            if os.path.isdir(arg):
                output_dir = arg
            else:
                wikiextractor.show_file_error(script_name, arg)
                sys.exit(3)

    if len(args) > 0:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(4)

    wiki_extractor = AnnotatedWikiExtractor()
    output_splitter = wikiextractor.OutputSplitter(compress, file_size, output_dir)
    process_data(sys.stdin, wiki_extractor, output_splitter)

    output_splitter.close()
GitHub: nournia / wikifier / wikiextractor / annotated_wikiextractor.py (View on GitHub)
try:
                if arg[-1] in 'kK':
                    file_size = int(arg[:-1]) * 1024
                elif arg[-1] in 'mM':
                    file_size = int(arg[:-1]) * 1024 * 1024
                else:
                    file_size = int(arg)
                if file_size < 200 * 1024: raise ValueError()
            except ValueError:
                wikiextractor.show_size_error(script_name, arg)
                sys.exit(2)
        elif opt in ('-o', '--output'):
            if os.path.isdir(arg):
                output_dir = arg
            else:
                wikiextractor.show_file_error(script_name, arg)
                sys.exit(3)

    if len(args) > 0:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(4)

    wiki_extractor = AnnotatedWikiExtractor()
    output_splitter = wikiextractor.OutputSplitter(compress, file_size, output_dir)
    process_data(sys.stdin, wiki_extractor, output_splitter)

    output_splitter.close()
GitHub: jodaiber / Annotated-WikiExtractor / annotated_wikiextractor / annotated_wikiextractor.py (View on GitHub)
try:
                if arg[-1] in 'kK':
                    file_size = int(arg[:-1]) * 1024
                elif arg[-1] in 'mM':
                    file_size = int(arg[:-1]) * 1024 * 1024
                else:
                    file_size = int(arg)
                if file_size < 200 * 1024: raise ValueError()
            except ValueError:
                wikiextractor.show_size_error(script_name, arg)
                sys.exit(2)
        elif opt in ('-o', '--output'):
            if os.path.isdir(arg):
                output_dir = arg
            else:
                wikiextractor.show_file_error(script_name, arg)
                sys.exit(3)

    if len(args) > 0:
        wikiextractor.show_usage(sys.stderr, script_name)
        wikiextractor.show_suggestion(sys.stderr, script_name)
        sys.exit(4)

    wiki_extractor = AnnotatedWikiExtractor()
    output_splitter = wikiextractor.OutputSplitter(compress, file_size, output_dir)
    process_data(sys.stdin, wiki_extractor, output_splitter)

    output_splitter.close()