How to use the archivebox.config.OUTPUT_DIR constant in archivebox

To help you get started, we’ve selected a few archivebox examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pirate / ArchiveBox / archivebox / index / html.py View on Github external
def parse_html_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[str]:
    """Yield the URLs recorded in the HTML main index found under out_dir.

    Each line of the index file that contains ``class="link-url"`` produces
    one item: the text between the first and second double quote on that
    line. Nothing is yielded when the index file does not exist.
    """

    html_index = os.path.join(out_dir, HTML_INDEX_FILENAME)
    if not os.path.exists(html_index):
        return ()

    url_marker = 'class="link-url"'
    with open(html_index, 'r', encoding='utf-8') as index_file:
        for row in index_file:
            if url_marker not in row:
                continue
            yield row.split('"')[1]
    return ()
github pirate / ArchiveBox / archivebox / index / json.py View on Github external
def write_json_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
    """write the json link index to a given path"""

    assert isinstance(links, List), 'Links must be a list, not a generator.'
    assert not links or isinstance(links[0].history, dict)
    assert not links or isinstance(links[0].sources, list)

    if links and links[0].history.get('title'):
        assert isinstance(links[0].history['title'][0], ArchiveResult)

    if links and links[0].sources:
        assert isinstance(links[0].sources[0], str)

    main_index_json = {
        **MAIN_INDEX_HEADER,
        'num_links': len(links),
        'updated': datetime.now(),
github pirate / ArchiveBox / archivebox / index / html.py View on Github external
def write_html_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
    """Render the main HTML index for links and write it into out_dir.

    Before rendering, the template entries named by FAVICON_FILENAME,
    ROBOTS_TXT_FILENAME and STATIC_DIR_NAME are copied from TEMPLATES_DIR
    into out_dir, overwriting what is already there. The rendered page is
    then written to HTML_INDEX_FILENAME inside out_dir via atomic_write.
    """

    # Copy order matches the original sequence: favicon, robots, static dir.
    for asset_name in (FAVICON_FILENAME, ROBOTS_TXT_FILENAME, STATIC_DIR_NAME):
        copy_and_overwrite(join(TEMPLATES_DIR, asset_name), join(out_dir, asset_name))

    page = main_index_template(links, finished=finished)
    atomic_write(page, join(out_dir, HTML_INDEX_FILENAME))
github pirate / ArchiveBox / archivebox / main.py View on Github external
def list_links(filter_patterns: Optional[List[str]]=None,
               filter_type: str='exact',
               after: Optional[float]=None,
               before: Optional[float]=None,
               out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
    """Yield links from the main index that pass the given filters.

    A link is skipped when ``after`` is set and its timestamp is lower, or
    when ``before`` is set and its timestamp is higher. If filter_patterns
    is non-empty, the link must additionally satisfy link_matches_filter
    with the given filter_type. With no filters, every link is yielded.

    This is a generator: the data folder check and index load only happen
    once iteration starts.
    """

    check_data_folder(out_dir=out_dir)

    for candidate in load_main_index(out_dir=out_dir):
        # The timestamp is converted only when a bound is actually supplied.
        if after is not None and float(candidate.timestamp) < after:
            continue
        if before is not None and float(candidate.timestamp) > before:
            continue

        if filter_patterns and not link_matches_filter(candidate, filter_patterns, filter_type):
            continue

        yield candidate
github pirate / ArchiveBox / archivebox / index / json.py View on Github external
def parse_json_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
    """Yield a Link for every entry in the JSON main index under out_dir.

    The index file is read as UTF-8 JSON; each item of its top-level
    ``'links'`` value is converted with Link.from_json. Nothing is yielded
    when the index file does not exist.
    """

    json_index = os.path.join(out_dir, JSON_INDEX_FILENAME)
    if not os.path.exists(json_index):
        return ()

    with open(json_index, 'r', encoding='utf-8') as index_file:
        # The whole document is parsed up front; links are then yielded one by one.
        for entry in pyjson.load(index_file)['links']:
            yield Link.from_json(entry)

    return ()
github pirate / ArchiveBox / archivebox / core / views.py View on Github external
def get(self, request):
        """Render the main index page, or redirect to the admin login.

        Unauthenticated requests are redirected to the admin login (with the
        current path as ``next``) unless PUBLIC_INDEX is truthy. Otherwise the
        main index and its meta info are loaded from OUTPUT_DIR and passed to
        the template named by ``self.template``.
        """
        login_required = not (request.user.is_authenticated or PUBLIC_INDEX)
        if login_required:
            return redirect(f'/admin/login/?next={request.path}')

        index_links = load_main_index(out_dir=OUTPUT_DIR)
        index_meta = load_main_index_meta(out_dir=OUTPUT_DIR)

        context = dict(
            updated=index_meta['updated'],
            num_links=index_meta['num_links'],
            links=index_links,
            VERSION=VERSION,
            FOOTER_INFO=FOOTER_INFO,
        )

        return render(template_name=self.template, request=request, context=context)
github pirate / ArchiveBox / archivebox / main.py View on Github external
def help(out_dir: str=OUTPUT_DIR) -> None:
    """Print the ArchiveBox help message and usage"""

    all_subcommands = list_subcommands()
    COMMANDS_HELP_TEXT = '\n    '.join(
        f'{cmd.ljust(20)} {summary}'
        for cmd, summary in all_subcommands.items()
        if cmd in meta_cmds
    ) + '\n\n    ' + '\n    '.join(
        f'{cmd.ljust(20)} {summary}'
        for cmd, summary in all_subcommands.items()
        if cmd in main_cmds
    ) + '\n\n    ' + '\n    '.join(
        f'{cmd.ljust(20)} {summary}'
        for cmd, summary in all_subcommands.items()
        if cmd in archive_cmds
    ) + '\n\n    ' + '\n    '.join(
github pirate / ArchiveBox / archivebox / index / sql.py View on Github external
def get_admins(out_dir: str=OUTPUT_DIR) -> List[str]:
    """Return the Django users that have ``is_superuser`` set.

    Django is set up for out_dir (with ``check_db=False``) before the User
    model is imported, which is why the import lives inside the function.
    The result of ``User.objects.filter(...)`` is returned unmodified.
    """
    setup_django(out_dir, check_db=False)

    # Imported only after setup_django has run.
    from django.contrib.auth.models import User

    superusers = User.objects.filter(is_superuser=True)
    return superusers