def remove(filter_str: Optional[str]=None,
           filter_patterns: Optional[List[str]]=None,
           filter_type: str='exact',
           after: Optional[float]=None,
           before: Optional[float]=None,
           yes: bool=False,
           delete: bool=False,
           out_dir: str=OUTPUT_DIR) -> List[Link]:
    """Remove the specified URLs from the archive"""

    check_data_folder(out_dir=out_dir)

    if filter_str and filter_patterns:
        stderr(
            '[X] You should pass either a pattern as an argument, '
            'or pass a list of patterns via stdin, but not both.\n',
            color='red',
        )
        raise SystemExit(2)
    elif not (filter_str or filter_patterns):
        stderr(
            '[X] You should pass either a pattern as an argument, '
            'or pass a list of patterns via stdin.',
            color='red',
        )
        stderr()
        stderr('    {lightred}Hint:{reset} To remove all urls you can run:'.format(**ANSI))
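
# Usage sketch (not from the ArchiveBox source): a minimal example of calling
# remove() directly from Python. The archivebox.main import path and the URL
# are assumptions for illustration only.
from archivebox.main import remove

# Drop one exact URL from the index, skip the confirmation prompt,
# and keep its snapshot folder on disk (delete=False).
remove(filter_str='https://example.com', filter_type='exact', yes=True, delete=False)
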
def list_all(filter_patterns_str: Optional[str]=None,
             filter_patterns: Optional[List[str]]=None,
             filter_type: str='exact',
             status: Optional[str]=None,
             after: Optional[float]=None,
             before: Optional[float]=None,
             sort: Optional[str]=None,
             csv: Optional[str]=None,
             json: bool=False,
             out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
    """List, filter, and export information about archive entries"""

    check_data_folder(out_dir=out_dir)

    if filter_patterns and filter_patterns_str:
        stderr(
            '[X] You should either pass filter patterns as arguments '
            'or via stdin, but not both.\n',
            color='red',
        )
        raise SystemExit(2)
    elif filter_patterns_str:
        filter_patterns = filter_patterns_str.split('\n')

    links = list_links(
        filter_patterns=filter_patterns,
        filter_type=filter_type,
        before=before,
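
# Usage sketch (not from the ArchiveBox source): filter the index by pattern and
# timestamp. Per the annotation above, the result is an iterable of matching
# entries. The import path and the example values are assumptions.
from archivebox.main import list_all

entries = list_all(filter_patterns=['https://example.com'],
                   filter_type='exact',
                   after=1600000000.0)
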
def add(import_str: Optional[str]=None,
        import_path: Optional[str]=None,
        update_all: bool=not ONLY_NEW,
        index_only: bool=False,
        out_dir: str=OUTPUT_DIR) -> List[Link]:
    """Add a new URL or list of URLs to your archive"""

    check_data_folder(out_dir=out_dir)

    if import_str and import_path:
        stderr(
            '[X] You should pass either an import path as an argument, '
            'or pass a list of links via stdin, but not both.\n',
            color='red',
        )
        raise SystemExit(2)
    elif import_str:
        import_path = save_stdin_to_sources(import_str, out_dir=out_dir)
    else:
        import_path = save_file_to_sources(import_path, out_dir=out_dir)

    check_dependencies()

    # Step 1: Load list of links from the existing index
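
# Usage sketch (not from the ArchiveBox source): add one URL by passing it as a
# string, writing only index entries without fetching anything (index_only=True).
# The archivebox.main import path and the URL are assumptions.
from archivebox.main import add

add(import_str='https://example.com', index_only=True)
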
def manage(args: Optional[List[str]]=None, out_dir: str=OUTPUT_DIR) -> None:
    """Run an ArchiveBox Django management command"""

    check_data_folder(out_dir=out_dir)
    setup_django(out_dir)
    from django.core.management import execute_from_command_line

    execute_from_command_line([f'{ARCHIVEBOX_BINARY} manage', *(args or ['help'])])
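
# Usage sketch (not from the ArchiveBox source): forward a Django management
# command through manage(); createsuperuser is a standard Django command used
# here only as an example argument list.
from archivebox.main import manage

manage(args=['createsuperuser'])
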
def list_folders(links: List[Link],
                 status: str,
                 out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:

    check_data_folder(out_dir=out_dir)

    if status == 'indexed':
        return get_indexed_folders(links, out_dir=out_dir)
    elif status == 'archived':
        return get_archived_folders(links, out_dir=out_dir)
    elif status == 'unarchived':
        return get_unarchived_folders(links, out_dir=out_dir)
    elif status == 'present':
        return get_present_folders(links, out_dir=out_dir)
    elif status == 'valid':
        return get_valid_folders(links, out_dir=out_dir)
    elif status == 'invalid':
        return get_invalid_folders(links, out_dir=out_dir)
    elif status == 'duplicate':
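
# Usage sketch (not from the ArchiveBox source): group already-loaded Link
# objects by folder status using the helpers above. The import path is an
# assumption.
from archivebox.main import list_folders, list_links

links = list(list_links())
archived = list_folders(links, status='archived')
print(f'{len(archived)} archived snapshot folders')
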
def info(out_dir: str=OUTPUT_DIR) -> None:
    """Print out some info and statistics about the archive collection"""

    check_data_folder(out_dir=out_dir)

    print('{green}[*] Scanning archive collection main index...{reset}'.format(**ANSI))
    print(f'    {out_dir}/*')
    num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
    size = printable_filesize(num_bytes)
    print(f'    Size: {size} across {num_files} files')
    print()

    links = list(load_main_index(out_dir=out_dir))
    num_json_links = len(links)
    num_sql_links = sum(1 for link in parse_sql_main_index(out_dir=out_dir))
    num_html_links = sum(1 for url in parse_html_main_index(out_dir=out_dir))
    num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
    users = get_admins().values_list('username', flat=True)
    print(f'    > JSON Main Index: {num_json_links} links'.ljust(36), f'(found in {JSON_INDEX_FILENAME})')
    print(f'    > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
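
# Usage sketch (not from the ArchiveBox source): print collection statistics
# for a specific data folder; the import path and the path shown are examples.
from archivebox.main import info

info(out_dir='/path/to/archivebox/data')
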
def list_links(filter_patterns: Optional[List[str]]=None,
               filter_type: str='exact',
               after: Optional[float]=None,
               before: Optional[float]=None,
               out_dir: str=OUTPUT_DIR) -> Iterable[Link]:

    check_data_folder(out_dir=out_dir)

    all_links = load_main_index(out_dir=out_dir)

    for link in all_links:
        if after is not None and float(link.timestamp) < after:
            continue
        if before is not None and float(link.timestamp) > before:
            continue
        if filter_patterns:
            if link_matches_filter(link, filter_patterns, filter_type):
                yield link
        else:
            yield link
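
# Usage sketch (not from the ArchiveBox source): collect entries bookmarked
# inside a timestamp window; link.timestamp is compared as a float above, so
# plain float bounds work. The bound values are arbitrary examples.
from archivebox.main import list_links

recent = [link for link in list_links(after=1590000000.0, before=1600000000.0)]
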
def shell(out_dir: str=OUTPUT_DIR) -> None:
    """Enter an interactive ArchiveBox Django shell"""

    check_data_folder(out_dir=out_dir)
    setup_django(OUTPUT_DIR)

    from django.core.management import call_command
    call_command("shell_plus")