How to use the ural.cli.utils.custom_reader function in ural

To help you get started, we’ve selected a few examples of ural.cli.utils.custom_reader, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github medialab / ural / ural / cli / join.py View on Github external
def join_action(namespace):

    if namespace.large_cells:
        csv.field_size_limit(sys.maxsize)
    file1_headers, file1_position, file1_reader = custom_reader(
        namespace.file1, namespace.column1)
    file2_headers, file2_position, file2_reader = custom_reader(
        namespace.file2, namespace.column2)
    if namespace.select:
        headers = namespace.select + file2_headers
    else:
        headers = file1_headers + file2_headers
    writer = csv.writer(namespace.output)
    writer.writerow(headers)

    trie = LRUTrie()

    for line in file1_reader:
        url = line[file1_position]
        if namespace.select:
            try:
github medialab / ural / ural / cli / join.py View on Github external
def join_action(namespace):

    if namespace.large_cells:
        csv.field_size_limit(sys.maxsize)
    file1_headers, file1_position, file1_reader = custom_reader(
        namespace.file1, namespace.column1)
    file2_headers, file2_position, file2_reader = custom_reader(
        namespace.file2, namespace.column2)
    if namespace.select:
        headers = namespace.select + file2_headers
    else:
        headers = file1_headers + file2_headers
    writer = csv.writer(namespace.output)
    writer.writerow(headers)

    trie = LRUTrie()

    for line in file1_reader:
        url = line[file1_position]
        if namespace.select:
            try:
                metadata = [line[file1_headers.index(
                    x)] for x in namespace.select]
github medialab / ural / ural / cli / normalize.py View on Github external
def normalize_action(namespace):
    """Write the input rows to a CSV with an extra normalized-URL column.

    The rows come from ``custom_reader(namespace.file, namespace.column)``,
    which yields the header list, the index of the URL column and a row
    iterator. The output header gets one extra column named
    ``<column>_normalized``, and every row gets the result of
    ``normalize_url`` on its URL cell appended before being written to
    ``namespace.output``.

    Args:
        namespace: Parsed CLI arguments. The attributes read here are
            ``file``, ``column``, ``output``, ``no_query_sort``,
            ``keep_authentication``, ``strip_trailing_slash`` and
            ``keep_index``.
    """
    # The "no_*" / "keep_*" CLI flags are opt-outs, so they are inverted to
    # obtain the corresponding normalize_url keyword arguments.
    normalize_options = {
        "sort_query": not namespace.no_query_sort,
        "strip_authentication": not namespace.keep_authentication,
        "strip_trailing_slash": namespace.strip_trailing_slash,
        "strip_index": not namespace.keep_index,
    }

    columns, url_index, rows = custom_reader(namespace.file, namespace.column)

    # Extend the header row in place with the name of the new column.
    columns.append(namespace.column + "_normalized")
    out = csv.writer(namespace.output)
    out.writerow(columns)

    for row in rows:
        # Each row is extended in place, then emitted as-is.
        row.append(normalize_url(row[url_index], **normalize_options))
        out.writerow(row)