How to use the hdfs.Config function in hdfs

To help you get started, we’ve selected a few hdfs.Config examples, drawn from popular ways the library is used in public projects.

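Config parses HdfsCLI's configuration file and builds clients from the aliases defined there. As a minimal sketch, assuming a 'dev' alias exists in your configuration:

from hdfs import Config

# Parse the configuration file (~/.hdfscli.cfg by default, or the path set in
# the HDFSCLI_CONFIG environment variable).
config = Config()

# Build a client for an alias defined in the file; 'dev' is a hypothetical
# alias here. Calling get_client() with no argument uses the default alias.
client = config.get_client('dev')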

github grabbles / grabbit / grabbit / extensions / hdfs.py
            dynamic_getters (bool): If True, a get_{entity_name}() method will
                be dynamically added to the Layout every time a new Entity is
                created. This is implemented by creating a partial function of
                the get() function that sets the target argument to the
                entity name.
            absolute_paths (bool): If True, grabbit uses absolute file paths
                everywhere (including when returning query results). If False,
                the input path will determine the behavior (i.e., relative if
                a relative path was passed, absolute if an absolute path was
                passed).
            regex_search (bool): Whether to require exact matching (True)
                or regex search (False, default) when comparing the query
                string to each entity in .get() calls. This sets a default for
                the instance, but can be overridden in individual .get()
                requests.
        """
        self._hdfs_client = Config().get_client()

        path = abspath(path) if absolute_paths and self._hdfs_client is None \
            else path

        # Preprocess the config file
        if isinstance(config, six.string_types):
            # Note: str.strip removes a *set of characters* from both ends,
            # not a prefix, so drop the scheme explicitly before removing
            # the host component of the URL.
            if config.startswith('hdfs://'):
                config = config[len('hdfs://'):]
            config = '/'.join(config.split('/')[1:])
            config = config.replace(self._hdfs_client.root[1:], '')
            with self._hdfs_client.read(config) as reader:
                config = json.load(reader)

        super(HDFSLayout, self).__init__(path, config, dynamic_getters,
                                         absolute_paths, regex_search)
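
For context, a hedged sketch of how this constructor might be invoked; the HDFS paths are hypothetical, and HDFSLayout is assumed to mirror grabbit's base Layout query API:

from grabbit.extensions.hdfs import HDFSLayout

# Hypothetical project root and layout config stored on HDFS.
layout = HDFSLayout('hdfs://namenode/data/project',
                    'hdfs://namenode/data/project/config.json')

# Query files by entity, as with grabbit's base Layout ('subject' is a
# hypothetical entity defined by the config above).
files = layout.get(subject='01')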

github mtth / hdfs / examples / dataframe-example.py
#!/usr/bin/env python
# encoding: utf-8

"""Dataframe extension example."""

from hdfs import Config
from hdfs.ext.dataframe import read_dataframe, write_dataframe
import pandas as pd


# Get the default alias' client.
client = Config().get_client()

# A sample dataframe.
df = pd.DataFrame.from_records([
  {'A': 1, 'B': 2},
  {'A': 11, 'B': 23}
])

# Write dataframe to HDFS using Avro serialization.
write_dataframe(client, 'data.avro', df, overwrite=True)

# Read the Avro file back from HDFS.
_df = read_dataframe(client, 'data.avro')

# The frames match! (pd.util.testing is deprecated; use pd.testing instead.)
pd.testing.assert_frame_equal(df, _df)
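
Note that the dataframe and Avro helpers live in optional extensions with extra dependencies; HdfsCLI exposes these as extras (for example, pip install hdfs[avro,dataframe]).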

github mtth / hdfs / examples / avro-example.py
#!/usr/bin/env python
# encoding: utf-8

"""Avro extension example."""

from hdfs import Config
from hdfs.ext.avro import AvroReader, AvroWriter


# Get the default alias' client.
client = Config().get_client()

# Some sample data.
records = [
  {'name': 'Ann', 'age': 23},
  {'name': 'Bob', 'age': 22},
]

# Write an Avro file to HDFS (since our records' schema is very simple, we let
# the writer infer it automatically; otherwise we would pass it as an argument).
with AvroWriter(client, 'names.avro', overwrite=True) as writer:
  for record in records:
    writer.write(record)

# Read it back.
with AvroReader(client, 'names.avro') as reader:
  schema = reader.schema # The inferred schema.
  assert list(reader) == records # The records we wrote match what we read.
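
Since the writer's schema was inferred above, here is a hedged sketch of supplying one explicitly instead; AvroWriter accepts a schema argument, and the record schema below is an assumption matching the sample records:

# A hypothetical Avro schema matching the sample records above.
schema = {
  'type': 'record',
  'name': 'Person',
  'fields': [
    {'name': 'name', 'type': 'string'},
    {'name': 'age', 'type': 'int'},
  ],
}

# Pass it to the writer instead of relying on inference.
with AvroWriter(client, 'people.avro', schema=schema, overwrite=True) as writer:
  for record in records:
    writer.write(record)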

github mtth / hdfs / examples / json-example.py
#!/usr/bin/env python
# encoding: utf-8

"""Sample HdfsCLI script.

This example shows how to write files to HDFS, read them back, and perform a
few other simple filesystem operations.

"""

from hdfs import Config
from json import dump, load


# Get the default alias' client. (See the quickstart section in the
# documentation to learn more about this.)
client = Config().get_client()

# Some fake data that we are interested in uploading to HDFS.
model = {
  '(intercept)': 48.,
  'first_feature': 2.,
  'second_feature': 12.,
}

# First, we delete any existing `models/` folder on HDFS.
client.delete('models', recursive=True)

# We can now upload the data, first as CSV.
with client.write('models/1.csv', encoding='utf-8') as writer:
  for item in model.items():
    writer.write(u'%s,%s\n' % item)
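
The script continues in the source file; as an illustrative continuation (not the original's exact code), reading the upload back and listing the folder uses the client's read and list methods:

# Read the CSV back from HDFS.
with client.read('models/1.csv', encoding='utf-8') as reader:
  print(reader.read())

# List the contents of the remote models/ folder.
print(client.list('models'))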