How to use the pyhive.hive module in PyHive

To help you get started, we’ve selected a few PyHive examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github verdict-project / verdict / docker / define_tpch_samples.py View on Github external
from pyhive import hive


# Debug-level logging for verdict; connection targets and flags below can be
# overridden from the command line: [hostname] [meta_setup] [fast_meta].
verdict.set_loglevel("debug")

hostname = sys.argv[1] if len(sys.argv) > 1 else 'presto'
# meta_setup defaults to False; enabled only by an explicit "true" argument.
meta_setup = len(sys.argv) > 2 and sys.argv[2].lower() == "true"
# fast_meta defaults to True; disabled only by an explicit "false" argument.
fast_meta = not (len(sys.argv) > 3 and sys.argv[3].lower() == "false")

# Hive cursor on the same host (default port).
hive_cursor = hive.connect(hostname).cursor()

# Presto DBAPI connection against the Hive catalog.
presto_conn = prestodb.dbapi.connect(
    host=hostname,
    port=8080,
    user='verdict',
    catalog='hive',
    schema='default',
)
presto_cursor = presto_conn.cursor()
# Verdict session backed by the same Presto host, with metadata preloading.
v = verdict.presto(presto_host=hostname, preload_cache=True)

part_col = 'verdictcol'
sample_schema = 'verdict'

# TODO: samples for customers table
github apache / incubator-superset / superset / db_engines / hive.py View on Github external
.. note::
        This is not a part of DB-API.
    """
    from pyhive import hive
    from TCLIService import ttypes
    from thrift import Thrift  # pylint: disable=import-error

    # Default to fetching the next batch of log lines when no orientation
    # was supplied by the caller.
    orientation = orientation or ttypes.TFetchOrientation.FETCH_NEXT
    try:
        # Fast path: ask the Thrift service for the operation log directly.
        req = ttypes.TGetLogReq(operationHandle=self._operationHandle)
        logs = self._connection.client.GetLog(req).log
        return logs
    # raised if Hive is used
    except (ttypes.TApplicationException, Thrift.TApplicationException):
        # Hive does not implement GetLog; fall back to paging logs out of
        # FetchResults with fetchType=1 instead.
        if self._state == self._STATE_NONE:
            raise hive.ProgrammingError("No query yet")
        logs = []
        # Keep fetching until an empty batch signals the end of the log.
        while True:
            req = ttypes.TFetchResultsReq(
                operationHandle=self._operationHandle,
                orientation=ttypes.TFetchOrientation.FETCH_NEXT,
                maxRows=self.arraysize,
                fetchType=1,  # 0: results, 1: logs
            )
            response = self._connection.client.FetchResults(req)
            # Raise on any non-success Thrift status.
            hive._check_status(response)
            assert not response.results.rows, "expected data in columnar format"
            assert len(response.results.columns) == 1, response.results.columns
            # Log lines arrive as a single column; unwrap to a list of strings.
            new_logs = hive._unwrap_column(response.results.columns[0])
            logs += new_logs
            if not new_logs:
                break
github linkedin / WhereHows / metadata-ingestion / hive-etl / hive_etl.py View on Github external
def hive_query(query):
    """Execute *query* against the HiveStore and return all result rows.

    Runs the query asynchronously, streaming Hive execution logs to stdout
    while the operation is initializing or running, then fetches the full
    result set once the query leaves the running states.

    :param str query: HiveQL statement to execute.
    :return: list of result rows from ``cursor.fetchall()``.
    """
    cursor = hive.connect(HIVESTORE).cursor()
    # ``async`` became a reserved keyword in Python 3.7, so ``async=True``
    # is a SyntaxError there; PyHive renamed the argument to ``async_``.
    cursor.execute(query, async_=True)
    status = cursor.poll().operationState
    while status in (TOperationState.INITIALIZED_STATE, TOperationState.RUNNING_STATE):
        # Relay server-side execution logs as they are produced.
        logs = cursor.fetch_logs()
        for message in logs:
            sys.stdout.write(message)
        status = cursor.poll().operationState
    results = cursor.fetchall()
    return results
github crflynn / databricks-dbapi / databricks_dbapi / databricks.py View on Github external
"""Provide a function to create a Hive connection to a Databricks cluster."""
import base64
import sys

from pyhive import hive
# Make all exceptions visible in this module per DB-API
from pyhive.exc import *
from thrift.transport import THttpClient


# Major Python version (2 or 3), used to branch on version-specific behavior.
PY_MAJOR = sys.version_info[0]

# PEP 249 module globals
# Re-exported from pyhive.hive so this module presents the same DB-API
# metadata as the underlying driver.
apilevel = hive.apilevel
threadsafety = hive.threadsafety
paramstyle = hive.paramstyle


def connect(host, port=443, database="default", cluster=None, http_path=None, token=None, user=None, password=None):
    """Create a Hive DBAPI connection to an interactive Databricks cluster.

    Create a DBAPI connection to a Databricks cluster, which can be used to generate
    DBAPI cursor(s). Provide either a cluster name OR an http_path from the cluster's
    JDBC/ODBC connection details. If using Azure, http_path is required. On
    instantiation, http_path is prioritized over cluster.

    For authentication, provide either a token OR both a user and password. Token
    authentication is strongly preferred.

    :param str host: the server hostname from the cluster's JDBC/ODBC connection page.
github getredash / redash / redash / query_runner / hive_ds.py View on Github external
def _get_connection(self):
    """Build and return a Hive connection from this runner's configuration.

    ``host`` is required; ``port``, ``database``, and ``username`` fall back
    to ``None``/``"default"`` when absent from the configuration.
    """
    cfg = self.configuration
    return hive.connect(
        host=cfg["host"],
        port=cfg.get("port"),
        database=cfg.get("database", "default"),
        username=cfg.get("username"),
    )