How to use PyHive - 10 common examples

To help you get started, we’ve selected a few PyHive examples based on popular ways it is used in public projects.


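Before diving in, here is a minimal end-to-end session through PyHive's DB-API interface. The host, credentials, and table name below are placeholders; the sketch assumes a reachable HiveServer2 on the default port 10000.

from pyhive import hive

# Connect to HiveServer2; 'hive.example.com' and the credentials are placeholders.
connection = hive.connect(host='hive.example.com', port=10000,
                          username='someone', database='default')
cursor = connection.cursor()
cursor.execute('SELECT * FROM my_table LIMIT 10')  # hypothetical table
for row in cursor.fetchall():
    print(row)
cursor.close()
connection.close()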

From apache/incubator-superset: tests/sql_validator_tests.py (view on GitHub)
def test_validator_db_error(self, flask_g):
        flask_g.user.username = "nobody"
        sql = "SELECT 1 FROM default.notarealtable"
        schema = "default"

        fetch_fn = self.database.db_engine_spec.fetch_data
        fetch_fn.side_effect = DatabaseError("dummy db error")

        with self.assertRaises(PrestoSQLValidationError):
            self.validator.validate(sql, schema, self.database)
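The pattern that drives this test is unittest.mock's side_effect: assigning an exception makes every call to the mock raise it, which pushes the validator into its error path. A standalone sketch of the same pattern (the names here are illustrative, not Superset's):

from unittest import mock

fetch_fn = mock.MagicMock()
fetch_fn.side_effect = RuntimeError('dummy db error')  # every call now raises

try:
    fetch_fn('SELECT 1')
except RuntimeError as exc:
    print(exc)  # -> dummy db error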

From apache/incubator-superset: superset/db_engine_specs.py (view on GitHub)
def handle_cursor(cls, cursor, query, session):
        """Updates progress information"""
        from pyhive import hive
        unfinished_states = (
            hive.ttypes.TOperationState.INITIALIZED_STATE,
            hive.ttypes.TOperationState.RUNNING_STATE,
        )
        polled = cursor.poll()
        while polled.operationState in unfinished_states:
            query = session.query(type(query)).filter_by(id=query.id).one()
            if query.status == QueryStatus.STOPPED:
                cursor.cancel()
                break

            resp = cursor.fetch_logs()
            if resp and resp.log:
                progress = cls.progress(resp.log)
                if progress > query.progress:
                    query.progress = progress
                session.commit()
            time.sleep(5)
            polled = cursor.poll()
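The loop above follows the asynchronous-execution pattern documented in PyHive's README: execute with async_=True, then poll until the operation leaves the INITIALIZED/RUNNING states. A minimal sketch, assuming a reachable HiveServer2 (host and table are placeholders):

from pyhive import hive
from TCLIService.ttypes import TOperationState  # the same enum as hive.ttypes above

cursor = hive.connect('hive.example.com').cursor()  # placeholder host
cursor.execute('SELECT COUNT(*) FROM my_table', async_=True)

status = cursor.poll().operationState
while status in (TOperationState.INITIALIZED_STATE, TOperationState.RUNNING_STATE):
    # Recent PyHive returns fetch_logs() as a list of strings; the Superset
    # snippet above targets an older API that returned a thrift response.
    for message in cursor.fetch_logs():
        print(message)
    status = cursor.poll().operationState

print(cursor.fetchall())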

From dropbox/PyHive: pyhive/presto.py (view on GitHub)
    def close(self):
        """Presto does not have anything to close"""
        # TODO cancel outstanding queries?
        pass

    def commit(self):
        """Presto does not support transactions"""
        pass

    def cursor(self):
        """Return a new :py:class:`Cursor` object using the connection."""
        return Cursor(*self._args, **self._kwargs)

    def rollback(self):
        raise NotSupportedError("Presto does not have transactions")  # pragma: no cover


class Cursor(common.DBAPICursor):
    """These objects represent a database cursor, which is used to manage the context of a fetch
    operation.

    Cursors are not isolated, i.e., any changes done to the database by a cursor are immediately
    visible by other cursors or connections.
    """

    def __init__(self, host, port='8080', username=None, principal_username=None, catalog='hive',
                 schema='default', poll_interval=1, source='pyhive', session_props=None,
                 protocol='http', password=None, requests_session=None, requests_kwargs=None,
                 KerberosRemoteServiceName=None, KerberosPrincipal=None,
                 KerberosConfigPath=None, KerberosKeytabPath=None,
                 KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None):
        """
        :param host: hostname to connect to, e.g. ``presto.example.com``
        :param port: int -- port, defaults to 8080
        """
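For context, a minimal sketch of driving this Cursor through pyhive.presto (coordinator host and username are placeholders):

from pyhive import presto

cursor = presto.connect(
    host='presto.example.com',  # placeholder coordinator host
    port=8080,
    username='someone',
    catalog='hive',
    schema='default',
).cursor()
cursor.execute('SELECT 1')
print(cursor.fetchone())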

From airbnb/omniduct: omniduct/databases/hiveserver2.py (view on GitHub)
def _connect(self):
        from sqlalchemy import create_engine, MetaData
        if self.driver == 'pyhive':
            try:
                import pyhive.hive
            except ImportError:
                raise ImportError("""
                    Omniduct is attempting to use the 'pyhive' driver, but it
                    is not installed. Please either install the pyhive package,
                    or reconfigure this Duct to use the 'impyla' driver.
                    """)
            self.__hive = pyhive.hive.connect(host=self.host,
                                              port=self.port,
                                              auth=self.auth_mechanism,
                                              database=self.schema,
                                              username=self.username,
                                              password=self.password,
                                              **self.connection_options)
            self._sqlalchemy_engine = create_engine('hive://{}:{}/{}'.format(self.host, self.port, self.schema))
            self._sqlalchemy_metadata = MetaData(self._sqlalchemy_engine)
        elif self.driver == 'impyla':
            try:
                import impala.dbapi
            except ImportError:
                raise ImportError("""
                    Omniduct is attempting to use the 'impyla' driver, but it
                    is not installed. Please either install the impyla package,
                    or reconfigure this Duct to use the 'pyhive' driver.
                    """)
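The hive:// URL that omniduct assembles here is handled by PyHive's own SQLAlchemy dialect, so the same engine can be built directly. A minimal sketch (host, port, and schema are placeholders):

from sqlalchemy import create_engine, text

# hive://<host>:<port>/<schema>, served by PyHive's SQLAlchemy dialect
engine = create_engine('hive://hive.example.com:10000/default')
with engine.connect() as conn:
    for row in conn.execute(text('SELECT 1')):
        print(row)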

From devinstevenson/pure-transport: examples/ssl_connection.py (view on GitHub)
import puretransport
from pyhive import hive

transport = puretransport.transport_factory(host='host',
                                            port=10000,
                                            username='username',
                                            password='secret',
                                            use_ssl=True)

hive_con = hive.connect(username='username', thrift_transport=transport,
                        database='default')
cursor = hive_con.cursor()

cursor.execute("select * from table limit 1")

cursor.fetchall()

From verdict-project/verdict: docker/define_tpch_samples.py (view on GitHub)
import sys

import prestodb
import verdict
from pyhive import hive


verdict.set_loglevel("debug")
meta_setup = False
fast_meta = True
hostname = 'presto'
if len(sys.argv) > 1:
    hostname = sys.argv[1]
if len(sys.argv) > 2:
    if sys.argv[2].lower() == "true":
        meta_setup = True
if len(sys.argv) > 3:
    if sys.argv[3].lower() == "false":
        fast_meta = False
hive_cursor = hive.connect(hostname).cursor()

presto_conn = prestodb.dbapi.connect(
                host=hostname,
                port=8080,
                user='verdict',
                catalog='hive',
                schema='default',
                )
presto_cursor = presto_conn.cursor()
# presto_cursor = presto.connect(hostname).cursor()
v = verdict.presto(presto_host=hostname, preload_cache=True)

part_col = 'verdictcol'
sample_schema = 'verdict'

# TODO: samples for customers table

From huseinzol05/Python-DevOps: apache-bigdata/3.flask-hadoop-hive/app.py (view on GitHub)
from flask import Flask, request
import os
import json
from pyhive import hive

cursor = hive.connect('localhost').cursor()
cursor.execute(
    "CREATE TABLE IF NOT EXISTS employee ( eid int, name String, salary String, destignation String) COMMENT 'employee details' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS TEXTFILE"
)
cursor.execute(
    "LOAD DATA LOCAL INPATH 'sample.txt' OVERWRITE INTO TABLE employee"
)

app = Flask(__name__)


@app.route('/')
def hello_world():
    return 'Hey, we have Flask in a Docker container!'


@app.route('/employee//')
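The excerpt breaks off at the final route decorator, whose URL placeholders (the text between angle brackets) were stripped when the page was rendered. A hypothetical completion of such a route, querying the table created above and returning JSON:

@app.route('/employee/<int:eid>')  # hypothetical route; the original placeholders were stripped
def get_employee(eid):
    cursor.execute('SELECT * FROM employee WHERE eid = %s', (eid,))
    return json.dumps(cursor.fetchall())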

From marvin-ai/marvin-python-toolbox: marvin_python_toolbox/management/hive.py (view on GitHub)
def get_connection(self, host, db='DEFAULT', queue='default'):
        return hive.connect(host=host,
                            database=db,
                            configuration={'mapred.job.queue.name': queue,
                                           'hive.exec.dynamic.partition.mode': 'nonstrict'})

From repertory/docker-redash: redash/query_runner/hive_ds.py (view on GitHub)
def run_query(self, query, user):

        connection = None
        try:
            connection = hive.connect(**self.configuration.to_dict())

            cursor = connection.cursor()

            cursor.execute(query)

            column_names = []
            columns = []

            for column in cursor.description:
                column_name = column[COLUMN_NAME]
                column_names.append(column_name)

                columns.append({
                    'name': column_name,
                    'friendly_name': column_name,
                    'type': types_map.get(column[COLUMN_TYPE], None)
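The excerpt breaks off mid-append. A hedged sketch of how such a query runner typically finishes (an illustration, not the Redash source, which serializes with its own json_dumps helper):

                })

            rows = [dict(zip(column_names, row)) for row in cursor.fetchall()]
            data = {'columns': columns, 'rows': rows}
            json_data = json.dumps(data)
            error = None
        except Exception as e:
            json_data = None
            error = str(e)
        finally:
            if connection:
                connection.close()

        return json_data, error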