How to use the luigi.BoolParameter class in luigi

To help you get started, we’ve selected a few luigi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github constantinpape / cluster_tools / deprecated / production / multicut / multicut.py View on Github external
"""

    # path to the n5 file and keys
    path = luigi.Parameter()
    aff_key = luigi.Parameter()
    ws_key = luigi.Parameter()
    out_key = luigi.Parameter()
    # dummy parameter to be consistent with other segmentation tasks
    max_jobs = luigi.IntParameter()
    # path to the configuration
    config_path = luigi.Parameter()
    tmp_folder = luigi.Parameter()
    dependency = luigi.TaskParameter()
    # FIXME default does not work; this still needs to be specified
    time_estimate = luigi.IntParameter(default=10)
    run_local = luigi.BoolParameter(default=False)

    def requires(self):
        """Return the upstream task stored in the ``dependency`` parameter."""
        upstream = self.dependency
        return upstream

    def _collect_outputs(self):
        """Read and consume the ``multicut_time.json`` record in ``tmp_folder``.

        Returns the value stored under the ``'t'`` key and deletes the file.
        Returns ``None`` when the file is missing or when reading, parsing or
        deleting it fails for any reason.
        """
        timing_file = os.path.join(self.tmp_folder, 'multicut_time.json')
        try:
            # A missing result file is an expected outcome, not an error.
            if not os.path.exists(timing_file):
                return None
            with open(timing_file) as handle:
                elapsed = json.load(handle)['t']
            # The record is single-use: drop it once it has been read.
            os.remove(timing_file)
        except Exception:
            return None
        return elapsed

    def run(self):
github spotify / luigi / examples / top_artists.py View on Github external
yield int(streams), artist


class ArtistToplistToDatabase(luigi.contrib.postgres.CopyToTable):
    """
    This task runs a :py:class:`luigi.contrib.postgres.CopyToTable` task
    over the target data returned by :py:meth:`~/.Top10Artists.output` and
    writes the result into its :py:meth:`~.ArtistToplistToDatabase.output` target which,
    by default, is :py:class:`luigi.contrib.postgres.PostgresTarget` (a table in PostgreSQL).

    This class uses :py:meth:`luigi.contrib.postgres.CopyToTable.run`
    and :py:meth:`luigi.contrib.postgres.CopyToTable.output`.
    """

    date_interval = luigi.DateIntervalParameter()
    use_spark = luigi.BoolParameter()

    host = "localhost"
    database = "toplists"
    user = "luigi"
    password = "abc123"  # ;)
    table = "top10"

    columns = [("date_from", "DATE"),
               ("date_to", "DATE"),
               ("artist", "TEXT"),
               ("streams", "INT")]

    def requires(self):
        """
        This task's dependencies:
github awslabs / aws-service-catalog-puppet / servicecatalog_puppet / workflow / provisioning.py View on Github external
"OWNER": f"arn:aws:iam::{self.account_id}:role/servicecatalog-puppet/PuppetRole"
                        },
                    )
            self.write_output(changes_made)


class RunDeployInSpokeTask(tasks.PuppetTask):
    manifest_file_path = luigi.Parameter()
    puppet_account_id = luigi.Parameter()
    account_id = luigi.Parameter()

    home_region = luigi.Parameter()
    regions = luigi.ListParameter()
    should_collect_cloudformation_events = luigi.BoolParameter()
    should_forward_events_to_eventbridge = luigi.BoolParameter()
    should_forward_failures_to_opscenter = luigi.BoolParameter()

    def params_for_results_display(self):
        """Return the manifest path and the two account ids of this task as a dict."""
        shown_fields = ("manifest_file_path", "puppet_account_id", "account_id")
        return {field: getattr(self, field) for field in shown_fields}

    def run(self):
        with betterboto_client.CrossAccountClientContextManager(
            "s3",
            f"arn:aws:iam::{self.puppet_account_id}:role/servicecatalog-puppet/PuppetRole",
            f"s3-{self.puppet_account_id}",
        ) as s3:
            bucket = f"sc-puppet-spoke-deploy-{self.puppet_account_id}"
            key = f"{os.getenv('CODEBUILD_BUILD_NUMBER', '0')}.yaml"
github constantinpape / cluster_tools / deprecated / production / blockwise_multicut / graph / initial_subgraph.py View on Github external
class InitialSubgraphTask(luigi.Task):
    """
    Compute initial sub-graphs
    """

    path = luigi.Parameter()
    ws_key = luigi.Parameter()
    out_path = luigi.Parameter()
    max_jobs = luigi.Parameter()
    config_path = luigi.Parameter()
    tmp_folder = luigi.Parameter()
    dependency = luigi.TaskParameter()
    # FIXME default does not work; this still needs to be specified
    time_estimate = luigi.IntParameter(default=10)
    run_local = luigi.BoolParameter(default=False)

    def requires(self):
        """Return the task passed in through the ``dependency`` parameter."""
        upstream = self.dependency
        return upstream

    def _prepare_jobs(self, n_jobs, block_list, block_shape):
        """Write one JSON config file per job into ``tmp_folder``.

        Job ``i`` receives every ``n_jobs``-th entry of ``block_list`` starting
        at index ``i``, together with ``block_shape``. The file is named
        ``initial_subgraph_config_job<i>.json``.
        """
        for job_id in range(n_jobs):
            file_name = 'initial_subgraph_config_job%i.json' % job_id
            payload = {'block_shape': block_shape,
                       'block_list': block_list[job_id::n_jobs]}
            with open(os.path.join(self.tmp_folder, file_name), 'w') as handle:
                json.dump(payload, handle)

    def _submit_job(self, job_id):
        script_path = os.path.join(self.tmp_folder, 'initial_subgraph.py')
        config_path = os.path.join(self.tmp_folder, 'initial_subgraph_config_job%i.json' % job_id)
github allenai / citeomatic / citeomatic / tasks.py View on Github external
corpus_name = corpus_suffix.replace('.zip', '.sqlite')
        return luigi.LocalTarget(path.join(self.data_dir, corpus_name))

    def run(self):
        """Build the corpus at a temporary path, then rename it into place.

        The corpus is written to ``<output path>.tmp`` and renamed to the final
        output path only after ``corpus.build_corpus`` has returned. If anything
        fails, the temporary artefact is removed and the original exception is
        re-raised.
        """
        import shutil

        final_path = self.output().path
        tmp_path = final_path + '.tmp'
        try:
            corpus.build_corpus(tmp_path, self.input()['corpus'].path)
            os.rename(tmp_path, final_path)
        except BaseException:
            # Remove the partial result without going through a shell: the old
            # "rm -rf '%s'" command broke (and was injectable) as soon as the
            # path contained a single quote.
            try:
                if os.path.isdir(tmp_path) and not os.path.islink(tmp_path):
                    shutil.rmtree(tmp_path, ignore_errors=True)
                else:
                    os.remove(tmp_path)
            except OSError:
                # Nothing to clean up (or cleanup failed); the build error
                # below is the one worth reporting.
                pass
            raise


class CreateFeaturizer(SharedParameters):
    training_fraction = luigi.FloatParameter(default=0.8)
    use_bigrams = luigi.BoolParameter(default=False)
    use_unigrams = luigi.BoolParameter(default=True)
    max_features = luigi.IntParameter(default=100000000)
    name = luigi.Parameter('default')

    def requires(self):
        """Return the upstream tasks: a ``BuildCorpus`` task under the key ``'corpus'``."""
        return dict(corpus=BuildCorpus())

    def output(self):
        """Return the local target ``featurizer-<name>.pickle`` inside ``model_dir``."""
        file_name = 'featurizer-%s.pickle' % self.name
        target_path = path.join(self.model_dir, file_name)
        return luigi.LocalTarget(target_path)

    def run(self):
        logger.info(
            "Loading corpus from file %s " % self.input()['corpus'].path
        )
        c = corpus.Corpus.load(self.input()['corpus'].path, self.training_fraction)
github nestauk / nesta / nesta / core / routines / health_data / nih_data / mesh_join_task.py View on Github external
key_prefix = 'nih_abstracts_processed/mti'

class MeshJoinTask(luigi.Task):
    '''Joins MeSH labels stored in S3 to NIH projects in MySQL.

    Args:
        date (str): Date used to label the outputs
        _routine_id (str): String used to label the AWS task
        db_config_env (str): Environment variable for path to MySQL database
            configuration.
    '''

    date = luigi.DateParameter()
    _routine_id = luigi.Parameter()
    db_config_env = luigi.Parameter()
    test = luigi.BoolParameter()

    @staticmethod
    def format_mesh_terms(df):
        """
        Removes unrequired columns and pivots the mesh terms data into a dictionary.

        Args:
            df (dataframe): mesh terms as returned from retrieve_mesh_terms

        Returns:
            (dict): document_id: list of mesh terms
        """
        logging.info("Formatting mesh terms")
        # remove PRC rows
        df = df.drop(df[df.term == 'PRC'].index, axis=0)
github constantinpape / cluster_tools / deprecated / production / evaluation / skeleton_evaluation.py View on Github external
import luigi
import z5py
from cremi_tools.skeletons import build_skeleton_metrics

from production.util import DummyTask


class SkeletonEvaluationTask(luigi.Task):
    path = luigi.Parameter()
    seg_key = luigi.Parameter()
    skeleton_keys = luigi.ListParameter()
    n_threads = luigi.IntParameter()
    tmp_folder = luigi.Parameter()
    dependency = luigi.TaskParameter(default=DummyTask())
    time_estimate = luigi.IntParameter(default=10)
    run_local = luigi.BoolParameter(default=False)

    def requires(self):
        """Return the task given as ``dependency`` (a ``DummyTask`` by default)."""
        upstream = self.dependency
        return upstream

    # TODO enable ROIs
    def run(self):

        from .. import util

        # copy the script to the temp folder and replace the shebang
        file_dir = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(self.tmp_folder, 'skeleton_evaluation.py')
        util.copy_and_replace(os.path.join(file_dir, 'skeleton_evaluation.py'),
                              script_path)

        # check that inputs exist
github nestauk / nesta / nesta / core / routines / meetup / health_tagging / topic_discovery_task.py View on Github external
defined as the most frequently occurring from a set of categories.

    Args:
        db_config_env (str): Environmental variable pointing to the path of the DB config.
        routine_id (str): The routine UID.
        core_categories (list): A list of category_shortnames from which to identify topics.
        members_perc (int): A percentile to evaluate the minimum number of members.
        topic_perc (int): A percentile to evaluate the most frequent topics.
        test (bool): Test mode.
    '''
    db_config_env = luigi.Parameter()
    routine_id = luigi.Parameter()
    core_categories = luigi.ListParameter()
    members_perc = luigi.IntParameter(default=10)
    topic_perc = luigi.IntParameter(default=10)
    test = luigi.BoolParameter(default=True)

    def output(self):
        '''Return the S3 target ``meetup-topics-<routine_id>.json`` under ``S3PREFIX``.'''
        target_uri = f"{S3PREFIX}/meetup-topics-{self.routine_id}.json"
        return s3.S3Target(target_uri)

    def run(self):
        '''Extract the topics of interest'''
        database = 'dev' if self.test else 'production'
        engine = get_mysql_engine(self.db_config_env, 'mysqldb', database)
        members_limit = get_members_by_percentile(engine, perc=self.members_perc)
        topics = get_core_topics(engine,
                                 core_categories=self.core_categories,
                                 members_limit=members_limit,
                                 perc=self.topic_perc)

        # Write the intermediate output
github constantinpape / cluster_tools / deprecated / production / components / workflow_task.py View on Github external
# path to the n5 file and keys
    path = luigi.Parameter()
    aff_key = luigi.Parameter()
    mask_key = luigi.Parameter()
    out_key = luigi.Parameter()
    # maximal number of jobs that will be run in parallel
    max_jobs = luigi.IntParameter()
    # path to the configuration
    # TODO allow individual paths for individual blocks
    config_path = luigi.Parameter()
    tmp_folder = luigi.Parameter()
    # FIXME default does not work; this still needs to be specified
    # TODO different time estimates for different sub-tasks
    time_estimate = luigi.IntParameter(default=10)
    run_local = luigi.BoolParameter(default=False)

    def requires(self):

        thresh_task = ThresholdTask(path=self.path, aff_key=self.aff_key,
                                    mask_key=self.mask_key, out_key=self.out_key,
                                    max_jobs=self.max_jobs, config_path=self.config_path,
                                    tmp_folder=self.tmp_folder, time_estimate=self.time_estimate,
                                    run_local=self.run_local)
        offset_task = OffsetTask(tmp_folder=self.tmp_folder, dependency=thresh_task,
                                 time_estimate=self.time_estimate, run_local=self.run_local)
        merge_task = MergeTask(path=self.path, out_key=self.out_key, config_path=self.config_path,
                               max_jobs=self.max_jobs, tmp_folder=self.tmp_folder,
                               dependency=offset_task,
                               time_estimate=self.time_estimate, run_local=self.run_local)
        assignment_task = NodeAssignmentTask(path=self.path, out_key=self.out_key, config_path=self.config_path,
                                             max_jobs=self.max_jobs, tmp_folder=self.tmp_folder,
github constantinpape / cluster_tools / cluster_tools / affinities / gradients.py View on Github external
#
# Block-wise gradient computation tasks
#

class GradientsBase(luigi.Task):
    """ Gradients base class
    """

    task_name = 'gradients'
    src_file = os.path.abspath(__file__)
    allow_retry = True

    path_dict = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    average_gradient = luigi.BoolParameter(default=True)
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        """Return the task given as ``dependency`` (a ``DummyTask`` by default)."""
        upstream = self.dependency
        return upstream

    def _validate_paths(self):
        shape = None

        with open(self.path_dict) as f:
            path_dict = json.load(f)

        for path in sorted(path_dict):
            key = path_dict[path]
            assert os.path.exists(path)
            with vu.file_reader(path, 'r') as f:
                assert key in f