How to use the luigi.DictParameter function in luigi

To help you get started, we’ve selected a few luigi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github constantinpape / cluster_tools / cluster_tools / downscaling / downscaling_workflow.py View on Github external
    @staticmethod
    def get_config():
        """Return the parent's task configs plus this workflow's own.

        The dictionary obtained from the parent ``get_config`` is extended
        with the default task configs of the downscaling and copy-volume
        tasks, stored under ``'downscaling'`` and ``'copy_volume'``.
        """
        # a staticmethod has no implicit class, so it is named explicitly
        merged = super(DownscalingWorkflow, DownscalingWorkflow).get_config()
        downscaling_config = downscale_tasks.DownscalingLocal.default_task_config()
        copy_config = copy_tasks.CopyVolumeLocal.default_task_config()
        merged.update({'downscaling': downscaling_config,
                       'copy_volume': copy_config})
        return merged


# HDF5 is frickin slow, so it seems to be better to do the
# computations in n5 and then copy data to h5
class PainteraToBdvWorkflow(WorkflowBase):
    input_path = luigi.Parameter()
    input_key_prefix = luigi.Parameter()
    output_path = luigi.Parameter()
    dtype = luigi.Parameter(default=None)
    metadata_dict = luigi.DictParameter(default={})
    skip_existing_levels = luigi.BoolParameter(default=True)

    # we offset the scale by 1 because
    # 0 indicates the original resolution
    def get_scale_key(self, scale, metadata_format):
        """Return the dataset key of a scale level for a metadata format.

        Args:
            scale: scale level; formatted into the key with ``%i``.
            metadata_format: either ``'paintera'`` or ``'bdv'``.

        Returns:
            For ``'paintera'``: ``'s<scale>'`` joined onto
            ``self.input_key_prefix`` with ``os.path.join``.
            For ``'bdv'``: ``'t00000/s00/<scale>/cells'``.

        Raises:
            ValueError: if ``metadata_format`` is not one of the two
                supported formats.
        """
        if metadata_format == 'paintera':
            return os.path.join(self.input_key_prefix, 's%i' % scale)
        if metadata_format == 'bdv':
            # we only support a single time-point and single set-up for now
            # TODO support multiple set-ups for multi-channel data
            return 't00000/s00/%i/cells' % scale
        # reject unknown formats explicitly rather than failing on an
        # unbound local variable at the return statement
        raise ValueError(
            "Unsupported metadata format %r, expected 'paintera' or 'bdv'"
            % (metadata_format,)
        )

    def get_scales(self):
        with file_reader(self.input_path, 'r') as f:
github awslabs / aws-service-catalog-puppet / servicecatalog_puppet / luigi_tasks_and_targets.py View on Github external
},
                    indent=4,
                    default=str,
                )
            )
        logger.info(f"[{self.portfolio}] {self.account_id}:{self.region} :: Finished importing")


class CreateLaunchRoleConstraintsForPortfolio(PuppetTask):
    account_id = luigi.Parameter()
    region = luigi.Parameter()
    portfolio = luigi.Parameter()
    hub_portfolio_id = luigi.Parameter()
    puppet_account_id = luigi.Parameter()

    launch_constraints = luigi.DictParameter()

    dependencies = luigi.ListParameter(default=[])

    post_actions = luigi.ListParameter()

    should_use_sns = luigi.Parameter(default=False, significant=False)

    def requires(self):
        return {
            'create_spoke_local_portfolio_task': ImportIntoSpokeLocalPortfolioTask(
                account_id=self.account_id,
                region=self.region,
                portfolio=self.portfolio,
                hub_portfolio_id=self.hub_portfolio_id,
            ),
            'deps': [ProvisionProductTask(**dependency) for dependency in self.dependencies]
github DocNow / dnflow / summarize.py View on Github external
i = 0
            for tweet in t.search(term):
                i += 1
                if i > count:
                    break
                if i % 500 == 0:
                    self.update_job(
                        date_path=self.search['date_path'],
                        status="STARTED: %s - %s/%s" %
                               (self.task_family, i, count)
                    )
                fh.write(json.dumps(tweet) + '\n')


class CountHashtags(EventfulTask):
    search = luigi.DictParameter()

    def requires(self):
        """Depend on a FetchTweets task built from this task's ``search``."""
        upstream = FetchTweets(search=self.search)
        return upstream

    def output(self):
        """Return a local target derived from the input's filename.

        The path is the input's ``fn`` with ``'tweets.json'`` replaced by
        ``'count-hashtags.csv'``.
        """
        tweets_fn = self.input().fn
        counts_fn = tweets_fn.replace('tweets.json', 'count-hashtags.csv')
        return luigi.LocalTarget(counts_fn)

    def run(self):
        c = Counter()
        for tweet_str in self.input().open('r'):
            tweet = json.loads(tweet_str)
            c.update([ht['text'].lower()
                      for ht in tweet['entities']['hashtags']])
        with self.output().open('w') as fp_counts:
            writer = csv.DictWriter(fp_counts, delimiter=',',
github spotify / luigi / luigi / contrib / hive.py View on Github external
if not location:
            raise Exception("Couldn't find location for table: {0}".format(str(self)))
        return location

    def open(self, mode):
        """Always fail: this target cannot be opened.

        Args:
            mode: accepted for signature compatibility; not used.

        Raises:
            NotImplementedError: on every call.
        """
        # raise rather than return, so a caller can never mistake the
        # exception object for a usable file handle
        raise NotImplementedError("open() is not supported for HivePartitionTarget")


class ExternalHiveTask(luigi.ExternalTask):
    """
    External task whose output is a Hive table, or a single partition of
    that table when ``partition`` is non-empty.
    """

    database = luigi.Parameter(default='default')
    table = luigi.Parameter()
    partition = luigi.DictParameter(
        default={},
        description='Python dictionary specifying the target partition e.g. {"date": "2013-01-25"}'
    )

    def output(self):
        """Return a HivePartitionTarget if a partition is set, else a HiveTableTarget."""
        # an empty partition dict is falsy and selects the whole table
        if not self.partition:
            return HiveTableTarget(database=self.database, table=self.table)
        return HivePartitionTarget(
            database=self.database,
            table=self.table,
            partition=self.partition,
        )
github m3dev / redshells / redshells / app / word_item_similarity / build_word_item_similarity.py View on Github external
booster='gbtree',
            gamma=0,
            min_child_weight=1,
            max_delta_step=0,
            subsample=1,
            colsample_bytree=1,
            colsample_bylevel=1,
            reg_alpha=0,
            reg_lambda=1,
            scale_pos_weight=1,
            base_score=0.5))  # type: Dict[str, Any]

    dictionary_filter_kwargs = luigi.DictParameter(
        default=dict(no_below=5, no_above=0.5, keep_n=100000, keep_tokens=None))  # type: Dict[str, Any]

    fasttext_kwargs = luigi.DictParameter(
        default=dict(
            corpus_file=None,
            sg=0,
            hs=0,
            size=200,
            alpha=0.025,
            window=5,
            min_count=5,
            max_vocab_size=None,
            word_ngrams=1,
            sample=1e-3,
            seed=1,
            workers=3,
            min_alpha=0.0001,
            negative=5,
            ns_exponent=0.75,
github m3dev / redshells / redshells / train / train_binary_clasification_model.py View on Github external
import gokart
import luigi
import sklearn

import redshells
import redshells.train.utils


class _BinaryClassificationModelTask(gokart.TaskOnKart):
    train_data_task = gokart.TaskInstanceParameter(
        description='A task outputs a pd.DataFrame with columns={`target_column_name`}.')
    target_column_name = luigi.Parameter(default='category', description='Category column names.')  # type: str
    model_name = luigi.Parameter(
        description='A model name which has "fit" interface, and must be registered by "register_prediction_model".'
    )  # type: str
    model_kwargs = luigi.DictParameter(
        default=dict(), description='Arguments of the model which are created with model_name.')  # type: Dict[str, Any]

    def requires(self):
        """Return the task held in ``train_data_task`` as the sole requirement."""
        upstream_task = self.train_data_task
        return upstream_task

    def output(self):
        """Return the target that ``make_target`` builds for ``output_file_path``."""
        target_path = self.output_file_path
        return self.make_target(target_path)

    def create_model(self):
        """Create the model named ``model_name``, passing ``model_kwargs`` as keyword arguments."""
        build = redshells.factory.create_prediction_model
        return build(self.model_name, **self.model_kwargs)

    def create_train_data(self):
        data = self.load_data_frame(required_columns={self.target_column_name})

        data = sklearn.utils.shuffle(data)
        y = data[self.target_column_name].values
github nestauk / nesta / nesta / core / luigihacks / sql2estask.py View on Github external
entity_type (str): Name of the entity type to label this task with.
        kwargs (dict): Any other job parameters to pass to the batchable.
    '''
    date = luigi.DateParameter()
    routine_id = luigi.Parameter()
    intermediate_bucket = luigi.Parameter()
    db_config_env = luigi.Parameter()
    db_section = luigi.Parameter(default="mysqldb")
    process_batch_size = luigi.IntParameter(default=10000)
    drop_and_recreate = luigi.BoolParameter(default=False)
    dataset = luigi.Parameter()
    endpoint = luigi.Parameter()
    id_field = luigi.Parameter()
    filter = luigi.Parameter(default=None)
    entity_type = luigi.Parameter()
    kwargs = luigi.DictParameter(default={})

    def output(self):
        '''Return a MySqlTarget identified by the routine id and date.

        The config file path is read from the environment variable named
        by `db_config_env` and kept on `self.db_config_path`. The
        "mysqldb" section of that file supplies the target's keyword
        arguments; the database is 'dev' when `self.test` is truthy and
        'production' otherwise.
        '''
        config_path = os.environ[self.db_config_env]
        self.db_config_path = config_path
        target_kwargs = get_config(config_path, "mysqldb")
        if self.test:
            target_kwargs["database"] = 'dev'
        else:
            target_kwargs["database"] = 'production'
        # Not a real table
        target_kwargs["table"] = f"{self.routine_id} "
        marker_id = f"{self.routine_id}_{self.date}"
        return MySqlTarget(update_id=marker_id, **target_kwargs)

    def prepare(self):
        if self.test:
            self.process_batch_size = 1000
            logging.warning("Batch size restricted to "
                            f"{self.process_batch_size}"
                            " while in test mode")
github awslabs / aws-service-catalog-factory / servicecatalog_factory / luigi_tasks_and_targets.py View on Github external
else:
            raise Exception(f"Unknown type: {self.type}")

        with self.output().open('w') as output_file:
            output_file.write(rendered)


class CreateVersionPipelineTask(FactoryTask):
    all_regions = luigi.ListParameter()
    version = luigi.DictParameter()
    product = luigi.DictParameter()

    provisioner = luigi.DictParameter()

    products_args_by_region = luigi.DictParameter()

    factory_version = luigi.Parameter()
    region = luigi.Parameter()

    tags = luigi.ListParameter()

    def output(self):
        """Return the local target for this product/version template file.

        The file name combines the ``'Name'`` entries of ``product`` and
        ``version`` and lives under ``output/CreateVersionPipelineTask/``.
        """
        product_name = self.product.get('Name')
        version_name = self.version.get('Name')
        template_path = (
            f"output/CreateVersionPipelineTask/"
            f"{product_name}_{version_name}.template.yaml"
        )
        return luigi.LocalTarget(template_path)

    def requires(self):
        return CreateVersionPipelineTemplateTask(
            all_regions=self.all_regions,
            version=self.version,
github m3dev / redshells / redshells / app / word_item_similarity / build_word_item_similarity.py View on Github external
class WordItemSimilarityConfig(luigi.Config):
    task_namespace = 'redshells.word_item_similarity'
    matrix_factorization_kwargs = luigi.DictParameter(
        default=dict(
            n_latent_factors=20,
            learning_rate=1e-3,
            reg_item=1e-5,
            reg_user=1e-5,
            batch_size=2**10,
            epoch_size=30,
            test_size=0.1,
        ))  # type: Dict[str, Any]

    xgb_classifier_kwargs = luigi.DictParameter(
        default=dict(
            max_depth=5,
            learning_rate=0.1,
            n_estimators=300,
            silent=True,
            objective="binary:logistic",
            booster='gbtree',
            gamma=0,
            min_child_weight=1,
            max_delta_step=0,
            subsample=1,
            colsample_bytree=1,
            colsample_bylevel=1,
            reg_alpha=0,
            reg_lambda=1,
            scale_pos_weight=1,
github GeoscienceAustralia / wagl / wagl / multifile_workflow.py View on Github external
with self.output().temporary_path() as out_fname:
            _calculate_angles(acqs[0], self.input().path, out_fname,
                              self.compression, self.filter_opts, self.tle_path)


class AncillaryData(luigi.Task):

    """Get all ancillary data."""

    level1 = luigi.Parameter()
    work_root = luigi.Parameter(significant=False)
    granule = luigi.OptionalParameter(default='')
    vertices = luigi.TupleParameter()
    workflow = luigi.EnumParameter(enum=Workflow)
    acq_parser_hint = luigi.OptionalParameter(default='')
    aerosol = luigi.DictParameter({'user': 0.05}, significant=False)
    brdf_path = luigi.Parameter(significant=False)
    brdf_premodis_path = luigi.Parameter(significant=False)
    ozone_path = luigi.Parameter(significant=False)
    water_vapour = luigi.DictParameter({'user': 1.5}, significant=False)
    dsm_fname = luigi.Parameter(significant=False)
    ecmwf_path = luigi.Parameter(significant=False)
    invariant_height_fname = luigi.Parameter(significant=False)
    compression = luigi.EnumParameter(enum=H5CompressionFilter,
                                      default=H5CompressionFilter.LZF,
                                      significant=False)
    filter_opts = luigi.DictParameter(default=None, significant=False)

    def requires(self):
        """Require the satellite/solar grids task for the first supported group.

        The group is the first entry of ``supported_groups`` on the object
        returned by ``acquisitions`` for ``level1`` and ``acq_parser_hint``.
        """
        container = acquisitions(self.level1, self.acq_parser_hint)
        first_group = container.supported_groups[0]
        return CalculateSatelliteAndSolarGrids(
            self.level1, self.work_root, self.granule, first_group)