How to use the luigi.Parameter class in luigi

To help you get started, we’ve selected a few luigi examples based on popular ways luigi.Parameter is used in public projects.

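In luigi, parameters are declared as class attributes on a luigi.Task; luigi turns them into constructor arguments and command-line options, and their values become part of the task's identity. A minimal, self-contained sketch before the real-world examples (task and file names are illustrative, not from any of the projects below):

import luigi


class Greet(luigi.Task):
    # plain Parameter: the value is always handled as a string
    name = luigi.Parameter()
    # typed parameter with a default; override with --repeat on the command line
    repeat = luigi.IntParameter(default=1)

    def output(self):
        return luigi.LocalTarget('greeting_%s.txt' % self.name)

    def run(self):
        with self.output().open('w') as f:
            f.write(('Hello, %s!\n' % self.name) * self.repeat)


if __name__ == '__main__':
    # equivalent to: luigi --module <this module> Greet --name world --repeat 2 --local-scheduler
    luigi.build([Greet(name='world', repeat=2)], local_scheduler=True)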

github constantinpape / cluster_tools / cluster_tools / lifted_features / sparse_lifted_neighborhood.py View on Github
    allow_retry = False

    graph_path = luigi.Parameter()
    graph_key = luigi.Parameter()
    node_label_path = luigi.Parameter()
    node_label_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    prefix = luigi.Parameter()
    nh_graph_depth = luigi.IntParameter()
    node_ignore_label = luigi.IntParameter(default=0)
    # different modes for adding lifted edges:
    # "all": add lifted edges between all nodes that have a label
    # "same": add lifted edges only between nodes with the same label
    # "different": add lifted edges only between nodes with different labels
    mode = luigi.Parameter(default='all')
    dependency = luigi.TaskParameter(default=DummyTask())

    modes = ('all', 'same', 'different')

    def requires(self):
        return self.dependency

    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        assert self.mode in self.modes, "Invalid mode %s" % self.mode

        # load the task config
        config = self.get_task_config()
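
The task above stores the allowed values in a modes tuple and asserts membership at run time. When only the validation is needed, luigi's ChoiceParameter enforces the same constraint at parse time; a sketch under that assumption (class name is hypothetical):

import luigi


class NeighborhoodTask(luigi.Task):
    # rejects anything outside `choices` when the task is constructed,
    # instead of asserting inside run_impl as above
    mode = luigi.ChoiceParameter(choices=['all', 'same', 'different'], default='all')
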
github constantinpape / cluster_tools / cluster_tools / morphology / region_centers.py View on Github
#
# Region Center Tasks
#

class RegionCentersBase(luigi.Task):
    """ RegionCenters base class
    """

    task_name = 'region_centers'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    morphology_path = luigi.Parameter()
    morphology_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    ignore_label = luigi.Parameter(default=None)
    resolution = luigi.ListParameter(default=[1, 1, 1])
    #
    dependency = luigi.TaskParameter()

    def requires(self):
        return self.dependency

    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)
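
Note resolution = luigi.ListParameter(default=[1, 1, 1]) above: list-valued parameters are passed as JSON on the command line, and luigi hands the task an immutable tuple so the value stays hashable. A small sketch of both points (names are illustrative):

import luigi


class ResolutionDemo(luigi.Task):
    # on the command line: --resolution '[1, 1, 2]' (parsed as JSON)
    resolution = luigi.ListParameter(default=[1, 1, 1])

    def run(self):
        # the value arrives as a tuple; copy it if a mutable list is needed
        print(list(self.resolution))
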
github constantinpape / cluster_tools / cluster_tools / watershed / watershed.py View on Github
from cluster_tools.cluster_tasks import SlurmTask, LocalTask, LSFTask


#
# Watershed Tasks
#

class WatershedBase(luigi.Task):
    """ Watershed base class
    """

    task_name = 'watershed'
    src_file = os.path.abspath(__file__)

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    mask_path = luigi.Parameter(default='')
    mask_key = luigi.Parameter(default='')

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({'threshold': .5,
                       'apply_dt_2d': True, 'pixel_pitch': None,
                       'apply_ws_2d': True, 'sigma_seeds': 2., 'size_filter': 25,
                       'sigma_weights': 2., 'halo': [0, 0, 0],
                       'channel_begin': 0, 'channel_end': None,
                       'agglomerate_channels': 'mean', 'alpha': 0.8,
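
mask_path and mask_key above default to the empty string, a common luigi idiom for optional string parameters (a plain Parameter holding None can trigger a type warning). A sketch of how such a sentinel is typically checked (hypothetical class):

import luigi


class MaskedDemo(luigi.Task):
    mask_path = luigi.Parameter(default='')
    mask_key = luigi.Parameter(default='')

    @property
    def with_mask(self):
        # the empty string doubles as "no mask provided"
        return self.mask_path != '' and self.mask_key != ''
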
github constantinpape / cluster_tools / deprecated / production / evaluation / skeleton_evaluation.py View on Github
#! /usr/bin/python

import os
import argparse
import subprocess
import json

import luigi
import z5py
from cremi_tools.skeletons import build_skeleton_metrics

from production.util import DummyTask


class SkeletonEvaluationTask(luigi.Task):
    path = luigi.Parameter()
    seg_key = luigi.Parameter()
    skeleton_keys = luigi.ListParameter()
    n_threads = luigi.IntParameter()
    tmp_folder = luigi.Parameter()
    dependency = luigi.TaskParameter(default=DummyTask())
    time_estimate = luigi.IntParameter(default=10)
    run_local = luigi.BoolParameter(default=False)

    def requires(self):
        return self.dependency

    # TODO enable ROIs
    def run(self):

        from .. import util
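
run_local = luigi.BoolParameter(default=False) above illustrates boolean parameters: on the command line they behave as flags, so passing --run-local sets the value to True and omitting it keeps the default. A minimal sketch:

import luigi


class BoolDemo(luigi.Task):
    # a flag on the command line: `--run-local` -> True, omitted -> False
    run_local = luigi.BoolParameter(default=False)

    def run(self):
        print('run_local =', self.run_local)
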
github miku / siskin / siskin / sources / b3kat.py View on Github
        luigi.LocalTarget(stopover).move(self.output().path)

    def output(self):
        return luigi.LocalTarget(path=self.path(ext='mrc'))


class B3KatFilterSSG(B3KatTask):
    """
    Slice out a binary MARC file from the complete dump based on SSG.

    List of shortcuts for SSG can be found here:

    * https://www.dfg.de/download/pdf/foerderung/programme/lis/fid_zwischenbilanz_umstrukturierung_foerderung_sondersammelgebiete.pdf
    * https://web.archive.org/web/20190503122507/https://www.dfg.de/download/pdf/foerderung/programme/lis/fid_zwischenbilanz_umstrukturierung_foerderung_sondersammelgebiete.pdf
    """
    ssg = luigi.Parameter(default='9,2', description='ssgn designation to be matched against 84.a')
    date = ClosestDateParameter(default=datetime.date.today())

    def requires(self):
        return B3KatDownload(date=self.date)

    def run(self):
        """
        Taken from 012_filter.sh

        unzip -p $f | tr -d '\t' | sed -e 's/\(\)/\t\1/g' | tr -d
        '\n' | tr '\t' '\n' | grep '9,2ssgn' | grep 'digit' >$t
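
The ssg parameter above passes description=..., which luigi includes in the auto-generated --help text; ClosestDateParameter is not part of luigi itself but appears to come from the gluish helper library this project depends on. A sketch of the description behaviour:

import luigi


class HelpDemo(luigi.Task):
    # `description` is shown next to --ssg in the generated --help output
    ssg = luigi.Parameter(default='9,2', description='ssgn designation to be matched against 84.a')
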
github allenai / citeomatic / citeomatic / tasks.py View on Github
import logging
from os import path

import luigi
from citeomatic import file_util, features, training, corpus
from citeomatic.features import Featurizer
from citeomatic.models import layers
from citeomatic.models.options import ModelOptions
from citeomatic.serialization import import_from
from luigi.util import inherits

logger = logging.getLogger('citeomatic.tasks')

import faulthandler
faulthandler.enable()


class SharedParameters(luigi.Task):
    base_dir = luigi.Parameter(default=path.expanduser('~/citeomatic-data/'))

    @property
    def data_dir(self):
        return self.base_dir + '/data'

    @property
    def model_dir(self):
        return self.base_dir + '/model'

    def log(self, msg, *args):
        logger.info(msg, *args)


class DownloadCorpus(SharedParameters):
    corpus_url = luigi.Parameter(
        default=
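
DownloadCorpus subclasses SharedParameters, so it inherits base_dir along with its default. The luigi.util.inherits import at the top of the file offers a decorator-based alternative when subclassing is undesirable; a sketch (task names are illustrative):

import luigi
from luigi.util import inherits


class Shared(luigi.Task):
    base_dir = luigi.Parameter(default='/tmp/citeomatic-demo')


@inherits(Shared)
class Train(luigi.Task):
    # @inherits copies Shared's parameters onto Train
    def requires(self):
        # clone() forwards the shared parameter values to the upstream task
        return self.clone(Shared)
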
github awslabs / aws-service-catalog-puppet / servicecatalog_puppet / luigi_tasks_and_targets.py View on Github
    def write_output(self, content):
        with self.output().open('w') as f:
            f.write(
                json.dumps(
                    content,
                    indent=4,
                    default=str,
                )
            )


class GetSSMParamTask(PuppetTask):
    parameter_name = luigi.Parameter()
    name = luigi.Parameter()
    region = luigi.Parameter(default=None)

    def params_for_results_display(self):
        return {
            "parameter_name": self.parameter_name,
            "name": self.name,
            "region": self.region,
        }

    @property
    def uid(self):
        return f"{self.region}-{self.parameter_name}-{self.name}"

    def output(self):
        return luigi.LocalTarget(
            f"output/{self.__class__.__name__}/"
            f"{self.uid}.json"
github nestauk / nesta / nesta / core / luigihacks / estask.py View on Github
        index (str): Override the elasticsearch config with this index.
        process_batch_size (int): Number of documents per batch.
        intermediate_bucket (str): S3 bucket where batch chunks are stored.
        sql_config_filename (str): SQL config path/filename in the batch task.
    '''
    routine_id = luigi.Parameter()
    db_config_path = luigi.Parameter('mysqldb.config')
    endpoint = luigi.Parameter()
    dataset = luigi.Parameter()
    entity_type = luigi.Parameter()
    kwargs = luigi.DictParameter(default={})
    index = luigi.Parameter(default=None)
    process_batch_size = luigi.IntParameter(default=5000)
    intermediate_bucket = luigi.Parameter('nesta-production'
                                          '-intermediate')
    sql_config_filename = luigi.Parameter('mysqldb.config')

    @property
    @functools.lru_cache()
    def _done_ids(self):
        return self.done_ids()

    @abstractmethod
    def done_ids(self):
        '''All document ids which do not require processing. If
        you want to avoid writing that function see
        :obj:`LazyElasticsearchTask`.

        Returns:
            done_ids (set): A set of document ids, not to be processed.
        '''
        pass
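
Two details worth noting above: the first positional argument of a parameter is its default (so luigi.Parameter('mysqldb.config') is equivalent to default='mysqldb.config'), and kwargs = luigi.DictParameter(default={}) accepts JSON on the command line. A sketch of the DictParameter behaviour (names are illustrative):

import luigi


class DictDemo(luigi.Task):
    # on the command line: --kwargs '{"chunk_size": 100}' (parsed as JSON)
    kwargs = luigi.DictParameter(default={})

    def run(self):
        # the value arrives frozen (immutable); copy it to mutate
        print(dict(self.kwargs))
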
github constantinpape / cluster_tools / cluster_tools / lifted_multicut / solve_lifted_global.py View on Github
# Lifted Multicut Tasks
#


class SolveLiftedGlobalBase(luigi.Task):
    """ SolveLiftedGlobal base class
    """

    task_name = 'solve_lifted_global'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    # input volumes and graph
    problem_path = luigi.Parameter()
    assignment_path = luigi.Parameter()
    assignment_key = luigi.Parameter()
    scale = luigi.IntParameter()
    lifted_prefix = luigi.Parameter()
    #
    dependency = luigi.TaskParameter()

    def requires(self):
        return self.dependency

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({'agglomerator': 'kernighan-lin',
                       'time_limit_solver': None})
        return config
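
dependency = luigi.TaskParameter() above makes the upstream task itself configurable. Strictly, a TaskParameter parsed from the command line yields a task class (you pass the task family name), while these cluster_tools tasks hand in fully constructed task instances and return them from requires(). A sketch of the class-valued form (task names are illustrative):

import luigi


class Upstream(luigi.Task):
    def output(self):
        return luigi.LocalTarget('upstream.txt')

    def run(self):
        with self.output().open('w') as f:
            f.write('done\n')


class Downstream(luigi.Task):
    # on the command line: --dependency Upstream (the task family name)
    dependency = luigi.TaskParameter(default=Upstream)

    def requires(self):
        # the parameter value is a class here, so instantiate it
        return self.dependency()
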
github ethanluoyc / statsnba-playbyplay / luigitasks / matchups.py View on Github
import luigi
import pandas as pd
from playbyplays import ProcessPlayByPlay, get_season_game_ids


class PbPToMatchups(luigi.Task):
    game_id = luigi.Parameter()
    output_file_format = 'matchups_{game_id}.csv'

    def requires(self):
        return ProcessPlayByPlay(game_id=self.game_id)

    def run(self):
        with self.input().open() as in_file:
            game_pbp = pd.read_csv(in_file,
                                   engine='c',
                                   dtype={'game_id': str},
                                   converters={'period_elapsed_time': pd.to_timedelta,
                                               'overall_elapsed_time': pd.to_timedelta,
                                               'period_remaining_time': pd.to_timedelta,
                                               'overall_remaining_time': pd.to_timedelta})

        # fill up the score columns for better tabulation
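
The excerpt is cut off here, but output_file_format hints at the usual pattern: because game_id identifies each task instance, the output target is derived from it so that distinct games never collide. A sketch of an output() consistent with the attributes shown (not the project's actual code):

import luigi


class MatchupDemo(luigi.Task):
    game_id = luigi.Parameter()
    output_file_format = 'matchups_{game_id}.csv'

    def output(self):
        # one target per parameter value: each game_id gets its own file
        return luigi.LocalTarget(self.output_file_format.format(game_id=self.game_id))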