How to use the luigi.FloatParameter class in luigi

To help you get started, we’ve selected a few luigi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github m3dev / redshells / redshells / data / data_frame_utils.py View on Github external
def output(self):
        """Return the gokart target for the configured output file path."""
        target_path = self.output_file_path
        return self.make_target(target_path)

    def run(self):
        """Cast the configured columns to pandas 'category' dtype and dump the frame."""
        column_names = list(self.categorical_column_names)
        frame = self.load_data_frame(required_columns=set(column_names))
        for column in self.categorical_column_names:
            frame[column] = frame[column].astype('category')
        self.dump(frame)


class SplitTrainTestData(gokart.TaskOnKart):
    """Shuffle the upstream data frame and split it into train/test pickles."""
    task_namespace = 'redshells.data_frame_utils'
    data_task = gokart.TaskInstanceParameter()
    test_size_rate = luigi.FloatParameter()
    train_output_file_path = luigi.Parameter(default='data/train_data.pkl')  # type: str
    test_output_file_path = luigi.Parameter(default='data/test_data.pkl')  # type: str

    def requires(self):
        return self.data_task

    def output(self):
        # Two named targets; self.dump(..., name) below writes to the matching one.
        return dict(
            train=self.make_target(self.train_output_file_path),
            test=self.make_target(self.test_output_file_path))

    def run(self):
        shuffled = sklearn.utils.shuffle(self.load_data_frame())
        train_data, test_data = sklearn.model_selection.train_test_split(
            shuffled, test_size=self.test_size_rate)
        self.dump(train_data, 'train')
        self.dump(test_data, 'test')
github spotify / luigi / luigi / contrib / lsf.py View on Github external
subprocess.call(['bkill', job_id])


class LSFJobTask(luigi.Task):
    """
    Takes care of uploading and executing an LSF job

    The ``*_flag`` parameters are presumably forwarded to ``bsub`` at
    submission time (the submission code is outside this excerpt — confirm).
    Most parameters are ``significant=False`` so they do not affect the
    task's identity / cache key.
    """

    # Number of CPU slots to request.
    n_cpu_flag = luigi.IntParameter(default=2, significant=False)
    # Scratch directory, presumably shared with the cluster nodes — confirm.
    shared_tmp_dir = luigi.Parameter(default='/tmp', significant=False)
    # Resource requirement string (e.g. memory limit).
    resource_flag = luigi.Parameter(default='mem=8192', significant=False)
    memory_flag = luigi.Parameter(default='8192', significant=False)
    queue_flag = luigi.Parameter(default='queue_name', significant=False)
    # Runtime limit; units are not visible in this excerpt — confirm.
    runtime_flag = luigi.IntParameter(default=60)
    # Optional job name; empty string means no name flag.
    job_name_flag = luigi.Parameter(default='')
    poll_time = luigi.FloatParameter(
        significant=False, default=5,
        description="specify the wait time to poll bjobs for the job status")
    # Whether to keep job metadata after completion.
    save_job_info = luigi.BoolParameter(default=False)
    # NOTE(review): this parameter shadows luigi.Task.output() as an instance
    # attribute name — verify this is intentional in the full class.
    output = luigi.Parameter(default='')
    extra_bsub_args = luigi.Parameter(default='')

    # Last observed job status; class-level default until polling populates it.
    job_status = None

    def fetch_task_failures(self):
        """
        Read in the error file from bsub
        """
        error_file = os.path.join(self.tmp_dir, "job.err")
        if os.path.isfile(error_file):
            with open(error_file, "r") as f_err:
                errors = f_err.readlines()
github constantinpape / cluster_tools / cluster_tools / distances / distance_workflow.py View on Github external
distances = pickle.load(f)
                res_dict.update(distances)
            with open(self.output_path, 'wb') as f:
                pickle.dump(res_dict, f)

    def output(self):
        """Local target pointing at the merged pairwise-distance pickle."""
        result_path = self.output_path
        return luigi.LocalTarget(result_path)


class PairwiseDistanceWorkflow(WorkflowBase):
    """Workflow that computes pairwise object distances, then merges the results."""
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    morphology_path = luigi.Parameter()
    morphology_key = luigi.Parameter()
    output_path = luigi.Parameter()
    max_distance = luigi.FloatParameter()
    resolution = luigi.ListParameter()
    max_size = luigi.IntParameter(default=None)

    def requires(self):
        # Resolve the implementation-specific ObjectDistances task class.
        task_cls = getattr(distance_tasks, self._get_task_name('ObjectDistances'))
        distance_dep = task_cls(
            tmp_folder=self.tmp_folder, max_jobs=self.max_jobs,
            config_dir=self.config_dir,
            input_path=self.input_path, input_key=self.input_key,
            morphology_path=self.morphology_path,
            morphology_key=self.morphology_key,
            max_distance=self.max_distance, resolution=self.resolution,
            max_size=self.max_size)
        # Chain the merge step onto the distance computation.
        return MergePairwiseDistances(
            tmp_folder=self.tmp_folder, max_jobs=self.max_jobs,
            output_path=self.output_path, dependency=distance_dep)
github m3dev / redshells / redshells / app / word_item_similarity / filter_item_by_word_similarity.py View on Github external
from collections import defaultdict
from logging import getLogger

import luigi
import numpy as np

import gokart

logger = getLogger(__name__)


class FilterItemByWordSimilarity(gokart.TaskOnKart):
    """Combines a word->items mapping, word embeddings and item-title
    embeddings (loaded in ``run``); presumably filters items by similarity
    between word and title embeddings — the full run() logic is outside
    this excerpt.
    """

    # Upstream task producing the word -> items mapping.
    word2items_task = gokart.TaskInstanceParameter()
    # Upstream task producing word embeddings.
    word2embedding_task = gokart.TaskInstanceParameter()
    # Upstream task producing per-item title embeddings.
    item2title_embedding_task = gokart.TaskInstanceParameter()
    # Threshold parameter; exact semantics not visible here — confirm.
    no_below = luigi.FloatParameter()
    output_file_path = luigi.Parameter(
        default='app/word_item_similarity/filter_item_by_word_similarity.pkl')  # type: str

    def requires(self):
        """Dependencies keyed by the names used with ``self.load`` in ``run``."""
        return {
            'word2items': self.word2items_task,
            'word2embedding': self.word2embedding_task,
            'item2title_embedding': self.item2title_embedding_task,
        }

    def output(self):
        """Single pickle target at the configured output path."""
        path = self.output_file_path
        return self.make_target(path)

    def run(self):
        word2items = self.load('word2items')
        word2embedding = self.load('word2embedding')
        item2title_embedding = self.load('item2title_embedding')
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / CAT / base_tasks.py View on Github external
no_evaluate_dependency = luigi.BoolParameter(default=False)
    # AugustusTM(R) parameters
    augustus = luigi.BoolParameter(default=False)
    augustus_species = luigi.Parameter(default='human', significant=False)
    # NOTE(review): default=None on a plain Parameter (also parasolCommand and
    # workDir below) triggers luigi warnings in some versions — consider
    # OptionalParameter. Left unchanged here.
    augustus_hints_db = luigi.Parameter(default=None)
    tm_cfg = luigi.Parameter(default='augustus_cfgs/extrinsic.ETM1.cfg', significant=False)
    tmr_cfg = luigi.Parameter(default='augustus_cfgs/extrinsic.ETM2.cfg', significant=False)
    # AugustusCGP parameters
    augustus_cgp = luigi.BoolParameter(default=False)
    cgp_param = luigi.Parameter(default='augustus_cfgs/log_reg_parameters_default.cfg', significant=False)
    augustus_cgp_cfg_template = luigi.Parameter(default='augustus_cfgs/cgp_extrinsic_template.cfg', significant=False)
    # MAF alignment chunking; units not visible in this excerpt — confirm.
    maf_chunksize = luigi.IntParameter(default=2500000, significant=False)
    maf_overlap = luigi.IntParameter(default=500000, significant=False)
    # consensus options
    resolve_split_genes = luigi.BoolParameter(default=False)
    cgp_splice_support = luigi.FloatParameter(default=0.8, significant=False)
    cgp_num_exons = luigi.IntParameter(default=3, significant=False)
    # Toil options
    batchSystem = luigi.Parameter(default='singleMachine', significant=False)
    maxCores = luigi.IntParameter(default=32, significant=False)
    logLevel = luigi.Parameter(default='WARNING', significant=False)  # this is passed to toil
    cleanWorkDir = luigi.Parameter(default='onSuccess', significant=False)  # debugging option
    parasolCommand = luigi.Parameter(default=None, significant=False)
    # 8 GiB expressed in bytes.
    defaultMemory = luigi.IntParameter(default=8 * 1024 ** 3, significant=False)
    workDir = luigi.Parameter(default=None, significant=False)
    disableCaching = luigi.BoolParameter(default=False, significant=False)

    def __repr__(self):
        """override the repr to make logging cleaner"""
        # we are in a genome-specific task, so say so
        if hasattr(self, 'genome'):
            return 'Task: {} for {}'.format(self.__class__.__name__, self.genome)
github ericwayman / luigi_gdb_pipeline_demo / pca_pipeline / pipeline.py View on Github external
os.getenv("GPDB_PORT")
                )

register_adapter(QuotedIdentifier, lambda x: x)

class DatabaseConfig(luigi.Config):
    """Database table names for the pipeline, supplied via luigi configuration."""

    base_table = luigi.Parameter()
    feature_input_table = luigi.Parameter()
    pca_input_base_table = luigi.Parameter()
    pca_output_base_table = luigi.Parameter()
    outlier_base_table = luigi.Parameter()

class ModelConfig(luigi.Config):
    """Model settings supplied via luigi configuration."""

    user_col = luigi.Parameter()
    percentage_val = luigi.FloatParameter()
    threshold = luigi.FloatParameter()

# Default target directory, stamped with the configured user column and the
# date the module was imported (date.today() is evaluated once, at import time).
TARGET_PATH = os.path.join(
    os.path.dirname(__file__),
    'target/{feature}_{date}'.format(feature=ModelConfig().user_col, date=date.today()),
)

#config classes
class PathConfig(luigi.Config):
    """Filesystem locations, overridable via luigi configuration."""

    target_path = luigi.Parameter(default=TARGET_PATH)

def table_names_dict(id):
        pca_tables = {
        'hour':id,
        'pca_input':DatabaseConfig().pca_input_base_table+'_{}'.format(id),
        'pca_output':DatabaseConfig().pca_output_base_table+'_{}'.format(id),
github m3dev / redshells / redshells / train / train_feature_aggregation_similarity_model.py View on Github external
import gokart

from redshells.model import FeatureAggregationSimilarityModel
from redshells.model.feature_aggregation_similarity_model import FeatureAggregationSimilarityDataset

logger = getLogger(__name__)


class TrainFeatureAggregationSimilarityModel(gokart.TaskOnKart):
    """Trains a FeatureAggregationSimilarityModel on the dataset produced by
    ``dataset_task`` (the training loop continues beyond this excerpt).
    """

    dataset_task = gokart.TaskInstanceParameter(description='An instance of task which outputs `FeatureAggregationSimilarityDataset`.')
    embedding_size = luigi.IntParameter()  # type: int
    learning_rate = luigi.FloatParameter()  # type: float
    batch_size = luigi.IntParameter()  # type: int
    epoch_size = luigi.IntParameter()  # type: int
    test_size_rate = luigi.FloatParameter()  # type: float
    early_stopping_patience = luigi.IntParameter()  # type: int
    max_data_size = luigi.IntParameter()  # type: int
    # NOTE(review): the ')' in the default path looks like a typo for '_'
    # ('feature_aggregation_similarity_model.pkl'). Left unchanged because the
    # default output path is observable behavior existing runs may depend on.
    output_file_path = luigi.Parameter(default='model/feature_aggregation)similarity_model.pkl')  # type: str

    def requires(self):
        """Upstream dependency supplying the training dataset."""
        upstream = self.dataset_task
        return upstream

    def output(self):
        """Pickle target for the trained model at the configured path."""
        model_path = self.output_file_path
        return self.make_target(model_path)

    def run(self):
        dataset = self.load()  # type: FeatureAggregationSimilarityDataset
        feature_size = dataset.x_item_features.shape[1]
        item_size = max(np.max(dataset.x_item_indices), np.max(dataset.y_item_indices))
        max_feature_index = max(np.max(dataset.x_item_features), np.max(dataset.y_item_features))
github riga / law / law / contrib / workflow / htcondor / __init__.py View on Github external
workflow_proxy_cls = HTCondorWorkflowProxy

    # HTCondor submission/polling parameters; each description documents its
    # default, and interval/walltime state their units (minutes/hours).
    pool = luigi.Parameter(default=NO_STR, significant=False, description="target htcondor pool")
    scheduler = luigi.Parameter(default=NO_STR, significant=False, description="target htcondor "
        "scheduler")
    retries = luigi.IntParameter(default=5, significant=False, description="number of automatic "
        "resubmission attempts per job, default: 5")
    tasks_per_job = luigi.IntParameter(default=1, significant=False, description="number of tasks "
        "to be processed by one job, default: 1")
    only_missing = luigi.BoolParameter(significant=False, description="skip tasks that are "
        "considered complete")
    no_poll = luigi.BoolParameter(significant=False, description="just submit, do not initiate "
        "status polling after submission")
    threads = luigi.IntParameter(default=4, significant=False, description="number of threads to "
        "use for (re)submission and status queries, default: 4")
    interval = luigi.FloatParameter(default=3, significant=False, description="time between status "
        "polls in minutes, default: 3")
    walltime = luigi.FloatParameter(default=48, significant=False, description="maximum wall time "
        "in hours, default: 48")
    max_poll_fails = luigi.IntParameter(default=5, significant=False, description="maximum number "
        "of consecutive errors during polling, default: 5")
    cancel_jobs = luigi.BoolParameter(default=False, description="cancel all submitted jobs, no "
        "new submission")
    transfer_logs = luigi.BoolParameter(significant=False, description="transfer job logs to the "
        "output directory")

    # Workflow-level parameter names; presumably excluded from branch tasks —
    # confirm against law's workflow base class.
    exclude_params_branch = {"pool", "scheduler", "retries", "tasks_per_job", "only_missing",
        "no_poll", "threads", "interval", "walltime", "max_poll_fails", "cancel_jobs",
        "transfer_logs"}

    @abstractmethod
    def htcondor_output_directory(self):
github GeoscienceAustralia / wagl / wagl / singlefile_workflow.py View on Github external
vertices = luigi.TupleParameter(default=(5, 5))
    method = luigi.EnumParameter(enum=Method, default=Method.SHEAR)
    pixel_quality = luigi.BoolParameter()
    land_sea_path = luigi.Parameter()
    # Ancillary defaults: a single user-supplied aerosol / water vapour value.
    aerosol = luigi.DictParameter(default={'user': 0.05}, significant=False)
    brdf_path = luigi.Parameter(significant=False)
    brdf_premodis_path = luigi.Parameter(significant=False)
    ozone_path = luigi.Parameter(significant=False)
    water_vapour = luigi.DictParameter(default={'user': 1.5},
                                       significant=False)
    ecmwf_path = luigi.Parameter(significant=False)
    invariant_height_fname = luigi.Parameter(significant=False)
    dsm_fname = luigi.Parameter(significant=False)
    modtran_exe = luigi.Parameter(significant=False)
    tle_path = luigi.Parameter(significant=False)
    # NOTE(review): semantics/units of rori and buffer_distance are not visible
    # in this excerpt — confirm against the wagl documentation.
    rori = luigi.FloatParameter(default=0.52, significant=False)
    compression = luigi.EnumParameter(enum=H5CompressionFilter,
                                      default=H5CompressionFilter.LZF,
                                      significant=False)
    filter_opts = luigi.DictParameter(default=None, significant=False)
    acq_parser_hint = luigi.OptionalParameter(default='')
    buffer_distance = luigi.FloatParameter(default=8000, significant=False)
    h5_driver = luigi.OptionalParameter(default='', significant=False)

    def output(self):
        """HDF5 target named after the granule, or the level-1 basename if no granule."""
        label = self.granule or basename(self.level1)
        out_fname = '{label}.wagl.h5'.format(label=label)
        return luigi.LocalTarget(pjoin(self.outdir, out_fname))

    def run(self):