@staticmethod
def get_config():
    configs = super(DownscalingWorkflow, DownscalingWorkflow).get_config()
    configs.update({'downscaling': downscale_tasks.DownscalingLocal.default_task_config(),
                    'copy_volume': copy_tasks.CopyVolumeLocal.default_task_config()})
    return configs
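
A minimal usage sketch for the pattern above: fetch the merged defaults, override one task's settings, and dump the result to a JSON config file for the workflow to pick up. The 'threads_per_job' key and the 'config.json' path are assumptions for illustration, not taken from the snippet.

import json

configs = DownscalingWorkflow.get_config()
configs['downscaling']['threads_per_job'] = 4  # hypothetical task setting
with open('config.json', 'w') as f:            # hypothetical config location
    json.dump(configs, f, indent=2)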
# HDF5 is frickin slow, so it seems to be better to do the
# computations in n5 and then copy data to h5
class PainteraToBdvWorkflow(WorkflowBase):
    input_path = luigi.Parameter()
    input_key_prefix = luigi.Parameter()
    output_path = luigi.Parameter()
    dtype = luigi.Parameter(default=None)
    metadata_dict = luigi.DictParameter(default={})
    skip_existing_levels = luigi.BoolParameter(default=True)

    # we offset the scale by 1 because
    # 0 indicates the original resolution
    def get_scale_key(self, scale, metadata_format):
        if metadata_format == 'paintera':
            prefix = 's%i' % scale
            out_key = os.path.join(self.input_key_prefix, prefix)
        elif metadata_format == 'bdv':
            # we only support a single time-point and single set-up for now
            # TODO support multiple set-ups for multi-channel data
            out_key = 't00000/s00/%i/cells' % scale
        else:
            raise ValueError("Unsupported metadata format: %s" % metadata_format)
        return out_key

    def get_scales(self):
        with file_reader(self.input_path, 'r') as f:
            ...
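
The two branches of get_scale_key map the same pyramid level to different dataset keys. A standalone sketch, assuming a hypothetical prefix 'setup0/data' that is not taken from the snippet:

import os

def scale_key(scale, metadata_format, prefix='setup0/data'):
    # paintera keeps scale datasets under the input prefix; bdv uses a
    # fixed single-timepoint, single-setup layout.
    if metadata_format == 'paintera':
        return os.path.join(prefix, 's%i' % scale)
    return 't00000/s00/%i/cells' % scale

print(scale_key(1, 'paintera'))  # -> setup0/data/s1
print(scale_key(1, 'bdv'))       # -> t00000/s00/1/cells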
logger.info(f"[{self.portfolio}] {self.account_id}:{self.region} :: Finished importing")
class CreateLaunchRoleConstraintsForPortfolio(PuppetTask):
account_id = luigi.Parameter()
region = luigi.Parameter()
portfolio = luigi.Parameter()
hub_portfolio_id = luigi.Parameter()
puppet_account_id = luigi.Parameter()
launch_constraints = luigi.DictParameter()
dependencies = luigi.ListParameter(default=[])
post_actions = luigi.ListParameter()
should_use_sns = luigi.Parameter(default=False, significant=False)
def requires(self):
return {
'create_spoke_local_portfolio_task': ImportIntoSpokeLocalPortfolioTask(
account_id=self.account_id,
region=self.region,
portfolio=self.portfolio,
hub_portfolio_id=self.hub_portfolio_id,
),
'deps': [ProvisionProductTask(**dependency) for dependency in self.dependencies]
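
The 'deps' entry shows a useful DictParameter idiom: each dependency dict is splatted straight into a task constructor. A sketch of what such entries might look like; every key below is hypothetical rather than taken from ProvisionProductTask:

dependencies = [
    {
        'launch_name': 'core-networking',  # hypothetical parameter names
        'portfolio': 'infrastructure',
        'account_id': '123456789012',
        'region': 'eu-west-1',
    },
]
tasks = [ProvisionProductTask(**dependency) for dependency in dependencies]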
i = 0
for tweet in t.search(term):
    i += 1
    if i > count:
        break
    if i % 500 == 0:
        self.update_job(
            date_path=self.search['date_path'],
            status="STARTED: %s - %s/%s" %
                   (self.task_family, i, count)
        )
    fh.write(json.dumps(tweet) + '\n')
class CountHashtags(EventfulTask):
    search = luigi.DictParameter()

    def requires(self):
        return FetchTweets(search=self.search)

    def output(self):
        fname = self.input().fn.replace('tweets.json', 'count-hashtags.csv')
        return luigi.LocalTarget(fname)

    def run(self):
        c = Counter()
        for tweet_str in self.input().open('r'):
            tweet = json.loads(tweet_str)
            c.update([ht['text'].lower()
                      for ht in tweet['entities']['hashtags']])
        with self.output().open('w') as fp_counts:
            writer = csv.DictWriter(fp_counts, delimiter=',',
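
The snippet is cut off mid-call. A minimal sketch of how the write step could finish; the fieldnames are an assumption, not recovered from the source:

import csv
from collections import Counter

def write_hashtag_counts(counts, fp_counts):
    # Assumed column names; the original snippet is truncated before them.
    writer = csv.DictWriter(fp_counts, delimiter=',',
                            fieldnames=['hashtag', 'count'])
    writer.writeheader()
    for hashtag, count in counts.most_common():
        writer.writerow({'hashtag': hashtag, 'count': count})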
        if not location:
            raise Exception("Couldn't find location for table: {0}".format(str(self)))
        return location

    def open(self, mode):
        raise NotImplementedError("open() is not supported for HivePartitionTarget")


class ExternalHiveTask(luigi.ExternalTask):
    """
    External task that depends on a Hive table/partition.
    """
    database = luigi.Parameter(default='default')
    table = luigi.Parameter()
    partition = luigi.DictParameter(
        default={},
        description='Python dictionary specifying the target partition e.g. {"date": "2013-01-25"}'
    )

    def output(self):
        if self.partition:
            return HivePartitionTarget(
                database=self.database,
                table=self.table,
                partition=self.partition,
            )
        else:
            return HiveTableTarget(
                database=self.database,
                table=self.table,
            )
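
An ExternalHiveTask is typically placed in another task's requires() so that scheduling blocks until the partition exists. A short sketch; the table name and downstream task are made up for illustration:

class AggregateEvents(luigi.Task):
    date = luigi.DateParameter()

    def requires(self):
        # Wait for the upstream Hive partition to land before running.
        return ExternalHiveTask(table='events',  # hypothetical table
                                partition={'date': self.date.isoformat()})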
            booster='gbtree',
            gamma=0,
            min_child_weight=1,
            max_delta_step=0,
            subsample=1,
            colsample_bytree=1,
            colsample_bylevel=1,
            reg_alpha=0,
            reg_lambda=1,
            scale_pos_weight=1,
            base_score=0.5))  # type: Dict[str, Any]
    dictionary_filter_kwargs = luigi.DictParameter(
        default=dict(no_below=5, no_above=0.5, keep_n=100000, keep_tokens=None))  # type: Dict[str, Any]
    fasttext_kwargs = luigi.DictParameter(
        default=dict(
            corpus_file=None,
            sg=0,
            hs=0,
            size=200,
            alpha=0.025,
            window=5,
            min_count=5,
            max_vocab_size=None,
            word_ngrams=1,
            sample=1e-3,
            seed=1,
            workers=3,
            min_alpha=0.0001,
            negative=5,
            ns_exponent=0.75,
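
DictParameter defaults like these can be overridden without code changes because the parameter round-trips through JSON. A quick demonstration against luigi's actual DictParameter API:

import luigi

p = luigi.DictParameter()
parsed = p.parse('{"no_below": 10, "no_above": 0.4}')
print(parsed['no_below'])    # 10
print(p.serialize(parsed))   # back to a JSON string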
import gokart
import luigi
import sklearn

import redshells
import redshells.train.utils


class _BinaryClassificationModelTask(gokart.TaskOnKart):
    train_data_task = gokart.TaskInstanceParameter(
        description='A task that outputs a pd.DataFrame with columns={`target_column_name`}.')
    target_column_name = luigi.Parameter(default='category', description='Category column name.')  # type: str
    model_name = luigi.Parameter(
        description='A model name which has a "fit" interface, and must be registered by "register_prediction_model".'
    )  # type: str
    model_kwargs = luigi.DictParameter(
        default=dict(), description='Arguments of the model which are created with model_name.')  # type: Dict[str, Any]

    def requires(self):
        return self.train_data_task

    def output(self):
        return self.make_target(self.output_file_path)

    def create_model(self):
        return redshells.factory.create_prediction_model(self.model_name, **self.model_kwargs)

    def create_train_data(self):
        data = self.load_data_frame(required_columns={self.target_column_name})
        data = sklearn.utils.shuffle(data)
        y = data[self.target_column_name].values
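
A sketch of wiring this up, assuming redshells exposes the register_prediction_model hook quoted in the model_name description above; the registration signature, the concrete task, and the upstream task name are all assumptions:

import redshells
import sklearn.linear_model

# Assumed registration call, named after the hook quoted above.
redshells.factory.register_prediction_model('LogisticRegression',
                                            sklearn.linear_model.LogisticRegression)

task = TrainBinaryClassificationModel(   # hypothetical concrete subclass
    train_data_task=MakeTrainData(),     # hypothetical upstream task
    model_name='LogisticRegression',
    model_kwargs=dict(C=0.5, max_iter=200))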
        entity_type (str): Name of the entity type to label this task with.
        kwargs (dict): Any other job parameters to pass to the batchable.
    '''
    date = luigi.DateParameter()
    routine_id = luigi.Parameter()
    intermediate_bucket = luigi.Parameter()
    db_config_env = luigi.Parameter()
    db_section = luigi.Parameter(default="mysqldb")
    process_batch_size = luigi.IntParameter(default=10000)
    drop_and_recreate = luigi.BoolParameter(default=False)
    dataset = luigi.Parameter()
    endpoint = luigi.Parameter()
    id_field = luigi.Parameter()
    filter = luigi.Parameter(default=None)
    entity_type = luigi.Parameter()
    kwargs = luigi.DictParameter(default={})

    def output(self):
        '''Points to the output database engine'''
        self.db_config_path = os.environ[self.db_config_env]
        db_config = get_config(self.db_config_path, "mysqldb")
        db_config["database"] = 'dev' if self.test else 'production'
        db_config["table"] = f"{self.routine_id} "  # Not a real table
        update_id = f"{self.routine_id}_{self.date}"
        return MySqlTarget(update_id=update_id, **db_config)

    def prepare(self):
        if self.test:
            self.process_batch_size = 1000
            logging.warning("Batch size restricted to "
                            f"{self.process_batch_size}"
                            " while in test mode")
        else:
            raise Exception(f"Unknown type: {self.type}")
        with self.output().open('w') as output_file:
            output_file.write(rendered)


class CreateVersionPipelineTask(FactoryTask):
    all_regions = luigi.ListParameter()
    version = luigi.DictParameter()
    product = luigi.DictParameter()
    provisioner = luigi.DictParameter()
    products_args_by_region = luigi.DictParameter()
    factory_version = luigi.Parameter()
    region = luigi.Parameter()
    tags = luigi.ListParameter()

    def output(self):
        return luigi.LocalTarget(
            f"output/CreateVersionPipelineTask/"
            f"{self.product.get('Name')}_{self.version.get('Name')}.template.yaml"
        )

    def requires(self):
        return CreateVersionPipelineTemplateTask(
            all_regions=self.all_regions,
            version=self.version,
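
product.get('Name') works because luigi hands DictParameter values to the task as an immutable FrozenOrderedDict, which still supports normal mapping reads. A small demonstration; the task name is made up:

import luigi

class ShowProduct(luigi.Task):
    product = luigi.DictParameter()

    def run(self):
        print(self.product.get('Name'))  # mapping reads work as usual
        # self.product['Name'] = 'x'     # mutation would raise TypeError

if __name__ == '__main__':
    luigi.build([ShowProduct(product={'Name': 'demo'})], local_scheduler=True)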
class WordItemSimilarityConfig(luigi.Config):
    task_namespace = 'redshells.word_item_similarity'
    matrix_factorization_kwargs = luigi.DictParameter(
        default=dict(
            n_latent_factors=20,
            learning_rate=1e-3,
            reg_item=1e-5,
            reg_user=1e-5,
            batch_size=2**10,
            epoch_size=30,
            test_size=0.1,
        ))  # type: Dict[str, Any]
    xgb_classifier_kwargs = luigi.DictParameter(
        default=dict(
            max_depth=5,
            learning_rate=0.1,
            n_estimators=300,
            silent=True,
            objective="binary:logistic",
            booster='gbtree',
            gamma=0,
            min_child_weight=1,
            max_delta_step=0,
            subsample=1,
            colsample_bytree=1,
            colsample_bylevel=1,
            reg_alpha=0,
            reg_lambda=1,
            scale_pos_weight=1,
            base_score=0.5))  # type: Dict[str, Any]
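
With task_namespace set, file-based overrides for this Config class should go under the namespaced section name, and since these are DictParameters the override value is JSON. A sketch of reading a value back; the luigi.cfg lines in the comment are illustrative:

#   [redshells.word_item_similarity.WordItemSimilarityConfig]
#   xgb_classifier_kwargs={"max_depth": 7, "n_estimators": 500}
print(WordItemSimilarityConfig().xgb_classifier_kwargs['max_depth'])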
        with self.output().temporary_path() as out_fname:
            _calculate_angles(acqs[0], self.input().path, out_fname,
                              self.compression, self.filter_opts, self.tle_path)


class AncillaryData(luigi.Task):
    """Get all ancillary data."""

    level1 = luigi.Parameter()
    work_root = luigi.Parameter(significant=False)
    granule = luigi.OptionalParameter(default='')
    vertices = luigi.TupleParameter()
    workflow = luigi.EnumParameter(enum=Workflow)
    acq_parser_hint = luigi.OptionalParameter(default='')
    aerosol = luigi.DictParameter({'user': 0.05}, significant=False)
    brdf_path = luigi.Parameter(significant=False)
    brdf_premodis_path = luigi.Parameter(significant=False)
    ozone_path = luigi.Parameter(significant=False)
    water_vapour = luigi.DictParameter({'user': 1.5}, significant=False)
    dsm_fname = luigi.Parameter(significant=False)
    ecmwf_path = luigi.Parameter(significant=False)
    invariant_height_fname = luigi.Parameter(significant=False)
    compression = luigi.EnumParameter(enum=H5CompressionFilter,
                                      default=H5CompressionFilter.LZF,
                                      significant=False)
    filter_opts = luigi.DictParameter(default=None, significant=False)

    def requires(self):
        group = acquisitions(self.level1, self.acq_parser_hint).supported_groups[0]
        args = [self.level1, self.work_root, self.granule, group]
        return CalculateSatelliteAndSolarGrids(*args)
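
Most of the ancillary paths above are marked significant=False, so they are excluded from the task's identity: two AncillaryData instances that differ only in such a path resolve to the same task. A stand-alone demonstration of that luigi behaviour (the Demo class is not part of the source):

import luigi

class Demo(luigi.Task):
    level1 = luigi.Parameter()
    work_root = luigi.Parameter(significant=False)

a = Demo(level1='scene.tar', work_root='/tmp/a')
b = Demo(level1='scene.tar', work_root='/tmp/b')
print(a.task_id == b.task_id)  # True: insignificant params don't change identity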