from esper.kube import cluster_config, worker_config, make_cluster
from esper.scanner_bench import ScannerJobConfig, bench
import attr
import scannerpy
from scannerpy import DeviceType

# The remaining names are assumed to come from the surrounding esper codebase
# and scannertools; adjust these import paths to match your checkout.
from esper.prelude import Timer, pcache
from esper.scannerutil import ScannerSQLPipeline, ScannerSQLTable
from query.models import Video, Frame, Face
from django.db.models import Count, OuterRef, Subquery
from scannertools import shot_detection, face_detection, face_embedding, gender_detection
from tqdm import tqdm
# One-off debugging snippet: export the profiler trace of a previous Scanner job.
# db = scannerpy.Database()
# print('making trace')
# db.profiler(665).write_trace('shots.tar.gz')
# print('done')
# exit()
with Timer('Loading videos'):
    videos = list(Video.objects.filter(threeyears_dataset=False).order_by('id'))
    # videos = videos[:1]
    print('Found {} videos'.format(len(videos)))

def run_pipeline(db, videos, **kwargs):
    return shot_detection.compute_histograms(
        db,
        videos=[v.for_scannertools() for v in videos],
        **kwargs)

if False:
    with Timer('Benchmarking histograms'):
        pass  # the machine-type sweep below is commented out (and truncated in the source)
        # configs = [
        #     (attr.evolve(cluster_config, worker=attr.evolve(
        #         worker_config, type=kube.MachineTypeName(name='n1-standard-4'))),
        #      [ScannerJobConfig(
        #          io_packet_size=1000,
        #          work_packet_size=20,
        #          batch=20)]),
        #     (attr.evolve(cluster_config, worker=attr.evolve(
        #         worker_config, type=kube.MachineTypeName(name='n1-standard-8'))),
        #      [ScannerJobConfig(
        #          io_packet_size=1000,
        #          work_packet_size=20,
        #          batch=20)]),
        #     (attr.evolve(cluster_config, worker=attr.evolve(
        #         worker_config, type=kube.MachineTypeName(name='n1-standard-32'))),
        #      [ScannerJobConfig(
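
# Face embedding. Only the tail of this pipeline's build_pipeline survives in
# the source; the class line and the op wiring below are an assumed
# reconstruction around the surviving arguments.
class FaceEmbeddingPipeline(ScannerSQLPipeline, face_embedding.FaceEmbeddingPipeline):
    def build_pipeline(self):
        return {
            'embeddings': self._db.ops.EmbedFaces(
                frame=self._sources['frame_sampled'].op,
                bboxes=self._sources['faces'].op,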
                model_dir=self._model_dir,
                device=DeviceType.GPU if self._db.has_gpu() else DeviceType.CPU)
        }

embed_faces = FaceEmbeddingPipeline.make_runner()
# Frames off shot boundaries that contain at least one detected face.
def frames_for_video(video):
    return [f['number'] for f in
            Frame.objects.filter(video=video, shot_boundary=False).annotate(
                c=Subquery(Face.objects.filter(frame=OuterRef('pk')).values('frame')
                           .annotate(c=Count('*')).values('c')))
            .filter(c__gte=1)
            .values('number').order_by('number')]
if False:
    with Timer('benchmark'):
        videos = videos[:30]

        def run_pipeline(db, videos, frames, **kwargs):
            return embed_faces(
                db,
                videos=[v.for_scannertools() for v in videos],
                frames=frames,
                faces=[ScannerSQLTable(Face, v)  # num_elements=len(f)
                       for v, f in zip(videos, frames)],
                cache=False,
                **kwargs)

        cfg = cluster_config(
            num_workers=5, worker=worker_config('n1-standard-32'),
            pipelines=[face_embedding.FaceEmbeddingPipeline])
        configs = [(cfg, [
            ScannerJobConfig(io_packet_size=500, work_packet_size=20, pipelines_per_worker=4),
            ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4),
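        ])]
        # The source is truncated here; closing the config list and invoking
        # bench as in the face-detection benchmark below (assumed).
        bench('embedding', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]},
              run_pipeline, configs, no_delete=True, force=True)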
# ShotBoundaryPipeline is defined earlier in the original file (elided from this snippet).
compute_shot_boundaries = ShotBoundaryPipeline.make_runner()
# with Timer('Histogram'):
#     cfg = cluster_config(
#         num_workers=300,
#         worker=worker_config('n1-standard-16'))
#     with make_cluster(cfg, no_delete=True) as db_wrapper:
#         videos = videos

# videos = list(Video.objects.filter(id__gte=91250, id__lte=91350))
# videos = [Video.objects.get(id=63970)]
videos = videos  # use the full video set
with Timer('Shot boundaries'):
    cfg = cluster_config(
        num_workers=60,
        worker=worker_config('n1-highmem-16'),
        workers_per_node=2,
        num_load_workers=1,
        num_save_workers=2)
    with make_cluster(cfg, no_delete=True) as db_wrapper:
        # from esper.scannerutil import ScannerWrapper
        # if True:
        #     db_wrapper = ScannerWrapper.create()
        db = db_wrapper.db

        job_config = ScannerJobConfig(io_packet_size=10000, work_packet_size=400, batch=400)
        hists = run_pipeline(db, videos, batch=job_config.batch, run_opts={
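            # run_opts is truncated in the source; these keys (assumed) mirror
            # the job_config fields used throughout this script.
            'io_packet_size': job_config.io_packet_size,
            'work_packet_size': job_config.work_packet_size,
        })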
class FaceDetectionPipeline(ScannerSQLPipeline, face_detection.FaceDetectionPipeline):
    db_class = Face
    json_kernel = 'FacesToJson'
    additional_sources = ['frame_ids']

    def build_pipeline(self):
        output_ops = super(FaceDetectionPipeline, self).build_pipeline()
        output_ops['frame_ids'] = self._sources['frame_ids'].op
        return output_ops

detect_faces = FaceDetectionPipeline.make_runner()
videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id'))

if False:
    with Timer('benchmark'):
        videos = videos[:50]

        def run_pipeline(db, videos, frames, **kwargs):
            return face_detection.detect_faces(
                db, videos=[v.for_scannertools() for v in videos],
                frames=frames, cache=False, **kwargs)

        cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32'))
        configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])]
        bench('face',
              {'videos': videos,
               'frames': [[f['number'] for f in
                           Frame.objects.filter(video=v).values('number').order_by('number')]
                          for v in videos]},
              run_pipeline, configs, no_delete=True, force=True)

with Timer('run'):
    print('Getting frames')

    def load_frames():
        return [[f['number'] for f in
                 Frame.objects.filter(video=v, shot_boundary=False).values('number').order_by('number')]
                for v in tqdm(videos)]
    frames = pcache.get('face_frames', load_frames)

    cfg = cluster_config(
        num_workers=100,
        worker=worker_config('n1-standard-64'),
        num_load_workers=2,
        num_save_workers=2)
    with make_cluster(cfg, sql_pool=4, no_delete=True) as db_wrapper:
        # if True:
        #     db_wrapper = ScannerWrapper.create(enable_watchdog=False)
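        # The body of this run is truncated in the source; this minimal
        # completion (assumed) mirrors the shot-boundary run above.
        db = db_wrapper.db
        detect_faces(
            db,
            videos=[v.for_scannertools() for v in videos],
            db_videos=videos,
            frames=frames)


# Gender detection. The head of this pipeline class is missing from the
# source; the class line, base classes, and build_pipeline signature are an
# assumed reconstruction so the surviving 'faces' output parses.
class GenderDetectionPipeline(ScannerSQLPipeline, gender_detection.GenderDetectionPipeline):
    def build_pipeline(self):
        return {
            # (other output ops elided in the source)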
            'faces': self._sources['faces'].op
        }

detect_genders = GenderDetectionPipeline.make_runner()
videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id'))
# Frames off shot boundaries that contain at least one detected face.
def frames_for_video(video):
    return [f['number'] for f in
            Frame.objects.filter(video=video, shot_boundary=False).annotate(
                c=Subquery(Face.objects.filter(frame=OuterRef('pk')).values('frame')
                           .annotate(c=Count('*')).values('c')))
            .filter(c__gte=1)
            .values('number').order_by('number')]
if False:
    with Timer('benchmark'):
        videos = videos[:50]

        def run_pipeline(db, videos, frames, **kwargs):
            return detect_genders(
                db,
                db_videos=videos,
                videos=[v.for_scannertools() for v in videos],
                frames=frames,
                faces=[ScannerSQLTable(Face, v)  # num_elements=len(f)
                       for v, f in zip(videos, frames)],
                cache=False,
                **kwargs)

        cfg = cluster_config(
            num_workers=5, worker=worker_config('n1-standard-32'),
            pipelines=[GenderDetectionPipeline])
        configs = [(cfg, [
            ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4),
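        ])]
        # The source is truncated here; this bench call (assumed) follows the
        # pattern of the embedding and face-detection benchmarks above.
        bench('gender', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]},
              run_pipeline, configs, no_delete=True, force=True)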