How to use the cntk.io.StreamDefs function in cntk

To help you get started, we’ve selected a few cntk examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Azure / BatchAI / recipes / CNTK / CNTK-GPU-Python-Distributed / ConvNet_CIFAR10_DataAug_Distributed.py View on Github external
transforms = []  # NOTE(review): fragment — the enclosing `def` line is outside this snippet
    if train:
        # Random-side crop is applied only while training (data augmentation).
        transforms += [
            xforms.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter
        ]

    # Scale to the target size and subtract the per-pixel mean, train and test alike.
    transforms += [
        xforms.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
        xforms.mean(mean_file)
    ]

    # Deserializer
    return C.io.MinibatchSource(
        C.io.ImageDeserializer(
            map_file,
            C.io.StreamDefs(features=C.io.StreamDef(field='image', transforms=transforms), # 1st col in mapfile referred to as 'image'
                            labels=C.io.StreamDef(field='label', shape=num_classes))),   # and second as 'label'
        randomize=train,  # shuffle only when training
        max_samples=total_number_of_samples,
        multithreaded_deserializer=True)
github microsoft / CNTK / Examples / Image / Classification / MLP / Python / SimpleMNIST.py View on Github external
def create_reader(path, is_training, input_dim, label_dim):
    """Build a MinibatchSource over the CTF file at *path*.

    Reads dense 'features' and 'labels' streams. Data is shuffled and
    repeated indefinitely when training; otherwise swept exactly once.
    """
    stream_defs = StreamDefs(
        features=StreamDef(field='features', shape=input_dim, is_sparse=False),
        labels=StreamDef(field='labels', shape=label_dim, is_sparse=False),
    )
    sweeps = INFINITELY_REPEAT if is_training else 1
    return MinibatchSource(CTFDeserializer(path, stream_defs),
                           randomize=is_training, max_sweeps=sweeps)
github Azure / ObjectDetectionUsingCntk / helpers_cntk.py View on Github external
label_file = join(data_path, data_set + '.roilabels.txt')  # NOTE(review): fragment — the enclosing `def` line is outside this snippet
    if not os.path.exists(map_file) or not os.path.exists(roi_file) or not os.path.exists(label_file):
        raise RuntimeError("File '%s', '%s' or '%s' does not exist. " % (map_file, roi_file, label_file))

    # read images
    nrImages = len(readTable(map_file))  # one map-file row per image
    # Pad-scale every image to (img_width, img_height); 114 is the fill value.
    transforms = [scale(width=img_width, height=img_height, channels=3,
                        scale_mode="pad", pad_value=114, interpolations='linear')]
    image_source = ImageDeserializer(map_file, StreamDefs(features = StreamDef(field='image', transforms=transforms)))

    # read rois and labels
    rois_dim  = 4 * n_rois         # 4 coordinates per ROI
    label_dim = n_classes * n_rois # one label vector per ROI
    roi_source = CTFDeserializer(roi_file, StreamDefs(
        rois = StreamDef(field='rois', shape=rois_dim, is_sparse=False)))
    label_source = CTFDeserializer(label_file, StreamDefs(
        roiLabels = StreamDef(field='roiLabels', shape=label_dim, is_sparse=False)))

    # define a composite reader combining the image, ROI and label deserializers
    mb = MinibatchSource([image_source, roi_source, label_source], max_samples=sys.maxsize, randomize=randomize)
    return (mb, nrImages)
github NervanaSystems / ngraph-neon / ngraph / frontends / cntk / examples / mnist_training.py View on Github external
def create_reader(path, is_training, input_dim, output_dim):
    """Create a CTF-format minibatch source.

    Reads dense 'features' and 'labels' streams from *path*; shuffles and
    repeats forever while training, performs a single pass otherwise.
    """
    streams = C.io.StreamDefs(
        labels=C.io.StreamDef(field='labels', shape=output_dim, is_sparse=False),
        features=C.io.StreamDef(field='features', shape=input_dim, is_sparse=False),
    )
    deserializer = C.io.CTFDeserializer(path, streams)
    sweeps = C.io.INFINITELY_REPEAT if is_training else 1
    return C.io.MinibatchSource(deserializer, randomize=is_training, max_sweeps=sweeps)
github microsoft / CNTK / Examples / Image / Classification / GoogLeNet / InceptionV3 / Python / InceptionV3_ImageNet.py View on Github external
transforms = []  # NOTE(review): fragment — the enclosing `def` line is outside this snippet
    if is_training:
        # Training pipeline: random-area crop with jitter, scale, then color jitter.
        transforms += [
            C.io.transforms.crop(crop_type='randomarea', area_ratio=(0.05, 1.0), aspect_ratio=(0.75, 1.0), jitter_type='uniratio'), # train uses jitter
            C.io.transforms.scale(width=IMAGE_WIDTH, height=IMAGE_HEIGHT, channels=NUM_CHANNELS, interpolations='linear'),
            C.io.transforms.color(brightness_radius=0.125, contrast_radius=0.5, saturation_radius=0.5)
        ]
    else:
        # Evaluation pipeline: deterministic center crop, then scale.
        transforms += [
            C.io.transforms.crop(crop_type='center', side_ratio=0.875), # test has no jitter
            C.io.transforms.scale(width=IMAGE_WIDTH, height=IMAGE_HEIGHT, channels=NUM_CHANNELS, interpolations='linear')
        ]

    # deserializer
    return C.io.MinibatchSource(
        C.io.ImageDeserializer(map_file, C.io.StreamDefs(
            features=C.io.StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
            labels=C.io.StreamDef(field='label', shape=NUM_CLASSES))),   # and second as 'label'
        randomize=is_training,  # shuffle only when training
        max_samples=total_number_of_samples,
        multithreaded_deserializer=True)
github microsoft / CNTK / Examples / SequenceToSequence / CMUDict / Python / Sequence2Sequence.py View on Github external
def create_reader(path, randomize, input_vocab_dim, label_vocab_dim, size=INFINITELY_REPEAT):
    """Reader for the CMUDict sequence-to-sequence corpus.

    Field 'S0' carries the sparse one-hot input sequence and 'S1' the
    target sequence; *size* bounds the epoch (default: repeat forever).
    """
    source_stream = StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True)
    target_stream = StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
    ctf = CTFDeserializer(path, StreamDefs(features=source_stream, labels=target_stream))
    return MinibatchSource(ctf, randomize=randomize, epoch_size=size)
github microsoft / CNTK / bindings / python / examples / LanguageUnderstanding / LanguageUnderstanding.py View on Github external
def create_reader(path, is_training):
    """Reader for the slot-tagging data: sparse fields S0/S1/S2.

    Relies on module-level `vocab_size`, `num_intents`, `num_labels`,
    `INFINITELY_REPEAT` and `FULL_DATA_SWEEP`.
    """
    streams = StreamDefs(
        query=StreamDef(field='S0', shape=vocab_size, is_sparse=True),
        # BUGBUG: unused, and should infer dim
        intent_unused=StreamDef(field='S1', shape=num_intents, is_sparse=True),
        slot_labels=StreamDef(field='S2', shape=num_labels, is_sparse=True),
    )
    epoch = INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP
    return MinibatchSource(CTFDeserializer(path, streams),
                           randomize=is_training, epoch_size=epoch)
github NervanaSystems / ngraph-neon / ngraph / frontends / cntk / examples / cifar_training.py View on Github external
transforms = []  # NOTE(review): fragment — the enclosing `def` line is outside this snippet
    if train:
        # Random-side crop is used only during training (data augmentation).
        transforms += [
            xforms.crop(crop_type='randomside', side_ratio=0.8)
        ]
    # Scale to the target size and subtract the per-pixel mean in all modes.
    transforms += [
        xforms.scale(
            width=image_width,
            height=image_height,
            channels=num_channels,
            interpolations='linear'
        ),
        xforms.mean(mean_file)
    ]

    # Map-file column 1 feeds 'features' (image), column 2 feeds 'labels'.
    return C.io.MinibatchSource(C.io.ImageDeserializer(map_file, C.io.StreamDefs(
        features=C.io.StreamDef(field='image', transforms=transforms),
        labels=C.io.StreamDef(field='label', shape=num_classes)
    )))
github microsoft / CNTK / Examples / Speech / AN4 / Python / HTK_LSTM_Truncated_Distributed.py View on Github external
def create_mb_source(features_file, labels_file, label_mapping_file, total_number_of_samples):
    """Create an HTK minibatch source for truncated-BPTT (LSTM) training.

    BUG FIX: the third parameter was misspelled ``label_mapping_filem``
    while the body referenced ``label_mapping_file`` — a guaranteed
    NameError on every call. The parameter is renamed to match the body.

    Parameters:
        features_file: HTK .scp file listing the acoustic feature archives.
        labels_file: HTK MLF file containing the frame labels.
        label_mapping_file: file mapping label names to class indices.
        total_number_of_samples: cap on the number of samples served.

    Raises:
        RuntimeError: if any of the three input files does not exist.

    Note: relies on module-level `feature_dim`, `context` and `num_classes`.
    """
    for file_name in [features_file, labels_file, label_mapping_file]:
        if not os.path.exists(file_name):
            raise RuntimeError("File '%s' does not exist. Please check that datadir argument is set correctly." % (file_name))

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    # Enabling BPTT with truncation_length > 0
    return MinibatchSource([fd,ld], truncation_length=250, max_samples=total_number_of_samples)
github microsoft / CNTK / Examples / Image / TransferLearning / TransferLearning.py View on Github external
def create_mb_source(map_file, image_width, image_height, num_channels, num_classes, randomize=True):
    """Image reader: linearly scales each image and exposes 'features'/'labels'.

    Column 1 of *map_file* holds the image path ('image' field), column 2
    the class label ('label' field).
    """
    resize = xforms.scale(width=image_width, height=image_height,
                          channels=num_channels, interpolations='linear')
    streams = StreamDefs(
        features=StreamDef(field='image', transforms=[resize]),
        labels=StreamDef(field='label', shape=num_classes),
    )
    return MinibatchSource(ImageDeserializer(map_file, streams), randomize=randomize)