How to use the dffml.repo.Repo class in dffml

To help you get started, we’ve selected a few dffml examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github intel / dffml / tests / test_cli.py View on Github external
    async def predict(self, repos: AsyncIterator[Repo]) -> AsyncIterator[Repo]:
        """Attach a fake prediction to each incoming repo and pass it through.

        Test helper: the predicted value is random and the confidence is the
        repo's src_url parsed as a float.
        """
        async for current in repos:
            current.predicted(random.random(), float(current.src_url))
            yield current
github intel / dffml / tests / test_repo.py View on Github external
def setUp(self):
    """Build two Repo fixtures: an empty one and a fully-populated one."""
    self.null = Repo("null")
    # The "full" repo carries feature data plus extra payloads both inside
    # the data dict and as a top-level keyword.
    full_data = dict(
        features=dict(dead="beef"),
        extra=dict(extra="read all about it"),
    )
    self.full = Repo("full", data=full_data, extra=dict(half=True))
github intel / dffml / dffml / util / testing / source.py View on Github external
async def test_update(self):
        full_src_url = "0"
        empty_src_url = "1"
        full_repo = Repo(
            full_src_url,
            data={
                "features": {
                    "PetalLength": 3.9,
                    "PetalWidth": 1.2,
                    "SepalLength": 5.8,
                    "SepalWidth": 2.7,
                },
                "prediction": {"value": "feedface", "confidence": 0.42},
            },
        )
        empty_repo = Repo(
            empty_src_url,
            data={
                "features": {
                    "PetalLength": 3.9,
github intel / dffml / examples / maintained / demoapp / source.py View on Github external
async def repo(self, src_url: str):
    """Load a single Repo from the database by its src_url.

    Merges feature JSON from the ml_data table (if present) and the
    maintained classification from the status table (if present).
    """
    db = self.conn
    loaded = Repo(src_url)
    # Pull stored feature data; merge it in only when a row with a
    # non-NULL json column exists.
    await db.execute(
        "SELECT json FROM ml_data WHERE src_url=%s", (src_url,)
    )
    row = await db.fetchone()
    if row is not None and row[0] is not None:
        loaded.merge(Repo(src_url, data=json.loads(row[0])))
    # Pull the maintained classification the same way.
    await db.execute(
        "SELECT maintained FROM `status` WHERE src_url=%s", (src_url,)
    )
    status_row = await db.fetchone()
    if status_row is not None and status_row[0] is not None:
        loaded.evaluated({"maintained": str(status_row[0])})
    return loaded
github intel / dffml / model / scikit / dffml_model_scikit / scikit_base.py View on Github external
async def predict(
    self, repos: AsyncIterator[Repo]
) -> AsyncIterator[Tuple[Repo, Any, float]]:
    """Run the trained scikit-learn classifier over each repo.

    Raises:
        ModelNotTrained: if no saved model file exists on disk.

    Yields:
        Each repo with its prediction attached via ``repo.predicted``.
    """
    if not os.path.isfile(self._filename()):
        raise ModelNotTrained("Train model before prediction.")
    async for repo in repos:
        feature_data = repo.features(self.features)
        df = pd.DataFrame(feature_data, index=[0])
        predict = np.array(df)
        # Run the classifier exactly once and reuse the result for both
        # the debug log and the stored prediction (the original called
        # self.clf.predict twice, doubling the inference cost).
        prediction = self.clf.predict(predict)
        self.logger.debug(
            "Predicted Value of {} for {}: {}".format(
                self.parent.config.predict,
                predict,
                prediction,
            )
        )
        repo.predicted(prediction[0], self.confidence)
        yield repo
github intel / dffml / examples / source / custom_sqlite.py View on Github external
    async def repos(self) -> AsyncIterator[Repo]:
        """Yield a Repo for every src_url found in the features table."""
        # NOTE This logic probably isn't what you want. Only for demo purposes.
        cursor = await self.parent.db.execute("SELECT src_url FROM features")
        rows = await cursor.fetchall()
        for record in rows:
            yield await self.repo(record["src_url"])
github intel / dffml / service / http / dffml_service_http / routes.py View on Github external
async def model_predict(self, request, mctx):
        # TODO Provide an iterkey method for model prediction
        chunk_size = int(request.match_info["chunk_size"])
        if chunk_size != 0:
            return web.json_response(
                {"error": "Multiple request iteration not yet supported"},
                status=HTTPStatus.BAD_REQUEST,
            )
        # Get the repos
        repos: Dict[str, Repo] = {
            src_url: Repo(src_url, data=repo_data)
            for src_url, repo_data in (await request.json()).items()
        }
        # Create an async generator to feed repos
        async def repo_gen():
            for repo in repos.values():
                yield repo

        # Feed them through prediction
        return web.json_response(
            {
                "iterkey": None,
                "repos": {
                    repo.src_url: repo.export()
                    async for repo in mctx.predict(repo_gen())
                },
github intel / dffml / examples / source / custom_sqlite.py View on Github external
async def repo(self, src_url: str):
        db = self.parent.db
        repo = Repo(src_url)
        # Get features
        features = await db.execute(
            "SELECT " + ", ".join(self.parent.FEATURE_COLS) + " "
            "FROM features WHERE src_url=?",
            (repo.src_url,),
        )
        features = await features.fetchone()
        if features is not None:
            repo.evaluated(features)
        # Get prediction
        prediction = await db.execute(
            "SELECT * FROM prediction WHERE " "src_url=?", (repo.src_url,)
        )
        prediction = await prediction.fetchone()
        if prediction is not None:
            repo.predicted(prediction["value"], prediction["confidence"])
github intel / dffml / dffml / skel / model / REPLACE_IMPORT_PACKAGE_NAME / misc.py View on Github external
async def predict(
    self, repos: AsyncIterator[Repo]
) -> AsyncIterator[Tuple[Repo, Any, float]]:
    """
    Uses trained data to make a prediction about the quality of a repo.
    """
    async for repo in repos:
        # Classification is looked up by the repo's value for the first
        # configured feature; confidence is a fixed 1.0 placeholder.
        feature_value = repo.feature(self.parent.config.features.names()[0])
        classification = self.parent.config.classifications[feature_value]
        yield repo, classification, 1.0
github intel / dffml / model / scratch / dffml_model_scratch / slr.py View on Github external
async def predict(
    self, repos: AsyncIterator[Repo]
) -> AsyncIterator[Tuple[Repo, Any, float]]:
    """Predict a value for each repo using the fitted regression line.

    Raises:
        ModelNotTrained: if no regression line has been fit yet.
    """
    if self.regression_line is None:
        raise ModelNotTrained("Train model before prediction.")
    async for repo in repos:
        feature_data = repo.features(self.features)
        input_value = feature_data[self.features[0]]
        predicted_value = await self.predict_input(input_value)
        # The third element of regression_line is used as the confidence
        # (presumably the fit's accuracy — confirm against training code).
        repo.predicted(predicted_value, self.regression_line[2])
        yield repo