How to use the deeppavlov.core.common.file.read_json function in deeppavlov

To help you get started, we’ve selected a few deeppavlov examples based on popular ways this function is used in public projects.
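
In every example below the pattern is the same: read_json takes a file path (a str or a pathlib.Path), parses the file as JSON, and returns the resulting Python object, usually a dict when loading a DeepPavlov config. A minimal sketch (the config path is a placeholder):

from pathlib import Path

from deeppavlov.core.common.file import read_json

# read_json parses the file at the given path and returns the
# resulting Python object (a dict for a typical config file)
config = read_json(Path('path/to/config.json'))  # placeholder path
print(config.get('metadata', {}))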


github deepmipt / DeepPavlov / deeppavlov / skills / odqa / eval_scripts / evaluate_ranker1_squad.py
def main():
    args = parser.parse_args()
    n = args.number_retrieve
    config = read_json(args.config_path)
    ranker = build_model_from_config(config)  # chainer
    dataset = read_json(args.dataset_path)
    iterator = SQLiteDataIterator(data_url=args.database_url)

    dataset_size = len(dataset)
    logger.info('Dataset size: {}'.format(dataset_size))
    correct_answers = 0
    n_queries = 0
    start_time = time.time()

    try:
        for instance in dataset:
            q = instance['question']
            q = unicodedata.normalize('NFD', q)
            result = ranker([q])
            top_n = result[0][0]
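
The snippet relies on a module-level argparse parser that the excerpt omits. A plausible reconstruction is below; the flag names are inferred from the args.* attributes above and are assumptions, not the script's actual interface.

import argparse

# Hypothetical parser matching the attributes the excerpt reads;
# the real script's flags, types and defaults may differ.
parser = argparse.ArgumentParser()
parser.add_argument('--config-path', dest='config_path', type=str)
parser.add_argument('--dataset-path', dest='dataset_path', type=str)
parser.add_argument('--database-url', dest='database_url', type=str)
parser.add_argument('--number-retrieve', dest='number_retrieve', type=int)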

github deepmipt / DeepPavlov / deeppavlov / skills / odqa / eval_scripts / evaluate_ranker.py
def main():
    args = parser.parse_args()
    config = read_json(args.config_path)
    ranker = build_model_from_config(config)  # chainer
    dataset = read_csv(args.dataset_path)
    # dataset = dataset[:10]

    qa_dataset_size = len(dataset)
    logger.info('QA dataset size: {}'.format(qa_dataset_size))
    # n_queries = 0  # DEBUG
    start_time = time.time()
    TEXT_IDX = 1

    try:
        mapping = {}
        pmef_mapping = {}

        ranker_answers = ranker([i['question'] for i in dataset])
        returned_db_size = len(ranker_answers[0])

github deepmipt / DeepPavlov / deeppavlov / skills / odqa / eval_scripts / evaluate_paragraph_ranker_recall.py
def main():
    args = parser.parse_args()
    config = read_json(args.config_path)
    ranker = build_model_from_config(config)  # chainer
    dataset = read_json(args.dataset_path)

    dataset_size = len(dataset)
    logger.info('Dataset size: {}'.format(dataset_size))
    n_correct_answers = 0
    n_queries = 1
    start_time = time.time()

    try:
        for instance in dataset:
            q = instance['question']
            q = unicodedata.normalize('NFD', q)
            paragraphs = ranker([q])

            answers = instance['answers']

github deepmipt / DeepPavlov / deeppavlov / skills / odqa / help_scripts / negative_sampling / cut_truth_from_db.py
def main():
    args = parser.parse_args()
    db_path = args.input_db_path
    dataset = read_json(args.dataset_path)
    # dataset = dataset[:10]
    print(f"Dataset size: {len(dataset)}")
    squad_articles = read_json(args.squad_articles)

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    iterator = SQLiteDataIterator(db_path)
    failed = 0

    # wiki_titles = set()
    # relevant_titles = wiki_titles.union(set(read_json(args.relevant_titles_path)))

    try:
        for i, instance in enumerate(dataset):
            print(f"Processing instance {i}")
            # question = instance['question']
            context = instance['context']
            title = instance['title']

github deepmipt / DeepPavlov / deeppavlov / utils / connector / dialog_logger.py
def __init__(self, enabled: bool = False, logger_name: Optional[str] = None) -> None:
        self.config: dict = read_json(get_settings_path() / LOGGER_CONFIG_FILENAME)
        self.enabled: bool = enabled or self.config['enabled']

        if self.enabled:
            self.logger_name: str = logger_name or self.config['logger_name']
            self.log_max_size: int = self.config['logfile_max_size_kb']
            self.log_file = self._get_log_file()
            self.log_file.write('"Dialog logger initiated"\n')
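
The settings file read here lives at get_settings_path() / LOGGER_CONFIG_FILENAME. Judging from the keys the constructor accesses, it looks roughly like the following; the values are illustrative, not the shipped defaults:

{
    "enabled": false,
    "logger_name": "dialog_logger",
    "logfile_max_size_kb": 1024
}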

github deepmipt / DeepPavlov / deeppavlov / skills / odqa / help_scripts / negative_sampling / make_relevant_db.py
def main():
    args = parser.parse_args()
    db_path = args.input_db_path
    saved_ids = read_json(args.ids_path)
    rel_ids = set(saved_ids).union(set(NEW2OLD_WIKI_TITLES_TRAIN.keys()))
    rel_ids = rel_ids.union(NEW2OLD_WIKI_TITLES_TRAIN.values())
    rel_ids = rel_ids.union(set([encode_utf8(k) for k in NEW2OLD_WIKI_TITLES_TRAIN.keys()]))

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    iterator = SQLiteDataIterator(args.iterator_db)

    count = 0
    for i in iterator.doc_ids:
        if i not in rel_ids:
            cursor.execute("DELETE FROM documents WHERE id=?", (i,))
        count += 1
        print(f"Processing id {count}")

github deepmipt / DeepPavlov / deeppavlov / models / classifiers / keras_classification_model.py
Returns:
            model with loaded weights and network parameters from files
            but compiled with given learning parameters
        """
        if self.load_path:
            if isinstance(self.load_path, Path) and not self.load_path.parent.is_dir():
                raise ConfigError("Provided load path is incorrect!")

            opt_path = Path("{}_opt.json".format(str(self.load_path.resolve())))
            weights_path = Path("{}.h5".format(str(self.load_path.resolve())))

            if opt_path.exists() and weights_path.exists():

                log.info("[initializing `{}` from saved]".format(self.__class__.__name__))

                self.opt["final_learning_rate"] = read_json(opt_path).get("final_learning_rate")

                model_func = getattr(self, model_name, None)
                if callable(model_func):
                    model = model_func(**self.opt)
                else:
                    raise AttributeError("Model {} is not defined".format(model_name))

                log.info("[loading weights from {}]".format(weights_path.name))
                try:
                    model.load_weights(str(weights_path))
                except ValueError:
                    raise ConfigError("Some non-changable parameters of neural network differ from given pre-trained model")

                self.model = model

                return None
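
The loading convention above pairs two files derived from one path: <load_path>_opt.json for the network parameters (read back with read_json) and <load_path>.h5 for the Keras weights. A hedged sketch of the matching save side, assuming the save_json helper from the same deeppavlov.core.common.file module:

from pathlib import Path

from deeppavlov.core.common.file import save_json

# Illustrative counterpart to the loading code above: persist the
# options next to the weights so read_json can restore them later.
save_path = Path('my_model/classifier')  # placeholder path
opt = {'final_learning_rate': 1e-4}  # example options dict
save_json(opt, '{}_opt.json'.format(save_path.resolve()))
# model.save_weights('{}.h5'.format(save_path.resolve()))  # `model` is the compiled Keras model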

github deepmipt / DeepPavlov / deeppavlov / skills / odqa / eval_scripts / evaluate_ranker1_drones_chunks.py
def main():
    args = parser.parse_args()
    config = read_json(args.config_path)
    ranker = build_model_from_config(config)  # chainer
    dataset = read_csv(args.dataset_path)
    iterator = SQLiteDataIterator(load_path=args.database_url)

    dataset_size = len(dataset)
    logger.info('Dataset size: {}'.format(dataset_size))
    correct_answers = 0
    n_queries = 0
    start_time = time.time()

    try:
        mapping = {}
        db_size = len(iterator.doc_ids)
        logger.info("DB size: {}".format(db_size))
        for n in range(1, db_size + 1):
            ranker.pipe[0][2].top_n = n

github deepmipt / DeepPavlov / deeppavlov / skills / odqa / help_scripts / get_ranker1_toloka_output.py
def main():
    args = parser.parse_args()
    config = read_json(args.config_path)
    ranker = build_model_from_config(config)  # chainer
    dataset = read_csv(args.dataset_path)
    iterator = SQLiteDataIterator(data_url=args.database_url)

    dataset_size = len(dataset)
    logger.info('Dataset size: {}'.format(dataset_size))
    correct_answers = 0
    n_queries = 0
    start_time = time.time()

    try:
        mapping = []
        db_size = len(iterator.doc_ids)
        logger.info("DB size: {}".format(db_size))
        ranker.pipe[0][2].top_n = db_size
        for instance in dataset:

github deepmipt / DeepPavlov / deeppavlov / core / commands / utils.py
def parse_config(config: Union[str, Path, dict]) -> dict:
    """Read config's variables and apply their values to all its properties"""
    if isinstance(config, (str, Path)):
        config = read_json(find_config(config))

    variables = {
        'DEEPPAVLOV_PATH': os.getenv('DP_DEEPPAVLOV_PATH', Path(__file__).parent.parent.parent)
    }
    for name, value in config.get('metadata', {}).get('variables', {}).items():
        env_name = f'DP_{name}'
        if env_name in os.environ:
            value = os.getenv(env_name)
        variables[name] = value.format(**variables)

    return _parse_config_property(config, variables)
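
A short usage sketch: variables declared under metadata.variables (plus the built-in DEEPPAVLOV_PATH) are substituted into the rest of the config by _parse_config_property, and an environment variable named DP_<name> overrides the declared value. The config below is illustrative:

from deeppavlov.core.commands.utils import parse_config

config = parse_config({
    'metadata': {'variables': {'ROOT': '/data/downloads'}},
    'dataset_reader': {'data_path': '{ROOT}/my_dataset'},
})
# config['dataset_reader']['data_path'] is now '/data/downloads/my_dataset'
# (unless a DP_ROOT environment variable overrides ROOT)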