How to use ijson - 10 common examples

To help you get started, we’ve selected a few ijson examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github isagalaev / ijson / tests.py View on Github external
def test_invalid(self):
        """Each document in INVALID_JSONS must make basic_parse raise JSONError."""
        is_yajl1 = self.__class__.__name__ == 'YajlParse'
        for document in INVALID_JSONS:
            # Yajl1 doesn't complain about additional data after the end
            # of a parsed object, so that one document is not checked there.
            if not (is_yajl1 and document == YAJL1_PASSING_INVALID):
                with self.assertRaises(common.JSONError):
                    list(self.backend.basic_parse(BytesIO(document)))
github isagalaev / ijson / tests.py View on Github external
def test_parse(self):
        """Values emitted under 'docs.item.meta.item.item' must be exactly [1]."""
        wanted_prefix = 'docs.item.meta.item.item'
        matched = []
        for prefix, _event, value in common.parse(basic_parse(BytesIO(JSON))):
            # Keep only the values whose path matches the prefix under test.
            if prefix == wanted_prefix:
                matched.append(value)
        self.assertEqual(matched, [1])
github isagalaev / ijson / tests.py View on Github external
def test_incomplete(self):
        """Each document in INCOMPLETE_JSONS must raise IncompleteJSONError."""
        for truncated in INCOMPLETE_JSONS:
            stream = BytesIO(truncated)
            with self.assertRaises(common.IncompleteJSONError):
                # Materialise the events so the parser consumes the whole stream.
                events = self.backend.basic_parse(stream)
                list(events)
github isagalaev / ijson / tests.py View on Github external
def test_items(self):
        """items() with prefix 'docs.item.meta' must yield the three expected objects."""
        expected = [
            [[1], {}],
            {'key': 'value'},
            None,
        ]
        prefixed_events = common.parse(basic_parse(BytesIO(JSON)))
        found = list(common.items(prefixed_events, 'docs.item.meta'))
        self.assertEqual(found, expected)
github isagalaev / ijson / tests.py View on Github external
def test_scalar_builder(self):
        """Feeding the events of SCALAR_JSON to ObjectBuilder must produce 0."""
        scalar_builder = common.ObjectBuilder()
        event_stream = basic_parse(BytesIO(SCALAR_JSON))
        for kind, payload in event_stream:
            scalar_builder.event(kind, payload)
        self.assertEqual(scalar_builder.value, 0)
github isagalaev / ijson / tests.py View on Github external
def test_object_builder(self):
        builder = common.ObjectBuilder()
        for event, value in basic_parse(BytesIO(JSON)):
            builder.event(event, value)
        self.assertEqual(builder.value, {
            'docs': [
                {
                   'string': 'строка - тест',
                   'null': None,
                   'boolean': False,
                   'true': True,
                   'integer': 0,
                   'double': Decimal('0.5'),
                   'exponent': 100,
                   'long': 10000000000,
                },
                {
                    'meta': [[1], {}],
github lyeoni / prenlp / prenlp / data / dataset / language_modeling.py View on Github external
def _get_data(self) -> list:
        """Return the language-modeling samples, building the cache file if needed.

        If ``self.root/self.out_filename`` already exists it is loaded with
        ``load_language_modeling`` and returned as-is.  Otherwise the raw JSON
        file at ``self.root/self.dirname`` is streamed with ``ijson``, each
        item's ``'text'`` field is normalized and split into non-empty lines,
        the result is saved to the output path, and the raw file is deleted.

        Returns:
            list: the loaded dataset, or the list of non-empty text lines
            extracted from the raw file.
        """
        out_path_train = self.root/self.out_filename

        # Fast path: a previously processed dataset is already on disk.
        if out_path_train.exists():
            return load_language_modeling(out_path_train)

        raw_path = self.root/self.dirname
        dataset = []
        with open(raw_path, 'r', encoding='utf-8') as jfile:
            for item in tqdm(ijson.items(jfile, 'item')):
                text = self._normalize(item['text']).strip()
                # Split the document into sentences, dropping empty lines.
                # NOTE: to keep one sample per document instead, append
                # '\n'.join(<non-empty lines>) to dataset rather than extending.
                dataset.extend(line for line in text.split('\n') if line)

        # Save the processed dataset BEFORE removing the raw file, so a failed
        # save cannot leave us with neither the source nor the output.
        save_language_modeling(dataset, to_path=out_path_train)
        raw_path.unlink()

        return dataset
github EastonLee / Taobao_Crawler / taobao_crawler / spiders / taobao_2.py View on Github external
"""
        :return dicted js obj  g_page_config:
        :rtype :dict
        """
        if self.detect_anti_spider_from_response(response):
            logger.critical(anti_spider_breakpoit_msg)
            raise CloseSpider(anti_spider_breakpoit_msg)
            #sys.exit() #won't quit, just a exception
        # this name is from taobao page: https://s.taobao.com/search?q=空调
        g_page_config = ''
        for line in response.body.split('\n'):
            if 'g_page_config' in line:
                g_page_config = line.split('{', 1)[1].rsplit('}', 1)[0]
                break

        js_obj_gen = ijson.items(StringIO(''.join(('{', g_page_config, '}'))), '')
        js_obj = list(js_obj_gen)[0]

        if self.detect_anti_spider_from_js_obj(js_obj, response):
            logger.critical(anti_spider_breakpoit_msg)
            raise CloseSpider(anti_spider_breakpoit_msg)
        return js_obj
github data61 / anonlink-entity-service / backend / entityservice.py View on Github external
def counting_generator():
        """Yield every 'clks.item' entry of raw_stream as newline-terminated bytes.

        Updates store['count'] and store['totalbytes'] for each yielded entry.
        If the JSON turns out to be incomplete, store['count'] is reset to 0,
        a warning is logged, and the generator stops.
        """
        try:
            for encoded_clk in ijson.items(raw_stream, 'clks.item'):
                # Often the clients upload base64 strings with newlines;
                # strip them and terminate each entry with a single newline.
                line = encoded_clk.replace('\n', '').encode() + b'\n'
                store['count'] += 1
                store['totalbytes'] += len(line)
                yield line
        except ijson.common.IncompleteJSONError:
            store['count'] = 0
            app.logger.warning("Stopping as we have received incomplete json")
github data61 / anonlink-entity-service / backend / entityservice.py View on Github external
def counting_generator():
        """Stream the 'clks.item' entries of raw_stream as newline-terminated bytes.

        Each yielded entry increments store['count'] and adds its byte length
        to store['totalbytes'].  On incomplete JSON, store['count'] is set back
        to 0, a warning is logged, and iteration ends.
        """
        try:
            for clk_text in ijson.items(raw_stream, 'clks.item'):
                # Clients often upload base64 strings containing newlines;
                # remove them, then append exactly one trailing newline.
                payload = clk_text.replace('\n', '').encode() + b'\n'
                store['count'] += 1
                store['totalbytes'] += len(payload)
                yield payload
        except ijson.common.IncompleteJSONError:
            store['count'] = 0
            app.logger.warning("Stopping as we have received incomplete json")