How to use the ijson.backends.yajl2_cffi.parse function in ijson

To help you get started, we’ve selected a few ijson examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github SenticNet / CASCADE / users / create_per_user_paragraph.py View on Github external
def main():
    # Streams the comments dataset with ijson and groups comment texts by author.
    # NOTE(review): this excerpt stops at the dangling `elif` at the bottom; the
    # remainder of the function is not visible here.
    users_comments_dict = collections.defaultdict(list)

    with tqdm(desc="Grouping comments by user", total=12704751) as progress_bar:
        # Per-comment parse state; reset every time a map closes while inside a comment.
        inside_comment = False
        comment_text = None
        comment_username = None

        with open(COMMENTS_DATASET_FILE_PATH, 'rb') as file_:
            # As the JSON file is large (2.5GB) and everything is in one line, is better to read it as a stream,
            # using a SAX-like approach.
            for prefix, type_, value in ijson.parse(file_):
                if inside_comment:
                    # Fields are recognised by the suffix of the ijson prefix.
                    if prefix.endswith('.text'):
                        comment_text = value
                    elif prefix.endswith('.author'):
                        comment_username = value
                    elif type_ == 'end_map':  # This assumes there are no nested maps inside the comment maps.
                        # Keep the comment only when both fields are non-empty, the text is not
                        # the literal 'nan' and the author is not '[deleted]'.
                        if comment_text and comment_username and comment_text != 'nan' \
                                and comment_username != '[deleted]':
                            users_comments_dict[comment_username].append(comment_text)

                        # Reset the state for the next comment map.
                        inside_comment = False
                        comment_text = None
                        comment_username = None

                        # One progress tick per closed comment map, kept or not.
                        progress_bar.update()
                elif type_ == 'start_map' and prefix:
github wodny / ncdu-export / flatten.py View on Github external
# Script-level state for walking the ijson event stream of the input file.
state = ParserState.START
# Popped whenever an array ends (see the "end_array" branch below); where entries
# are pushed is not visible in this excerpt.
dirs = []
key = None
obj = {}

# Command-line interface: input file plus output/diagnostic options.
argp = argparse.ArgumentParser()
# FileType("rb") makes argparse open the file, so options.file is a binary file object.
argp.add_argument("file", type=argparse.FileType("rb"), help="ncdu export filename")
argp.add_argument("--dirs", choices=["array", "string"], default="string", help="directory name format output to flat file")
argp.add_argument("--verbose", action="store_true", help="enable verbose mode (inc. ijson variant)")
options = argp.parse_args()

if options.verbose:
    # Report the name of the imported ijson module on stderr.
    sys.stderr.write("ijson module variant: {}\n".format(ijson.__name__))

# Iterate over (prefix, event, value) triples and drive the state machine.
parser = ijson.parse(options.file)
for prefix, event, value in parser:
    if event == "start_array":
        if state != ParserState.START:
            # started non-header array (directory listing)
            state = ParserState.ARRAY_START
        else:
            # started header, omit this map
            state = ParserState.HEADER
    elif event == "end_array":
        # array means a (sub)directory so it was at least a second entry
        # (first entry is the directory's meta-data)
        state = ParserState.SUBSEQ_MAP
        # Guarded pop: dirs may already be empty when an array closes.
        if dirs:
            dirs.pop()
    elif state == ParserState.ARRAY_START and event == "start_map":
        # directory's meta-data
        # NOTE(review): the body of this branch is cut off in this excerpt.
github johncsnyder / SwiftKitten / SwiftKitten.py View on Github external
# prevent duplicate requests
        if request in self.current_requests:
            raise AutocompleteRequestError(
                "Request denied: completion for \"{request}\" "
                "already in progress.".format(request=request)
            )

        # start request
        self.current_requests.add(request)

        # get completion command
        cmd = self.get_completion_cmd(view, text, offset)

        # run completion command
        p = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
        parser = ijson.parse(p.stdout)
        completions = list(self._parse_completions(parser, included=included))

        # finish request
        self.current_requests.discard(request)

        return completions
github slub / efre-lod-elasticsearch-tools / luigi / update_gnd.py View on Github external
def yield_obj(path, basepath):
    """Stream objects out of a gzip-compressed JSON file.

    Every ijson event read from *path* is fed to an ``ObjectBuilder``. Each
    time a map closes at prefix *basepath*, the value built so far is yielded
    and a fresh builder is started for the next object.

    Args:
        path: Path of the gzip-compressed JSON file to read.
        basepath: ijson prefix whose ``end_map`` event completes one object.

    Yields:
        The builder's ``value`` at each ``end_map`` event whose prefix equals
        *basepath*, provided the builder has produced a value.
    """
    with gzip.open(path, "r") as fin:
        builder = ijson.common.ObjectBuilder()
        for prefix, event, val in ijson.parse(fin):
            try:
                builder.event(event, val)
            except Exception:
                # Best effort: an event the builder cannot apply is skipped and
                # any partial value is printed for inspection. This was a bare
                # ``except:``, which also swallowed KeyboardInterrupt and
                # SystemExit and made the loop impossible to interrupt cleanly.
                if hasattr(builder, "value"):
                    print(builder.value)
            if prefix == basepath and event == "end_map":
                # A builder only gains ``value`` once it has accepted an event.
                if hasattr(builder, "value"):
                    yield builder.value
                # Start over so the next object is built from scratch.
                builder = ijson.common.ObjectBuilder()
github vidjil / vidjil / tools / vidjilparser.py View on Github external
def initModel(self, model_path):
        """Record each distinct (prefix, event) pair found in the model file.

        The file at *model_path* is parsed as a stream with ijson; every pair
        not yet present in ``self._model_prefixes`` is appended to it, in
        order of first appearance.
        """
        with open(model_path, 'rb') as model:
            for prefix, event, _value in ijson.parse(model):
                pair = (prefix, event)
                if pair in self._model_prefixes:
                    continue
                self._model_prefixes.append(pair)