How to use the depccg.tools.ja.keyaki_reader.TrainingDataCreator class in depccg

To help you get started, we’ve selected a few depccg examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github masashi-y / depccg / depccg / tools / ja / keyaki_reader.py View on Github external
parser.add_argument('--afix-freq-cut',
                        type=int,
                        default=5,
                        help='only allow afixes which appear >= freq-cut')
    parser.add_argument('--char-freq-cut',
                        type=int,
                        default=5,
                        help='only allow characters which appear >= freq-cut')
    parser.add_argument('--mode',
                        choices=['train', 'test'],
                        default='train')

    args = parser.parse_args()

    if args.mode == 'train':
        TrainingDataCreator.create_traindata(args)
    else:
        TrainingDataCreator.create_testdata(args)
github masashi-y / depccg / depccg / tools / ja / keyaki_reader.py View on Github external
def convert_keyaki_to_json(keyakipath):
    """Forward ``keyakipath`` to ``TrainingDataCreator.convert_json``.

    Returns whatever ``TrainingDataCreator.convert_json`` returns, unchanged.
    """
    samples = TrainingDataCreator.convert_json(keyakipath)
    return samples
github masashi-y / depccg / depccg / tools / ja / keyaki_reader.py View on Github external
def create_testdata(args):
        """Create samples from the parsed trees and dump them to JSON.

        A ``TrainingDataCreator`` is built from ``args.PATH`` and the
        ``word_freq_cut`` / ``char_freq_cut`` / ``cat_freq_cut`` options,
        every tree yielded by ``read_keyaki`` for its ``filepath`` is
        collected, and the resulting ``samples`` are written to
        ``testdata.json`` under ``args.OUT``.
        """
        creator = TrainingDataCreator(args.PATH,
                                      args.word_freq_cut,
                                      args.char_freq_cut,
                                      args.cat_freq_cut)

        # TODO: alternatively parse the file line by line with KeyakiParser
        # and skip the lines that fail to parse.
        trees = list(read_keyaki(creator.filepath))
        creator._create_samples(trees)

        out_path = args.OUT / 'testdata.json'
        with open(out_path, 'w') as f:
            logger.info(f'writing to {f.name}')
            json.dump(creator.samples, f)
github masashi-y / depccg / depccg / tools / ja / keyaki_reader.py View on Github external
default=5,
                        help='only allow afixes which appear >= freq-cut')
    parser.add_argument('--char-freq-cut',
                        type=int,
                        default=5,
                        help='only allow characters which appear >= freq-cut')
    parser.add_argument('--mode',
                        choices=['train', 'test'],
                        default='train')

    args = parser.parse_args()

    if args.mode == 'train':
        TrainingDataCreator.create_traindata(args)
    else:
        TrainingDataCreator.create_testdata(args)
github masashi-y / depccg / depccg / tools / ja / keyaki_reader.py View on Github external
def create_traindata(args):
        """Traverse the parsed trees and create samples from them.

        A ``TrainingDataCreator`` is built from ``args.PATH`` and the
        ``word_freq_cut`` / ``char_freq_cut`` / ``cat_freq_cut`` options.
        Every tree yielded by ``read_keyaki`` for its ``filepath`` is first
        passed to ``_traverse``; afterwards the whole list is handed to
        ``_create_samples``.
        """
        creator = TrainingDataCreator(args.PATH,
                                      args.word_freq_cut,
                                      args.char_freq_cut,
                                      args.cat_freq_cut)

        # TODO: alternatively parse the file line by line with KeyakiParser
        # and skip the lines that fail to parse.
        trees = list(read_keyaki(creator.filepath))

        # All trees are traversed before any sample is created.
        for parsed_tree in trees:
            creator._traverse(parsed_tree)
        creator._create_samples(trees)
github masashi-y / depccg / depccg / tools / ja / keyaki_reader.py View on Github external
def convert_json(autopath):
        """Load the trees found at ``autopath`` and return the created samples.

        The ``TrainingDataCreator`` is built with ``None`` for its three
        remaining constructor arguments. The number of loaded trees is
        logged before the samples are created.
        """
        creator = TrainingDataCreator(autopath, None, None, None)
        trees = list(read_keyaki(creator.filepath))
        logger.info(f'loaded {len(trees)} trees')
        creator._create_samples(trees)
        return creator.samples