How to use the depccg.tools.data.TrainingDataCreator function in depccg

To help you get started, we’ve selected a few depccg examples that show popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github masashi-y / depccg / depccg / tools / data.py View on Github external
type=int,
            default=5,
            help='only allow words which appear >= freq-cut')
    parser.add_argument('--afix-freq-cut',
            type=int,
            default=5,
            help='only allow afixes which appear >= freq-cut')
    parser.add_argument('--mode',
            choices=['train', 'test'],
            default='train')

    args = parser.parse_args()
    if args.mode == 'train':
        TrainingDataCreator.create_traindata(args)
    else:
        TrainingDataCreator.create_testdata(args)
github masashi-y / depccg / depccg / tools / data.py View on Github external
def create_traindata(args):
    """Write the category and word vocabularies for training.

    Builds a ``TrainingDataCreator`` from the parsed command-line options,
    loads the trees found at its ``filepath``, and writes the entries of
    ``cats`` and ``words`` that meet their frequency cut-offs to
    ``target.txt`` and ``words.txt`` under ``args.OUT``.

    Args:
        args: Parsed options. Reads ``PATH``, ``OUT``, ``word_freq_cut``,
            ``cat_freq_cut`` and ``afix_freq_cut``. ``OUT`` must support the
            ``/`` operator (presumably a ``pathlib.Path`` -- confirm against
            the argument parser).
    """
    creator = TrainingDataCreator(
        args.PATH,
        args.word_freq_cut,
        args.cat_freq_cut,
        args.afix_freq_cut,
    )

    # Drop every tree whose `word` is the 'FAILED' marker.
    trees = []
    for _, _, tree in read_auto(creator.filepath):
        if tree.word != 'FAILED':
            trees.append(tree)
    logger.info(f'loaded {len(trees)} trees')

    # Visit every tree before building samples; the frequency tables read
    # below are presumably filled in by these calls -- confirm in _traverse.
    for tree in trees:
        creator._traverse(tree)
    creator._create_samples(trees)

    # Keep only categories seen at least `cat_freq_cut` times.
    frequent_cats = {
        cat: count
        for cat, count in creator.cats.items()
        if count >= creator.cat_freq_cut
    }
    creator._write(frequent_cats, args.OUT / 'target.txt')

    # Keep only words seen at least `word_freq_cut` times.
    frequent_words = {
        word: count
        for word, count in creator.words.items()
        if count >= creator.word_freq_cut
    }
    creator._write(frequent_words, args.OUT / 'words.txt')
github masashi-y / depccg / depccg / tools / data.py View on Github external
def create_testdata(args):
    """Write the test samples and test sentences to ``args.OUT``.

    Builds a ``TrainingDataCreator`` from the parsed command-line options,
    loads every tree found at its ``filepath`` (no filtering), creates
    samples from them, and then writes ``samples`` as JSON to
    ``testdata.json`` and each entry of ``sents`` on its own line to
    ``testsents.txt``.

    Args:
        args: Parsed options. Reads ``PATH``, ``OUT``, ``word_freq_cut``,
            ``cat_freq_cut`` and ``afix_freq_cut``. ``OUT`` must support the
            ``/`` operator (presumably a ``pathlib.Path`` -- confirm against
            the argument parser).
    """
    creator = TrainingDataCreator(
        args.PATH,
        args.word_freq_cut,
        args.cat_freq_cut,
        args.afix_freq_cut,
    )

    # Unlike the training path, every tree is kept here.
    trees = []
    for _, _, tree in read_auto(creator.filepath):
        trees.append(tree)
    creator._create_samples(trees)

    with open(args.OUT / 'testdata.json', 'w') as json_out:
        logger.info(f'writing to {json_out.name}')
        json.dump(creator.samples, json_out)

    with open(args.OUT / 'testsents.txt', 'w') as sents_out:
        logger.info(f'writing to {sents_out.name}')
        # One sentence per line.
        for sentence in creator.sents:
            print(sentence, file=sents_out)
github masashi-y / depccg / depccg / tools / data.py View on Github external
def convert_json(autopath):
    """Return the samples created from the trees stored at ``autopath``.

    A ``TrainingDataCreator`` is built with all three frequency cut-offs set
    to ``None``; trees whose ``word`` is ``'FAILED'`` are dropped before the
    samples are created.

    Args:
        autopath: Location of the tree file, passed straight through to
            ``TrainingDataCreator``.

    Returns:
        The creator's ``samples`` attribute after ``_create_samples`` ran.
    """
    creator = TrainingDataCreator(autopath, None, None, None)

    # Drop every tree whose `word` is the 'FAILED' marker.
    trees = []
    for _, _, tree in read_auto(creator.filepath):
        if tree.word != 'FAILED':
            trees.append(tree)
    logger.info(f'loaded {len(trees)} trees')

    creator._create_samples(trees)
    return creator.samples
github masashi-y / depccg / depccg / tools / data.py View on Github external
help='only allow categories which appear >= freq-cut')
    parser.add_argument('--word-freq-cut',
            type=int,
            default=5,
            help='only allow words which appear >= freq-cut')
    parser.add_argument('--afix-freq-cut',
            type=int,
            default=5,
            help='only allow afixes which appear >= freq-cut')
    parser.add_argument('--mode',
            choices=['train', 'test'],
            default='train')

    args = parser.parse_args()
    if args.mode == 'train':
        TrainingDataCreator.create_traindata(args)
    else:
        TrainingDataCreator.create_testdata(args)
github masashi-y / depccg / depccg / tools / data.py View on Github external
def convert_auto_to_json(autopath):
    """Convert the tree file at ``autopath`` into samples.

    Thin module-level wrapper that delegates to
    ``TrainingDataCreator.convert_json``.

    Args:
        autopath: Location of the tree file to convert.

    Returns:
        Whatever ``TrainingDataCreator.convert_json`` returns for
        ``autopath``.
    """
    samples = TrainingDataCreator.convert_json(autopath)
    return samples