How to use the depccg.tools.ja.data.TrainingDataCreator class in depccg

To help you get started, we’ve selected a few depccg examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github masashi-y / depccg / depccg / tools / ja / data.py View on Github external
def convert_ccgbank_to_json(ccgbankpath):
    """Delegate to ``TrainingDataCreator.convert_json``.

    Args:
        ccgbankpath: forwarded unchanged as the single argument of
            ``TrainingDataCreator.convert_json``.

    Returns:
        Whatever ``TrainingDataCreator.convert_json`` returns.
    """
    samples = TrainingDataCreator.convert_json(ccgbankpath)
    return samples
github masashi-y / depccg / depccg / tools / ja / data.py View on Github external
def convert_json(autopath):
    """Load the trees found at ``autopath`` and return the created samples.

    A ``TrainingDataCreator`` is built with ``autopath`` and ``None`` for its
    three remaining constructor arguments.  The trees are read from its
    ``filepath`` via ``read_ccgbank``, handed to ``_create_samples``, and the
    creator's ``samples`` attribute is returned.

    Args:
        autopath: first constructor argument of ``TrainingDataCreator``.

    Returns:
        The ``samples`` attribute of the creator after ``_create_samples``.
    """
    creator = TrainingDataCreator(autopath, None, None, None)

    # read_ccgbank yields 3-item entries; only the last item (the tree) is kept.
    trees = []
    for _first, _second, tree in read_ccgbank(creator.filepath):
        trees.append(tree)

    logger.info(f'loaded {len(trees)} trees')
    creator._create_samples(trees)
    return creator.samples
github masashi-y / depccg / depccg / tools / ja / data.py View on Github external
def create_testdata(args):
    """Write test samples and their sentences into ``args.OUT``.

    Builds a ``TrainingDataCreator`` from the parsed arguments, reads the
    trees from its ``filepath`` with ``read_ccgbank``, converts them with
    ``_create_samples`` and then writes two files below ``args.OUT``:

    * ``testdata.json`` -- the creator's ``samples`` dumped with ``json.dump``
    * ``testsents.txt`` -- one entry of the creator's ``sents`` per line

    Both files are written as UTF-8 so that the result does not depend on the
    locale's default encoding.

    Args:
        args: parsed arguments providing ``PATH``, ``OUT`` (must support the
            ``/`` operator with a string), ``word_freq_cut``,
            ``cat_freq_cut`` and ``char_freq_cut``.
    """
    # NOTE(review): create_traindata passes the cut-offs in the order
    # (word, char, cat) whereas this call uses (word, cat, char).  The
    # constructor signature should be checked; the original order is kept.
    creator = TrainingDataCreator(args.PATH,
                                  args.word_freq_cut,
                                  args.cat_freq_cut,
                                  args.char_freq_cut)

    trees = [tree for _, _, tree in read_ccgbank(creator.filepath)]
    creator._create_samples(trees)

    with open(args.OUT / 'testdata.json', 'w', encoding='utf-8') as f:
        logger.info(f'writing to {f.name}')
        json.dump(creator.samples, f)

    # Explicit UTF-8: sentences are presumably non-ASCII (this is the "ja"
    # tool), and the platform default encoding may be unable to encode them.
    with open(args.OUT / 'testsents.txt', 'w', encoding='utf-8') as f:
        logger.info(f'writing to {f.name}')
        for sent in creator.sents:
            print(sent, file=f)
github masashi-y / depccg / depccg / tools / ja / data.py View on Github external
help='only allow categories which appear >= freq-cut')
    parser.add_argument('--word-freq-cut',
                        type=int,
                        default=5,
                        help='only allow words which appear >= freq-cut')
    parser.add_argument('--char-freq-cut',
                        type=int,
                        default=5,
                        help='only allow characters which appear >= freq-cut')
    parser.add_argument('--mode',
                        choices=['train', 'test'],
                        default='train')

    args = parser.parse_args()
    if args.mode == 'train':
        TrainingDataCreator.create_traindata(args)
    else:
        TrainingDataCreator.create_testdata(args)
github masashi-y / depccg / depccg / tools / ja / data.py View on Github external
def create_traindata(args):
        """Build training data from ``args.PATH`` and write vocabulary files.

        Steps visible in this block:

        1. Construct a ``TrainingDataCreator`` from ``args.PATH`` and the
           word / char / category frequency cut-offs.
        2. Read every tree from ``self.filepath`` with ``read_ccgbank``.
        3. Call ``_traverse`` on each tree, then ``_create_samples`` on the
           whole list.
        4. Keep the entries of ``self.cats`` and ``self.words`` whose value is
           at least the matching cut-off and pass them to ``_write`` with the
           targets ``args.OUT / 'target.txt'`` and ``args.OUT / 'words.txt'``.
        5. Filter ``self.chars`` the same way.

        Args:
            args: parsed arguments providing ``PATH``, ``OUT`` (must support
                the ``/`` operator with a string), ``word_freq_cut``,
                ``char_freq_cut`` and ``cat_freq_cut``.
        """
        self = TrainingDataCreator(args.PATH,
                                   args.word_freq_cut,
                                   args.char_freq_cut,
                                   args.cat_freq_cut)

        # read_ccgbank yields 3-item entries; only the last item is kept.
        trees = [tree for _, _, tree in read_ccgbank(self.filepath)]
        # presumably _traverse fills self.cats / self.words / self.chars
        # -- TODO confirm against its definition
        for tree in trees:
            self._traverse(tree)
        self._create_samples(trees)

        # Drop categories whose value is below the category cut-off.
        cats = {k: v for k, v in self.cats.items() if v >= self.cat_freq_cut}
        self._write(cats, args.OUT / 'target.txt')

        # Drop words whose value is below the word cut-off.
        words = {k: v for k, v in self.words.items() if v >= self.word_freq_cut}
        self._write(words, args.OUT / 'words.txt')

        # NOTE(review): chars is not used within the visible lines; the
        # excerpt appears to end here, so its consumer cannot be verified.
        chars = {k: v for k, v in self.chars.items() if v >= self.char_freq_cut}
github masashi-y / depccg / depccg / tools / ja / data.py View on Github external
type=int,
                        default=5,
                        help='only allow words which appear >= freq-cut')
    parser.add_argument('--char-freq-cut',
                        type=int,
                        default=5,
                        help='only allow characters which appear >= freq-cut')
    parser.add_argument('--mode',
                        choices=['train', 'test'],
                        default='train')

    args = parser.parse_args()
    if args.mode == 'train':
        TrainingDataCreator.create_traindata(args)
    else:
        TrainingDataCreator.create_testdata(args)