How to use the luigi.File function in luigi

To help you get started, we’ve selected a few luigi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github miku / siskin / siskin / sources / bsz.py View on Github external
def run(self):
        """Concatenate all inputs, keep the unique first fields, publish the result.

        The shell pipeline cats every input file, extracts the first
        whitespace-separated field with awk, and de-duplicates with a
        C-locale sort. The value returned by ``shellout`` (presumably the
        path substituted for ``{output}`` -- confirm against the helper) is
        then moved onto this task's output location.
        """
        paths = [target.fn for target in self.input()]
        template = """cat {files} | LANG=C awk '{{print $1}}' | LANG=C sort -u > {output}"""
        scratch = shellout(template, files=' '.join(paths))
        luigi.File(scratch).move(self.output().fn)
github miku / siskin / siskin / sources / dbpedia.py View on Github external
def run(self):
        """Copy the first listed short-abstracts file to this task's output.

        The input is read as TSV with a single ``path`` column. The first
        path ending in ``short_abstracts_<language>.<format>`` is copied to
        the output path; later matches are ignored.

        Raises:
            RuntimeError: if no listed path has the expected suffix.
        """
        with self.input().open() as handle:
            wanted = 'short_abstracts_%s.%s' % (self.language, self.format)
            paths = (row.path for row in handle.iter_tsv(cols=('path',)))
            # Stop at the first hit; None signals that nothing matched.
            match = next((path for path in paths if path.endswith(wanted)), None)
            if match is None:
                raise RuntimeError('no file found')
            luigi.File(match).copy(self.output().path)
github miku / siskin / siskin / sources / yago.py View on Github external
def run(self):
        """Download the complete YAGO3 TSV archive and move it into place.

        wget writes to the ``{output}`` placeholder; the value returned by
        ``shellout`` (presumably that path -- confirm against the helper)
        is then moved to this task's output path.
        """
        source = "http://resources.mpi-inf.mpg.de/yago-naga/yago/download/yago/yago3_entire_tsv.7z"
        command = """wget --retry-connrefused -O {output} {url}"""
        downloaded = shellout(command, url=source)
        luigi.File(downloaded).move(self.output().path)
github miku / siskin / siskin / sources / pqdtopen.py View on Github external
def requires(self):
        """Harvest the PQDT OAI endpoint with ``oaimi`` and move the result into place.

        NOTE(review): this method is named ``requires`` yet it runs a shell
        command and writes the task output, and it returns nothing. That is
        the kind of work the sibling snippets do in ``run`` -- confirm
        against the upstream file whether the name is intentional.
        """
        command = "oaimi -verbose http://pqdtoai.proquest.com/OAIHandler > {output}"
        harvested = shellout(command)
        luigi.File(harvested).move(self.output().path)
github miku / siskin / docs / elag-2016 / byoi / code / scaffold2_crossref.py View on Github external
def run(self):
        """
        TODO: For each file, we want to run a jq command.

        Scaffold: creates a temporary file, opens the task input (the
        processing step is still to be written), then moves the temporary
        file to this task's output path.
        """
        import os

        fd, temp = tempfile.mkstemp(prefix='byoi-')
        # mkstemp returns an already-open OS-level descriptor; only the path
        # is used here, so close it right away instead of leaking it.
        os.close(fd)
        with self.input().open() as handle:
            # TODO: insert code here
            pass

        luigi.File(temp).move(self.output().path)
github miku / siskin / siskin / workflows / fuzzy.py View on Github external
def run(self):
        """Find similar titles with ``esmlt`` and write a condensed TSV.

        The ``esmlt`` result is read as an 11-column TSV. For each side
        (left/right) the two trailing descriptive columns are merged into a
        single space-joined field, skipping empty values and the literal
        ``NOT_AVAILABLE``; if nothing remains, ``NOT_AVAILABLE`` is written.
        The resulting 9-column file is moved to this task's output path.
        """
        import os

        # find similar titles
        output = shellout("""esmlt -host {host} -port {port} -indices "{target}" -fields "content.245.a content.245.b"
                             -file "{file}" -columns "4,5" > {output} """, host=self.es_host, port=self.es_port,
                             file=self.input().get('file').path, target=self.target)

        fd, stopover = tempfile.mkstemp(prefix='siskin-')
        # mkstemp returns an open descriptor; only the path is needed, so
        # close it immediately rather than leaking it for the process lifetime.
        os.close(fd)

        columns = ('idl', 'il', 'tl', 'til', 'sl', 'idr', 'ir', 'tr', 'score', 'tir', 'sr')
        with luigi.File(output, format=TSV).open() as handle:
            # Named `sink` so the `output` path above is not rebound to a file handle.
            with luigi.File(stopover, format=TSV).open('w') as sink:
                for row in handle.iter_tsv(cols=columns):
                    ml = ' '.join([v for v in (row.til, row.sl) if v and v != "NOT_AVAILABLE"])
                    mr = ' '.join([v for v in (row.tir, row.sr) if v and v != "NOT_AVAILABLE"])
                    # An empty merge falls back to the placeholder value.
                    sink.write_tsv(row.idl, row.il, row.tl, ml or "NOT_AVAILABLE",
                                   row.idr, row.ir, row.tr, mr or "NOT_AVAILABLE", row.score)
        luigi.File(stopover).move(self.output().path)
github miku / siskin / siskin / sources / geonames.py View on Github external
def run(self):
        """Filter the 'geo' input down to entries referenced by 'gnd' and convert them.

        Steps:
        1. Collect every ``uri`` from the 'gnd' input (trailing slashes
           stripped) into a set.
        2. Scan the 'geo' input: a line starting with ``http://`` is treated
           as a key and the line after it as its content; the content is
           kept when the key (trailing slash stripped) is in the set.
        3. Feed each kept line through ``rapper`` (rdfxml to ntriples),
           appending to one file that is moved to this task's output path.
        """
        import os

        ids = set()
        with self.input().get('gnd').open() as handle:
            for row in handle.iter_tsv(cols=('uri',)):
                ids.add(row.uri.rstrip('/'))

        fd, stopover = tempfile.mkstemp(prefix='siskin-')
        # mkstemp returns an open descriptor; only the path is used, so
        # close it immediately instead of leaking it.
        os.close(fd)
        with self.input().get('geo').open() as handle:
            # Named `sink` so it is not confused with the `output` path below.
            with luigi.File(stopover).open('w') as sink:
                # Use the iterator protocol rather than the Python 2-only
                # `.next()` method so the scan also works on Python 3.
                lines = iter(handle)
                for line in lines:
                    line = line.strip()
                    if not line.startswith('http://'):
                        continue
                    # The line after a key line is its content; a key on the
                    # very last line has none, which ends the scan.
                    content = next(lines, None)
                    if content is None:
                        break
                    if line.rstrip('/') in ids:
                        sink.write(content)

        fd, t = tempfile.mkstemp(prefix='siskin-')
        os.close(fd)
        output = shellout("""while read r; do echo $r > {t} &&
                             rapper -q -i rdfxml -o ntriples {t} >> {output}; done < {input} """,
                             t=t, input=stopover)
        luigi.File(output).move(self.output().path)
github miku / siskin / siskin / workflows / hmt.py View on Github external
def run(self):
        """Extract columns 2-3 of the input, de-duplicate, and publish the result.

        ``cut`` selects fields 2 through 3 and a C-locale ``sort -u``
        removes duplicates; the file returned by ``shellout`` is then moved
        to this task's output path.
        """
        command = "cut -f 2-3 < {input} | LANG=C sort -u > {output}"
        source = self.input().path
        deduplicated = shellout(command, input=source)
        luigi.File(deduplicated).move(self.output().path)
github miku / siskin / docs / elag-2016 / byoi / code / part6_export.py View on Github external
def run(self):
        """Run ``span-export`` over the gzip-compressed input and store a compressed export.

        The input is decompressed on the fly with ``unpigz``, exported, and
        recompressed with ``pigz``; the resulting file is moved to this
        task's output path.
        """
        command = "span-export <(unpigz -c {input}) | pigz -c > {output}"
        exported = shellout(command, input=self.input().path)
        luigi.File(exported).move(self.output().path)
github miku / siskin / siskin / sources / nep.py View on Github external
def run(self):
        """Concatenate all inputs, sort them by fields 1 and 3, and publish the result.

        Every input target is appended to one temporary file, which is then
        sorted in the C locale on key 1 followed by key 3. The sorted file
        is moved to this task's output location.
        """
        import os

        fd, combined = tempfile.mkstemp(prefix='tasktree-')
        # mkstemp returns an open descriptor; the shell commands below only
        # need the path, so close it immediately instead of leaking it.
        os.close(fd)
        for target in self.input():
            shellout("cat {input} >> {output}", input=target.path,
                     output=combined)
        output = shellout("LANG=C sort -k1,1 -k3,3 {input} > {output}", input=combined)
        luigi.File(output).move(self.output().fn)