How to use the pywb.utils.io.no_except_close function in pywb

To help you get started, we’ve selected a few pywb examples that show popular ways the no_except_close function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github webrecorder / pywb / pywb / apps / rewriterapp.py View on Github external
details = dict(args=kwargs, error=error)
            if r.status_code == 404:
                raise NotFoundException(url=wb_url.url, msg=details)

            else:
                raise UpstreamException(r.status_code, url=wb_url.url, details=details)

        cdx = CDXObject(r.headers.get('Warcserver-Cdx').encode('utf-8'))

        cdx_url_parts = urlsplit(cdx['url'])

        if cdx_url_parts.path.endswith('/') and not url_parts.path.endswith('/'):
            # add trailing slash
            new_path = url_parts.path + '/'

            no_except_close(r.raw)

            return self.send_redirect(new_path, url_parts, urlrewriter)

        stream = BufferedReader(r.raw, block_size=BUFF_SIZE)
        record = self.loader.parse_record_stream(stream,
                                                 ensure_http_headers=True)

        memento_dt = r.headers.get('Memento-Datetime')
        target_uri = r.headers.get('WARC-Target-URI')

        # cdx['urlkey'] = urlkey
        # cdx['timestamp'] = http_date_to_timestamp(memento_dt)
        # cdx['url'] = target_uri

        set_content_loc = False
github webrecorder / pywb / pywb / warcserver / resource / responseloader.py View on Github external
warc_headers = payload.rec_headers

        if headers != payload:
            warc_headers.replace_header('WARC-Refers-To-Target-URI',
                     payload.rec_headers.get_header('WARC-Target-URI'))

            warc_headers.replace_header('WARC-Refers-To-Date',
                     payload.rec_headers.get_header('WARC-Date'))

            warc_headers.replace_header('WARC-Target-URI',
                     headers.rec_headers.get_header('WARC-Target-URI'))

            warc_headers.replace_header('WARC-Date',
                     headers.rec_headers.get_header('WARC-Date'))
            no_except_close(headers.raw_stream)

        return (warc_headers, http_headers_buff, payload.raw_stream)
github webrecorder / pywb / pywb / utils / loaders.py View on Github external
def load_yaml_config(config_file):
    """Open ``config_file`` via ``load`` and parse its contents as YAML.

    The opened stream is always handed to ``no_except_close`` afterwards,
    whether parsing succeeded or an error is propagating.

    :param config_file: location of the YAML config, passed through to ``load``
    :return: the object produced by ``yaml.load``
    """
    # NOTE(review): yaml.Loader is PyYAML's full loader; it should only be
    # pointed at trusted config files.
    stream = None
    try:
        stream = load(config_file)
        return yaml.load(stream, Loader=yaml.Loader)
    finally:
        # Runs on both the success and the error path; stream is still None
        # if load() itself failed.
        no_except_close(stream)
github webrecorder / pywb / pywb / warcserver / resource / responseloader.py View on Github external
upstream_res = manager.urlopen(method=method,
                                           url=load_url,
                                           body=data,
                                           headers=req_headers,
                                           redirect=False,
                                           assert_same_host=False,
                                           preload_content=False,
                                           decode_content=False,
                                           retries=max_retries,
                                           timeout=params.get('_timeout'))

            return upstream_res

        except Exception as e:
            if upstream_res:
                no_except_close(upstream_res)
            if logger.isEnabledFor(logging.DEBUG):
                import traceback
                traceback.print_exc()
                logger.debug('FAILED: ' + method + ' ' + load_url + ': ' + str(e))

            raise LiveResourceException(load_url)
github webrecorder / pywb / pywb / recorder / recorderapp.py View on Github external
skipping = True

            if not skipping:
                entry = (self.req.headers, self.req.out,
                         self.headers, self.out, self.params)
                self.queue.put(entry)
        except Exception:
            traceback.print_exc()
            skipping = True

        finally:
            if skipping:
                no_except_close(self.out)
                no_except_close(self.req.out)

            no_except_close(self.req)
            self.req = None
github webrecorder / pywb / pywb / warcserver / index / zipnum.py View on Github external
total_pages = 1

        if query.page_count:
            # same line, so actually need to look at cdx
            # to determine if it exists
            if blocks == 0:
                try:
                    block_cdx_iter = self.idx_to_cdx([first_line], query)
                    block = six.next(block_cdx_iter)
                    cdx = six.next(block)
                except StopIteration:
                    total_pages = 0
                    blocks = -1

            yield self._page_info(total_pages, pagesize, blocks + 1)
            no_except_close(reader)
            return

        curr_page = query.page
        if curr_page >= total_pages or curr_page < 0:
            msg = 'Page {0} invalid: First Page is 0, Last Page is {1}'
            no_except_close(reader)
            raise CDXException(msg.format(curr_page, total_pages - 1))

        startline = curr_page * pagesize
        endline = startline + pagesize - 1
        if blocks >= 0:
            endline = min(endline, blocks)

        if curr_page == 0:
            yield first_line
        else:
github webrecorder / pywb / pywb / recorder / recorderapp.py View on Github external
req_pay.seek(0)
                req = self.writer.create_warc_record(uri=uri,
                                                     record_type='request',
                                                     payload=req_pay,
                                                     length=req_length,
                                                     warc_headers_dict=req_head)

                self.writer.write_request_response_pair(req, resp, params)

            else:
                self.writer.write_record(resp, params)

        finally:
            try:
                if req_pay:
                    no_except_close(req_pay)

                if resp_pay:
                    no_except_close(resp_pay)
            except Exception as e:
                traceback.print_exc()
github webrecorder / pywb / pywb / recorder / recorderapp.py View on Github external
record = self.writer.create_warc_record(uri=params['url'],
                                                    record_type=record_type,
                                                    payload=req_stream.out,
                                                    length=payload_length,
                                                    warc_content_type=content_type,
                                                    warc_headers_dict=req_stream.headers)

            self.writer.write_record(record, params)

            msg = {'success': 'true',
                   'WARC-Date': record.rec_headers.get_header('WARC-Date')}

        finally:
            if req_stream:
                no_except_close(req_stream.out)

        return self.send_message(msg,
                                 '200 OK',
                                 start_response)
github webrecorder / pywb / pywb / utils / loaders.py View on Github external
# if starting with . or /, can only be a file path..
        file_only = url.startswith(('/', '.'))

        # convert to filename
        filename = from_file_url(url)
        if filename != url:
            file_only = True
            url = filename

        afile = None
        try:
            # first, try as file
            afile = open(url, 'rb')

        except IOError:
            no_except_close(afile)
            if file_only:
                raise

            return super(LocalFileLoader, self).load(url, offset, length)

        if offset > 0:
            afile.seek(offset)

        if length >= 0:
            return LimitReader(afile, length)
        else:
            return afile
github webrecorder / pywb / pywb / warcserver / index / indexsource.py View on Github external
def handle_timemap(self, params):
        """Fetch the remote timemap and convert its links to CDX objects.

        The timemap URL is built from ``self.timemap_url`` and ``params``.
        Any failure while fetching (request error, non-success status, or an
        empty response body) is logged at debug level and reported to the
        caller as ``NotFoundException``.

        :param params: request parameters; ``_timeout`` (if present) is used
            as the request timeout
        :return: result of ``self.links_to_cdxobject(links, 'timemap')``
        :raises NotFoundException: if the timemap could not be retrieved
        """
        url = res_template(self.timemap_url, params)
        headers = self._get_headers(params)
        res = None
        try:
            res = self.sesh.get(url,
                                headers=headers,
                                timeout=params.get('_timeout'))

            res.raise_for_status()

            links = res.text
            # Explicit check instead of ``assert``: assertions are stripped
            # under ``python -O``, which would let an empty body through.
            if not links:
                raise ValueError('Empty timemap response')

        except Exception as e:
            # Best-effort cleanup of the response before reporting failure.
            no_except_close(res)
            self.logger.debug('FAILED: ' + str(e))
            raise NotFoundException(url)

        return self.links_to_cdxobject(links, 'timemap')