How to use the ipwb.util.unsurt function in ipwb

To help you get started, we’ve selected a few ipwb examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github oduwsdl / ipwb / ipwb / replay.py View on Github external
if not indexFileContents:
        return 0

    lines = indexFileContents.strip().split('\n')

    uris = {}
    for i, l in enumerate(lines):
        if not ipwbUtils.isValidCDXJLine(l):
            continue

        if ipwbUtils.isCDXJMetadataRecord(l):
            continue

        cdxjFields = l.split(' ', 2)
        uri = unsurt(cdxjFields[0])
        datetime = cdxjFields[1]

        try:
            jsonFields = json.loads(cdxjFields[2])
        except Exception as e:  # Skip lines w/o JSON block
            continue

        if uri not in uris:
            uris[uri] = []

        mementoAsJSON = {
            'datetime': datetime,
            'mime': jsonFields['mime_type'] or '',
            'status': jsonFields['status_code']
        }
        if 'title' in jsonFields:
github oduwsdl / ipwb / ipwb / replay.py View on Github external
def generateLinkTimeMapFromCDXJLines(cdxjLines, original, tmself, tgURI):
    """Begin assembling a Link-format TimeMap string from CDXJ lines.

    NOTE(review): this excerpt stops inside the per-line loop; the code
    that appends each memento entry and returns the result is not visible
    here.

    Args:
        cdxjLines: Iterable of CDXJ lines. Each line is split on single
            spaces into a SURT URI, a datetime and a trailing JSON field.
        original: SURT form of the original URI; converted via unsurt().
        tmself: URI of this TimeMap (a string); emitted with
            rel="self timemap" and used to derive the CDXJ TimeMap URI.
        tgURI: URI emitted with rel="timegate".
    """
    # Split representation of tmself; it is indexable and mutable, since
    # element 2 is overwritten below.
    tmurl = getProxiedURIT(tmself)

    # With a proxy configured, replace both URIs by the re-joined output
    # of getProxiedURIT().
    if app.proxy is not None:
        tmself = urlunsplit(tmurl)
        tgURI = urlunsplit(getProxiedURIT(tgURI))

    # Extract and trim for host:port prepending
    tmurl[2] = ''  # Clear TM path
    hostAndPort = urlunsplit(tmurl) + '/'

    # unsurted URI will never have a scheme, add one
    originalURI = 'http://{0}'.format(unsurt(original))

    # Header entries of the TimeMap: original, self, the alternate CDXJ
    # TimeMap, and the TimeGate.
    tmData = '<{0}>; rel="original",\n'.format(originalURI)
    tmData += '<{0}>; rel="self timemap"; '.format(tmself)
    tmData += 'type="application/link-format",\n'

    # The CDXJ TimeMap URI is this TimeMap's URI with the format path
    # segment swapped.
    cdxjTMURI = tmself.replace('/timemap/link/', '/timemap/cdxj/')
    tmData += '<{0}>; rel="timemap"; '.format(cdxjTMURI)
    tmData += 'type="application/cdxj+ors",\n'

    tmData += '<{0}>; rel="timegate"'.format(tgURI)

    for i, line in enumerate(cdxjLines):
        # Unpacking raises ValueError when a line has fewer than three
        # space-separated fields. The local names `datetime` and `json`
        # shadow any same-named modules for the rest of this function.
        (surtURI, datetime, json) = line.split(' ', 2)
        # presumably converts a 14-digit timestamp to an RFC 1123 date
        # string - confirm against ipwbUtils
        dtRFC1123 = ipwbUtils.digits14ToRFC1123(datetime)
        firstLastStr = ''
github oduwsdl / ipwb / ipwb / replay.py View on Github external
for i, line in enumerate(cdxjLines):
        (surtURI, datetime, json) = line.split(' ', 2)
        dtRFC1123 = ipwbUtils.digits14ToRFC1123(datetime)
        firstLastStr = ''

        if len(cdxjLines) > 1:
            if i == 0:
                firstLastStr = 'first '
            elif i == len(cdxjLines) - 1:
                firstLastStr = 'last '
        elif len(cdxjLines) == 1:
            firstLastStr = 'first last '

        tmData += ',\n<{0}memento/{1}/{2}>; rel="{3}memento"; datetime="{4}"' \
                  .format(hostAndPort, datetime, unsurt(surtURI), firstLastStr,
                          dtRFC1123)
    return tmData + '\n'
github oduwsdl / ipwb / ipwb / replay.py View on Github external
def generateCDXJTimeMapFromCDXJLines(cdxjLines, original, tmself, tgURI):
    """Begin assembling a CDXJ-format TimeMap string from CDXJ lines.

    NOTE(review): this excerpt stops inside the per-line loop; the code
    that appends each memento entry and returns the result is not visible
    here.

    Args:
        cdxjLines: Iterable of CDXJ lines. Each line is split on single
            spaces into a SURT URI, a datetime and a trailing JSON field.
        original: SURT form of the original URI; converted via unsurt().
        tmself: URI of this TimeMap (a string); written to the "!id"
            record and used to derive the Link-format TimeMap URI.
        tgURI: URI written to the "timegate_uri" metadata record.
    """
    tmurl = getProxiedURIT(tmself)
    # With a proxy configured, replace both URIs by the re-joined output
    # of getProxiedURIT().
    if app.proxy is not None:
        tmself = urlunsplit(tmurl)
        tgURI = urlunsplit(getProxiedURIT(tgURI))

    # unsurted URI will never have a scheme, add one
    originalURI = 'http://{0}'.format(unsurt(original))

    # Metadata header records, each prefixed with '!'. Doubled braces in
    # the format strings produce literal braces in the output.
    tmData = '!context ["http://tools.ietf.org/html/rfc7089"]\n'
    tmData += '!id {{"uri": "{0}"}}\n'.format(tmself)
    tmData += '!keys ["memento_datetime_YYYYMMDDhhmmss"]\n'
    tmData += '!meta {{"original_uri": "{0}"}}\n'.format(originalURI)
    tmData += '!meta {{"timegate_uri": "{0}"}}\n'.format(tgURI)
    # The Link TimeMap URI is this TimeMap's URI with the format path
    # segment swapped.
    linkTMURI = tmself.replace('/timemap/cdxj/', '/timemap/link/')
    tmData += ('!meta {{"timemap_uri": {{'
               '"link_format": "{0}", '
               '"cdxj_format": "{1}"'
               '}}}}\n').format(linkTMURI, tmself)
    # Everything in tmself before the first 'timemap/'; str.index raises
    # ValueError when that substring is absent.
    hostAndPort = tmself[0:tmself.index('timemap/')]

    for i, line in enumerate(cdxjLines):
        # Unpacking raises ValueError when a line has fewer than three
        # space-separated fields. The local names `datetime` and `json`
        # shadow any same-named modules for the rest of this function.
        (surtURI, datetime, json) = line.split(' ', 2)
        # presumably converts a 14-digit timestamp to an RFC 1123 date
        # string - confirm against ipwbUtils
        dtRFC1123 = ipwbUtils.digits14ToRFC1123(datetime)
github oduwsdl / ipwb / ipwb / replay.py View on Github external
s = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
    indexPath = ipwbUtils.getIPWBReplayIndexPath()

    print('Getting CDXJ Lines with the URI-R {0} from {1}'
          .format(urir, indexPath))
    cdxjLinesWithURIR = getCDXJLinesWithURIR(urir, indexPath)

    closestLine = getCDXJLineClosestTo(datetime, cdxjLinesWithURIR)

    if closestLine is None:
        msg = '<h1>ERROR 404</h1>'
        msg += 'No capture found for {0} at {1}.'.format(urir, datetime)

        return Response(msg, status=404)

    uri = unsurt(closestLine.split(' ')[0])
    newDatetime = closestLine.split(' ')[1]

    linkHeader = getLinkHeaderAbbreviatedTimeMap(urir, newDatetime)

    return (newDatetime, linkHeader, uri)
github oduwsdl / ipwb / ipwb / replay.py View on Github external
def generateNoMementosInterface(path, datetime):
    """Build the "no capture found" page for a requested URI and datetime.

    Looks up CDXJ lines for ``path``. When exactly one line is found the
    caller is redirected to that capture; otherwise an HTML message
    listing the available captures and TimeMap links is accumulated.

    NOTE(review): this excerpt ends while the message is still being
    built; the code that finishes and returns the 404 response is not
    visible here.

    Args:
        path: Requested URI-R; passed to getCDXJLinesWithURIR() and
            interpolated into the message.
        datetime: Requested datetime; interpolated into the message.

    Returns:
        The result of ``redirect(..., code=302)`` when exactly one CDXJ
        line matches ``path``.
    """
    # NOTE(review): path, datetime and urir are interpolated into HTML
    # without escaping - confirm whether callers sanitize them.
    msg = '<h1>ERROR 404</h1>'
    msg += 'No capture found for {0} at {1}.'.format(path, datetime)

    linesWithSameURIR = getCDXJLinesWithURIR(path, None)
    print('CDXJ lines with URI-R at {0}'.format(path))
    print(linesWithSameURIR)

    # TODO: Use closest instead of conditioning on single entry
    #  temporary fix for core functionality in #225
    if len(linesWithSameURIR) == 1:
        # CDXJ line layout: "<surt> <datetime> <json>"
        fields = linesWithSameURIR[0].split(' ', 2)
        redirectURI = '/{1}/{0}'.format(unsurt(fields[0]), fields[1])

        return redirect(redirectURI, code=302)

    # Stays empty when there are no matching lines; otherwise holds the
    # unsurted URI of the last line listed below.
    urir = ''
    if linesWithSameURIR:
        msg += '<p>{0} capture(s) available:</p><ul>'.format(
            len(linesWithSameURIR))
        for line in linesWithSameURIR:
            fields = line.split(' ', 2)
            urir = unsurt(fields[0])
            msg += ('<li><a href="/{1}/{0}">{0} at {1}</a></li>'
                    .format(urir, fields[1]))
        msg += '</ul>'

    msg += '<p>TimeMaps: '
    # A stray "</p>" that had been fused onto the end of this statement
    # (a syntax error) is removed.
    msg += '<a href="/timemap/link/{0}">Link</a> '.format(urir)