Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: redirects are less aggressive #89

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/gateway-conformance/src/conformance.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ const tests: TestConfig[] = [
skip: [
'TestGatewaySubdomains/.*HTTP_proxy_tunneling_via_CONNECT' // verified fetch should not be doing HTTP proxy tunneling.
],
successRate: 41.35
successRate: 47.26
},
{
name: 'TestUnixFSDirectoryListingOnSubdomainGateway',
Expand Down
143 changes: 75 additions & 68 deletions packages/verified-fetch/src/utils/handle-redirects.ts
Original file line number Diff line number Diff line change
@@ -1,101 +1,108 @@
import { type AbortOptions, type ComponentLogger } from '@libp2p/interface'
import { CodeError, type AbortOptions, type ComponentLogger } from '@libp2p/interface'
import { type VerifiedFetchInit, type Resource } from '../index.js'
import { matchURLString } from './parse-url-string.js'
import { movedPermanentlyResponse } from './responses.js'
import type { CID } from 'multiformats/cid'

interface GetRedirectResponse {
interface GetRedirectResponseOptions {
cid: CID
resource: Resource
options?: Omit<VerifiedFetchInit, 'signal'> & AbortOptions
logger: ComponentLogger
}

/**
* Only used in testing.
*/
fetch?: typeof globalThis.fetch
interface GetSubdomainRedirectOptions extends GetRedirectResponseOptions {
resource: string
}

function maybeAddTraillingSlash (path: string): string {
// if it has an extension-like ending, don't add a trailing slash
if (path.match(/\.[a-zA-Z0-9]{1,4}$/) != null) {
return path
/**
* If given only a path, e.g. /ipfs/QmHash, this function returns only the path, adding a trailing slash when the path does not have an extension-like ending.
* If given a full URL, it returns that same URL, adding a trailing slash to the path when the path does not have an extension-like ending.
*
* This is only used for directory normalization with UnixFS directory requests.
*/
export function getSpecCompliantPath (resource: string): string {
let url: URL
let isInvalidURL = false
try {
url = new URL(resource)
} catch {
isInvalidURL = true
url = new URL(resource, 'http://example.com')
}
return path.endsWith('/') ? path : `${path}/`
}
const { pathname } = url

// See https://specs.ipfs.tech/http-gateways/path-gateway/#location-response-header
export async function getRedirectResponse ({ resource, options, logger, cid, fetch = globalThis.fetch }: GetRedirectResponse): Promise<null | Response> {
const log = logger.forComponent('helia:verified-fetch:get-redirect-response')
let specCompliantPath = pathname

if (typeof resource !== 'string' || options == null || ['ipfs://', 'ipns://'].some((prefix) => resource.startsWith(prefix))) {
return null
if (pathname.match(/\.[a-zA-Z0-9]{1,4}$/) == null && !pathname.endsWith('/')) {
// no extension-like ending, add a trailing slash
specCompliantPath = `${pathname}/`
}

if (isInvalidURL) {
return specCompliantPath
}

// the below is needed to get around a bug with some environments removing the trailing slash when calling url.href or url.toString()
if (specCompliantPath.startsWith('//')) {
// likely ipfs:// or ipns:// url
return `${url.protocol}${specCompliantPath}${url.search}${url.hash}`
}
return `${url.protocol}//${url.host}${specCompliantPath}${url.search}${url.hash}`
}

/**
* Handles determining if a redirect to subdomain is needed.
*/
export async function getRedirectUrl ({ resource, options, logger, cid }: GetSubdomainRedirectOptions): Promise<string> {
const log = logger.forComponent('helia:verified-fetch:get-subdomain-redirect')
const headers = new Headers(options?.headers)
const forwardedHost = headers.get('x-forwarded-host')
const headerHost = headers.get('host')
const forwardedFor = headers.get('x-forwarded-for')
if (forwardedFor == null && forwardedHost == null && headerHost == null) {
log.trace('no redirect info found in headers')
return null
}
const forwardedProto = headers.get('x-forwarded-proto')

log.trace('checking for redirect info')
// if x-forwarded-host is passed, we need to set the location header to the subdomain
// so that the browser can redirect to the correct subdomain
try {
const urlParts = matchURLString(resource)
if (urlParts.cidOrPeerIdOrDnsLink.length > 63) {
if (urlParts.protocol === 'ipfs') {
throw new CodeError('CID incompatible with DNS label length limit of 63', 'DNS_LABEL_INCOMPATIBLE_CID_SUBDOMAIN')
}
throw new CodeError('PeerId or DNSLink incompatible with DNS label length limit of 63', 'DNS_LABEL_INCOMPATIBLE_SUBDOMAIN')
}

if (forwardedHost == null && forwardedProto == null) {
log.trace('no redirect info found in headers')
throw new CodeError('No redirect info found in headers', 'NO_REDIRECT_INFO_FOUND')
}
const reqUrl = new URL(resource)
reqUrl.protocol = forwardedProto ?? reqUrl.protocol
const actualHost = forwardedHost ?? reqUrl.host
const subdomainUrl = new URL(reqUrl)
if (urlParts.protocol === 'ipfs' && cid.version === 0) {
subdomainUrl.host = `${cid.toV1()}.ipfs.${actualHost}`
} else {
subdomainUrl.host = `${urlParts.cidOrPeerIdOrDnsLink}.${urlParts.protocol}.${actualHost}`
const subdomain = `${urlParts.cidOrPeerIdOrDnsLink}.${urlParts.protocol}`
if (actualHost.includes(subdomain)) {
log.trace('request was for a subdomain already. Returning requested resource.')
return resource
}

if (headerHost?.includes(urlParts.protocol) === true && subdomainUrl.host.includes(headerHost)) {
log.trace('request was for a subdomain already, not setting location header')
return null
}
let subdomainHost = `${urlParts.cidOrPeerIdOrDnsLink}.${urlParts.protocol}.${actualHost}`

if (headerHost != null && !subdomainUrl.host.includes(headerHost)) {
log.trace('host header is not the same as the subdomain url host, not setting location header')
return null
if (urlParts.protocol === 'ipfs' && cid.version === 0) {
subdomainHost = `${cid.toV1()}.ipfs.${actualHost}`
}
if (reqUrl.host === subdomainUrl.host) {
log.trace('req url is the same as the subdomain url, not setting location header')
return null
const subdomainUrl = new URL(reqUrl)
subdomainUrl.host = subdomainHost
subdomainUrl.pathname = reqUrl.pathname.replace(`/${urlParts.cidOrPeerIdOrDnsLink}`, '').replace(`/${urlParts.protocol}`, '')

if (headerHost != null && headerHost === subdomainUrl.host) {
log.trace('request was for a subdomain already. Returning requested resource.')
return resource
}

subdomainUrl.pathname = maybeAddTraillingSlash(reqUrl.pathname.replace(`/${urlParts.cidOrPeerIdOrDnsLink}`, '').replace(`/${urlParts.protocol}`, ''))
log.trace('subdomain url %s', subdomainUrl.href)
const pathUrl = new URL(reqUrl, `${reqUrl.protocol}//${actualHost}`)
pathUrl.pathname = maybeAddTraillingSlash(reqUrl.pathname)
log.trace('path url %s', pathUrl.href)
// try to query subdomain with HEAD request to see if it's supported
try {
const subdomainTest = await fetch(subdomainUrl, { method: 'HEAD' })
if (subdomainTest.ok) {
log('subdomain supported, redirecting to subdomain')
return movedPermanentlyResponse(resource.toString(), subdomainUrl.href)
} else {
log('subdomain not supported, subdomain failed with status %s %s', subdomainTest.status, subdomainTest.statusText)
throw new Error('subdomain not supported')
}
} catch (err: any) {
log('subdomain not supported', err)
if (pathUrl.href === reqUrl.href) {
log('path url is the same as the request url, not setting location header')
return null
}
// pathUrl is different from request URL (maybe even with just a trailing slash)
return movedPermanentlyResponse(resource.toString(), pathUrl.href)
return subdomainUrl.toString()
} catch (err: any) {
log.error('error while checking for subdomain support', err)
if (err.code != null) {
throw err
}
} catch (e) {
// if it's not a full URL, we have nothing left to do.
log.error('error setting location header for x-forwarded-host', e)
}
return null

return resource
}
28 changes: 22 additions & 6 deletions packages/verified-fetch/src/verified-fetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import { getETag } from './utils/get-e-tag.js'
import { getResolvedAcceptHeader } from './utils/get-resolved-accept-header.js'
import { getStreamFromAsyncIterable } from './utils/get-stream-from-async-iterable.js'
import { tarStream } from './utils/get-tar-stream.js'
import { getRedirectResponse } from './utils/handle-redirects.js'
import { getSpecCompliantPath, getRedirectUrl } from './utils/handle-redirects.js'
import { parseResource } from './utils/parse-resource.js'
import { type ParsedUrlStringResults } from './utils/parse-url-string.js'
import { resourceToSessionCacheKey } from './utils/resource-to-cache-key.js'
Expand Down Expand Up @@ -325,8 +325,10 @@ export class VerifiedFetch {
this.log('could not redirect to %s/ as redirect option was set to "error"', resource)
throw new TypeError('Failed to fetch')
} else if (options?.redirect === 'manual') {
this.log('returning 301 permanent redirect to %s/', resource)
return movedPermanentlyResponse(resource, `${resource}/`)
const properPath = getSpecCompliantPath(resource)
const redirectUrl = await getRedirectUrl({ resource: properPath, cid, options, logger: this.helia.logger })
this.log('returning 301 permanent redirect to %s', redirectUrl)
return movedPermanentlyResponse(resource, redirectUrl)
}

// fall-through simulates following the redirect?
Expand Down Expand Up @@ -476,6 +478,7 @@ export class VerifiedFetch {
* TODO: move operations called by fetch to a queue of operations where we can
* always exit early (and cleanly) if a given signal is aborted
*/
// eslint-disable-next-line complexity
async fetch (resource: Resource, opts?: VerifiedFetchOptions): Promise<Response> {
this.log('fetch %s', resource)

Expand Down Expand Up @@ -519,9 +522,22 @@ export class VerifiedFetch {
let response: Response
let reqFormat: RequestFormatShorthand | undefined

const redirectResponse = await getRedirectResponse({ resource, options, logger: this.helia.logger, cid })
if (redirectResponse != null) {
return redirectResponse
// subdomain redirects don't make sense for `fetch(cid)`, only for `fetch(path)` or `fetch(url)`
// if a specific format is requested, that should be handled by the `accept === '...'` checks
// subdomain redirects for unixFS are handled in handleDagPb
if (typeof resource === 'string' && query.format == null && cid.code !== dagPbCode) {
try {
const redirectUrl = await getRedirectUrl({ resource, cid, options, logger: this.helia.logger })
if (redirectUrl !== resource) {
this.log.trace('returning 301 permanent redirect to %s', redirectUrl)
return movedPermanentlyResponse(resource.toString(), redirectUrl)
}
} catch (err: any) {
if (err.code.startsWith('DNS_LABEL_INCOMPATIBLE') === true) {
return badRequestResponse(resource, err)
}
// ignore
}
}

const handlerArgs: FetchHandlerFunctionArg = { resource: resource.toString(), cid, path, accept, session: options?.session ?? true, options }
Expand Down
105 changes: 48 additions & 57 deletions packages/verified-fetch/test/utils/handle-redirects.spec.ts
Original file line number Diff line number Diff line change
@@ -1,84 +1,75 @@
import { prefixLogger } from '@libp2p/logger'
import { expect } from 'aegir/chai'
import { CID } from 'multiformats/cid'
import Sinon from 'sinon'
import { getRedirectResponse } from '../../src/utils/handle-redirects.js'
import { getRedirectUrl, getSpecCompliantPath } from '../../src/utils/handle-redirects.js'

const logger = prefixLogger('test:handle-redirects')

describe('handle-redirects', () => {
describe('getRedirectResponse', () => {
const sandbox = Sinon.createSandbox()
const cid = CID.parse('bafkqabtimvwgy3yk')
const cid = CID.parse('bafkqabtimvwgy3yk')

let fetchStub: Sinon.SinonStub
describe('getSpecCompliantPath', () => {
// the below are all assuming the above identity CID is a unixFS directory CID
it('should handle ipfs:// urls', () => {
expect(getSpecCompliantPath(`ipfs://${cid}`)).to.equal(`ipfs://${cid}/`)
expect(getSpecCompliantPath(`ipfs://${cid}/file.txt`)).to.equal(`ipfs://${cid}/file.txt`)
})

beforeEach(() => {
fetchStub = sandbox.stub(globalThis, 'fetch')
it('should handle ipns:// urls', () => {
expect(getSpecCompliantPath(`ipns://${cid}`)).to.equal(`ipns://${cid}/`)
expect(getSpecCompliantPath(`ipns://${cid}/file.txt`)).to.equal(`ipns://${cid}/file.txt`)
})

afterEach(() => {
sandbox.restore()
it('should handle http:// path urls', () => {
expect(getSpecCompliantPath(`http://ipfs.io/ipfs/${cid}`)).to.equal(`http://ipfs.io/ipfs/${cid}/`)
expect(getSpecCompliantPath(`http://ipfs.io/ipfs/${cid}/file.txt`)).to.equal(`http://ipfs.io/ipfs/${cid}/file.txt`)
})

const nullResponses = [
{ resource: cid, options: {}, logger, cid, testTitle: 'should return null if resource is not a string' },
{ resource: 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk', options: undefined, logger, cid, testTitle: 'should return null if options is undefined' },
{ resource: 'ipfs://', options: {}, logger, cid, testTitle: 'should return null for ipfs:// protocol urls' },
{ resource: 'ipns://', options: {}, logger, cid, testTitle: 'should return null for ipns:// protocol urls' }
]

nullResponses.forEach(({ resource, options, logger, cid, testTitle }) => {
it(testTitle, async () => {
const response = await getRedirectResponse({ resource, options, logger, cid })
expect(response).to.be.null()
})
it('should handle http:// subdomain urls', () => {
  // On a subdomain gateway the CID lives in the host, so the path part starts at the root.
  // The previous assertions duplicated the path-gateway test and never exercised this form.
  expect(getSpecCompliantPath(`http://${cid}.ipfs.dweb.link`)).to.equal(`http://${cid}.ipfs.dweb.link/`)
  // directory-like path (no extension-like ending) gains a trailing slash
  expect(getSpecCompliantPath(`http://${cid}.ipfs.dweb.link/dir`)).to.equal(`http://${cid}.ipfs.dweb.link/dir/`)
  // file-like path (extension-like ending) is returned unchanged
  expect(getSpecCompliantPath(`http://${cid}.ipfs.dweb.link/file.txt`)).to.equal(`http://${cid}.ipfs.dweb.link/file.txt`)
})
})

describe('getRedirectUrl', () => {
it('returns path gateway url if headers is empty', async () => {
const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk'
const options = { headers: new Headers() }

const url = await getRedirectUrl({ resource, options, logger, cid })
expect(url).to.equal('http://ipfs.io/ipfs/bafkqabtimvwgy3yk')
})

it('should attempt to get the current host from the headers', async () => {
it('returns subdomain gateway url if host is passed', async () => {
const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk'
const options = { headers: new Headers({ 'x-forwarded-host': 'localhost:3931' }) }
fetchStub.returns(Promise.resolve(new Response(null, { status: 200 })))

const response = await getRedirectResponse({ resource, options, logger, cid, fetch: fetchStub })
expect(fetchStub.calledOnce).to.be.true()
expect(response).to.not.be.null()
expect(response).to.have.property('status', 301)
const location = response?.headers.get('location')
expect(location).to.equal('http://bafkqabtimvwgy3yk.ipfs.localhost:3931/')
const options = { headers: new Headers({ host: 'ipfs.io' }) }

const url = await getRedirectUrl({ resource, options, logger, cid })
expect(url).to.equal('http://bafkqabtimvwgy3yk.ipfs.ipfs.io/')
})

it('should return redirect response to requested host with trailing slash when HEAD fetch fails', async () => {
it('returns subdomain gateway url if x-forwarded-host is passed', async () => {
const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk'
const options = { headers: new Headers({ 'x-forwarded-host': 'localhost:3931' }) }
fetchStub.returns(Promise.reject(new Response(null, { status: 404 })))

const response = await getRedirectResponse({ resource, options, logger, cid, fetch: fetchStub })
expect(fetchStub.calledOnce).to.be.true()
expect(response).to.not.be.null()
expect(response).to.have.property('status', 301)
const location = response?.headers.get('location')
// note that the URL returned in location header has trailing slash.
expect(location).to.equal('http://ipfs.io/ipfs/bafkqabtimvwgy3yk/')
const options = { headers: new Headers({ 'x-forwarded-host': 'dweb.link' }) }

const url = await getRedirectUrl({ resource, options, logger, cid })
expect(url).to.equal('http://bafkqabtimvwgy3yk.ipfs.dweb.link/')
})

it('should not return redirect response to x-forwarded-host if HEAD fetch fails', async () => {
const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk/file.txt'
const options = { headers: new Headers({ 'x-forwarded-host': 'localhost:3931' }) }
fetchStub.returns(Promise.reject(new Response(null, { status: 404 })))
it('returns https subdomain gateway url if proto & host are passed', async () => {
const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk'
const options = { headers: new Headers({ host: 'ipfs.io', 'x-forwarded-proto': 'https' }) }

const response = await getRedirectResponse({ resource, options, logger, cid, fetch: fetchStub })
expect(fetchStub.calledOnce).to.be.true()
expect(response).to.be.null()
const url = await getRedirectUrl({ resource, options, logger, cid })
expect(url).to.equal('https://bafkqabtimvwgy3yk.ipfs.ipfs.io/')
})

it('should not return redirect response to x-forwarded-host when HEAD fetch fails and trailing slash already exists', async () => {
const resource = 'http://ipfs.io/ipfs/bafkqabtimvwgy3yk/'
const options = { headers: new Headers({ 'x-forwarded-host': 'localhost:3931' }) }
fetchStub.returns(Promise.reject(new Response(null, { status: 404 })))
it('returns the given subdomain gateway url given a subdomain gateway url', async () => {
const resource = 'https://bafkqabtimvwgy3yk.ipfs.inbrowser.dev'
const options = { headers: new Headers({ host: 'bafkqabtimvwgy3yk.ipfs.inbrowser.dev' }) }

const response = await getRedirectResponse({ resource, options, logger, cid, fetch: fetchStub })
expect(fetchStub.calledOnce).to.be.true()
expect(response).to.be.null()
const url = await getRedirectUrl({ resource, options, logger, cid })
expect(url).to.equal('https://bafkqabtimvwgy3yk.ipfs.inbrowser.dev')
})
})
})
Loading