Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: links in the post could be relative links #47

Merged
merged 1 commit into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions app/src/main/java/run/halo/feed/RelativeLinkProcessor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package run.halo.feed;

import com.google.common.base.Throwables;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.Assert;
import org.springframework.web.util.UriComponentsBuilder;
import org.springframework.web.util.UriUtils;
import run.halo.app.core.attachment.ThumbnailSize;
import run.halo.app.infra.utils.PathUtils;
import run.halo.feed.telemetry.TelemetryEndpoint;

import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.Locale;

/**
 * Rewrites relative links found in an HTML fragment into absolute links that are
 * resolved against a configured external URL.
 *
 * <p>The attributes handled are {@code href} on {@code a} and {@code link}, and
 * {@code src} on {@code img}, {@code source}, {@code video}, {@code script},
 * {@code iframe}, {@code frame} and {@code embed}. Image sources are additionally
 * routed through the thumbnail endpoint, unless they point at the telemetry path.
 *
 * <p>Processing is best-effort: a link that cannot be parsed is left untouched, and if
 * the document as a whole cannot be processed the original HTML is returned.
 */
@Slf4j
public class RelativeLinkProcessor {
    /** Path of the endpoint that serves a thumbnail for the URI given in its query. */
    private static final String THUMBNAIL_ENDPOINT =
        "/apis/api.storage.halo.run/v1alpha1/thumbnails/-/via-uri";

    private final URI externalUri;

    /**
     * Creates a processor that resolves relative links against the given external URL.
     *
     * @param externalUrl the base URL used to make links absolute, must not be null
     * @throws IllegalArgumentException if {@code externalUrl} is null or is not a valid URI
     */
    public RelativeLinkProcessor(String externalUrl) {
        Assert.notNull(externalUrl, "External URL must not be null");
        this.externalUri = URI.create(externalUrl);
    }

    /**
     * Converts the relative links in the given HTML into absolute links.
     *
     * @param html the HTML fragment to process
     * @return the inner HTML of the parsed body with links rewritten, or {@code html}
     *     itself when it is null/empty or when processing fails
     */
    public String processForHtml(String html) {
        if (StringUtils.isEmpty(html)) {
            // Nothing to parse; avoids a noisy warning for null input.
            return html;
        }
        try {
            return doProcessForHtml(html);
        } catch (Exception e) {
            // Best-effort: fall back to the untouched input rather than failing the caller.
            log.warn("Failed to process relative links for HTML", Throwables.getRootCause(e));
            return html;
        }
    }

    private String doProcessForHtml(String html) {
        var document = Jsoup.parse(html);

        // Anchors.
        processElementAttr(document.select("a[href]"), "href", false);
        // Images are the only elements that are routed through the thumbnail endpoint.
        processElementAttr(document.select("img[src]"), "src", true);
        // Media sources (video/audio <source>) and <video> itself.
        processElementAttr(document.select("source[src]"), "src", false);
        processElementAttr(document.select("video[src]"), "src", false);
        // Linked resources and scripts.
        processElementAttr(document.select("link[href]"), "href", false);
        processElementAttr(document.select("script[src]"), "src", false);
        // Embedded documents.
        processElementAttr(document.select("iframe[src]"), "src", false);
        processElementAttr(document.select("frame[src]"), "src", false);
        processElementAttr(document.select("embed[src]"), "src", false);

        return document.body().html();
    }

    /**
     * Rewrites the given attribute on every element.
     *
     * @param elements the elements to update
     * @param attrKey the attribute holding the link
     * @param canThumb whether the link should be wrapped in a thumbnail URL
     */
    private void processElementAttr(Elements elements, String attrKey, boolean canThumb) {
        for (Element element : elements) {
            String value = element.attr(attrKey);
            if (StringUtils.isBlank(value)) {
                // A blank link has nothing to resolve and must not become a thumbnail URL.
                continue;
            }
            try {
                String resolved = canThumb && isNotTelemetryLink(value)
                    ? genThumbUrl(value, ThumbnailSize.M)
                    : processLink(value);
                element.attr(attrKey, resolved);
            } catch (IllegalArgumentException | IllegalStateException e) {
                // One malformed link (e.g. containing a space) must not prevent the
                // remaining links of the document from being processed.
                log.debug("Skipping link that cannot be parsed as a URI: {}", value, e);
            }
        }
    }

    boolean isNotTelemetryLink(String uri) {
        return uri != null && !uri.contains(TelemetryEndpoint.TELEMETRY_PATH);
    }

    /**
     * Builds the absolute thumbnail URL for the given image URL.
     *
     * @param url the image URL, passed URL-encoded as the {@code uri} query parameter
     * @param size the requested thumbnail size, passed lower-cased as {@code size}
     * @return the absolute thumbnail URL
     */
    private String genThumbUrl(String url, ThumbnailSize size) {
        return processLink(THUMBNAIL_ENDPOINT
            + "?uri=" + UriUtils.encode(url, StandardCharsets.UTF_8)
            + "&size=" + size.name().toLowerCase(Locale.ROOT)
        );
    }

    /**
     * Resolves a single link against the external URL.
     *
     * @param link the link to resolve
     * @return the absolute link, or {@code link} unchanged when it is blank,
     *     protocol-relative or already absolute
     * @throws IllegalArgumentException if the link is not a syntactically valid URI
     */
    private String processLink(String link) {
        // Protocol-relative links ("//host/path") already name their own host; resolving
        // them here would silently replace that host with the external one.
        if (StringUtils.isBlank(link) || link.startsWith("//")
            || PathUtils.isAbsoluteUri(link)) {
            return link;
        }
        var contextPath = StringUtils.defaultIfBlank(externalUri.getPath(), "/");
        var linkUri = UriComponentsBuilder.fromUriString(URI.create(link).toASCIIString())
            .build(true);
        var builder = UriComponentsBuilder.fromUriString(externalUri.toString());
        if (shouldAppendPath(contextPath, link)) {
            builder.pathSegment(linkUri.getPathSegments().toArray(new String[0]));
        } else {
            builder.replacePath(linkUri.getPath());
        }
        return builder.query(linkUri.getQuery())
            .fragment(linkUri.getFragment())
            .build(true)
            .toUri()
            .toString();
    }

    /**
     * Decides whether the link path has to be appended to the context path of the
     * external URL (as opposed to replacing it).
     *
     * @param contextPath the path of the external URL, {@code "/"} when it has none
     * @param link the link being resolved
     * @return {@code true} when a non-root context path exists and the link is not
     *     already located under it
     */
    private static boolean shouldAppendPath(String contextPath, String link) {
        var base = contextPath.endsWith("/")
            ? contextPath.substring(0, contextPath.length() - 1)
            : contextPath;
        if (base.isEmpty()) {
            // Root context path: the link path is used as-is.
            return false;
        }
        if (!link.startsWith(base)) {
            return true;
        }
        if (link.length() == base.length()) {
            return false;
        }
        // Only treat the prefix as the context path when it ends on a segment boundary,
        // so that "/blog" does not match "/blogger/post".
        char next = link.charAt(base.length());
        return next != '/' && next != '?' && next != '#';
    }
}
7 changes: 7 additions & 0 deletions app/src/main/java/run/halo/feed/RssXmlBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ private void createItemElementToChannel(Element channel, RSS2.Item item) {
itemElement.addElement("link").addText(item.getLink());

var description = Optional.of(getDescriptionWithTelemetry(item))
.map(content -> {
if (externalUrl != null) {
return new RelativeLinkProcessor(externalUrl)
.processForHtml(content);
}
return content;
})
.map(XmlCharUtils::removeInvalidXmlChar)
.orElseThrow();
itemElement.addElement("description").addCDATA(description);
Expand Down
4 changes: 2 additions & 2 deletions app/src/test/java/run/halo/feed/RSS2Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ void invalidCharTest() {
RSS2.Item.builder()
.title("title1")
.description("""
<p>并且会保留处理后的图片以供后面的访问。</p>
<p>&并且会保留处理后的图片以供后面的访问。</p>
""")
.link("link1")
.pubDate(Instant.EPOCH)
Expand Down Expand Up @@ -175,7 +175,7 @@ void invalidCharTest() {
</title>
<link>link1</link>
<description>
<![CDATA[<p>并且会保留处理后的图片以供后面的访问。</p>]]>
<![CDATA[<p>&并且会保留处理后的图片以供后面的访问。</p>]]>
</description>
<guid isPermaLink="false">guid1</guid>
<pubDate>Thu, 1 Jan 1970 00:00:00 GMT</pubDate>
Expand Down
39 changes: 39 additions & 0 deletions app/src/test/java/run/halo/feed/RelativeLinkProcessorTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package run.halo.feed;

import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Unit tests covering {@link RelativeLinkProcessor#processForHtml(String)} with
 * {@code http://localhost:8090} as the external URL.
 *
 * @author guqing
 * @since 1.4.1
 */
class RelativeLinkProcessorTest {
    private final RelativeLinkProcessor linkProcessor =
        new RelativeLinkProcessor("http://localhost:8090");

    /** Plain text without any markup is returned unchanged. */
    @Test
    void textContent() {
        final String input = "hello world";

        assertThat(linkProcessor.processForHtml(input)).isEqualTo(input);
    }

    /** A relative anchor href is resolved against the external URL. */
    @Test
    void testProcessForHtmlIncludeATag() {
        final String expected = "<a href=\"http://localhost:8090/hello\">hello</a>";

        String actual = linkProcessor.processForHtml("<a href=\"/hello\">hello</a>");

        assertThat(actual).isEqualTo(expected);
    }

    /** A relative image src is rewritten to an absolute thumbnail URL. */
    @Test
    void processForHtmlIncludeImgTag() {
        final String expected = "<img src=\"http://localhost:8090/apis/api.storage.halo"
            + ".run/v1alpha1/thumbnails/-/via-uri?uri=%2Fhello.jpg&amp;size=m\">";

        String actual = linkProcessor.processForHtml("<img src=\"/hello.jpg\"/>");

        assertThat(actual).isEqualTo(expected);
    }
}
Loading