Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#1764]feature: add advanced urlcheck #1157

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions framework/src/play/data/validation/URL.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,17 @@
public @interface URL {

String message() default URLCheck.mes;

/**
* TLDs have been made mandatory so single names like "localhost" fails'
* The default regex was built to match URLs having a real domain name (at least 2 labels separated by a dot).
* see: https://gist.github.com/dperini/729294
*/
boolean tldMandatory() default true;

/**
* exclude loopback address space
*/
boolean excludeLoopback() default true;
}

64 changes: 60 additions & 4 deletions framework/src/play/data/validation/URLCheck.java
Original file line number Diff line number Diff line change
@@ -1,28 +1,84 @@
package play.data.validation;

import java.util.regex.Pattern;
import net.sf.oval.Validator;
import net.sf.oval.configuration.annotation.AbstractAnnotationCheck;
import net.sf.oval.context.OValContext;

import java.util.regex.Pattern;

@SuppressWarnings("serial")
public class URLCheck extends AbstractAnnotationCheck<URL> {

static final String mes = "validation.url";
static Pattern urlPattern = Pattern.compile("^(http|https|ftp)\\://[a-zA-Z0-9\\-\\.]+\\.[a-z" +
"A-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\\-\\._\\?\\,\\'/\\\\\\+&amp;%\\$#\\=~\\!])*$");

boolean tldMandatory;
boolean excludeLoopback;

/**
* big thank you to https://gist.github.com/dperini/729294
* well suited url pattern for most of internet routeable ips and real domains
*/
static String[] regexFragments = {
/* 00 */ "^",
// protocol identifier
/* 01 */ "(?:(?:https?|s?ftp|rtsp|mms)://)",
// user:pass authentication
/* 02 */ "(?:\\S+(?::\\S*)?@)?",
/* 03 */ "(?:",
// IP address exclusion
// private & local networks
/* 04 */ "(?!(?:10|127)(?:\\.\\d{1,3}){3})",
/* 05 */ "(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})",
/* 06 */ "(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})",
// IP address dotted notation octets
// excludes loopback network 0.0.0.0
// excludes reserved space >= 224.0.0.0
// excludes network & broacast addresses
// (first & last IP address of each class)
/* 07 */ "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])",
/* 08 */ "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}",
/* 09 */ "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))",
/* 10 */ "|",
// host name
/* 11 */ "(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)",
// domain name
/* 12 */ "(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*",
// TLD identifier
/* 13 */ "(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))",
// TLD may end with dot
/* 14 */ "\\.?",
/* 15 */ ")",
// port number
/* 16 */ "(?::\\d{2,5})?",
// resource path
/* 17 */ "(?:[/?#]\\S*)?",
/* 18 */ "$"
};
static Pattern urlPattern = Pattern.compile(String.join("", regexFragments), Pattern.CASE_INSENSITIVE);

@Override
public void configure(URL url) {
setMessage(url.message());
this.tldMandatory = url.tldMandatory();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need to store 2 fields? Can't we just store instance of URL?
Like

public void configure(URL url) {
      this.url = url;
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not know if that is a good way. Maybe ask the author of OVal why he not just copies a reference of the annotations in all of his checks.
https://github.com/sebthom/oval/tree/1.32/src/main/java/net/sf/oval/constraint

this.excludeLoopback = url.excludeLoopback();
}

@Override
public boolean isSatisfied(Object validatedObject, Object value, OValContext context, Validator validator) {
if (value == null || value.toString().length() == 0) {
return true;
}
return urlPattern.matcher(value.toString()).matches();
if (!tldMandatory || !excludeLoopback) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest to change order of if/else. It would make code easier to read.

if (tldMandatory && excludeLoopback) {
  return urlPattern.matcher(value.toString()).matches();
}
// ... and here come all the other magic...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes maybe nicer but that is cosmetic and for me i like the most frequent return at the end of the method.

//slow for special cases
String[] localRegexFragments = new String[regexFragments.length];
System.arraycopy(regexFragments, 0, localRegexFragments, 0, regexFragments.length);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Creating every time a new array seems to be too resource-consuming. And the code is not really simple.

Can't we just hold 4 regexps in 4 constants? I mean

Pattern localUrlPattern_false_false = Pattern.compile(...);
Pattern localUrlPattern_false_true = Pattern.compile(...);
Pattern localUrlPattern_true_false = Pattern.compile(...);
Pattern localUrlPattern_true_true = Pattern.compile(...);

Copy link
Contributor Author

@flybyray flybyray Jun 2, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the if part is only the special case, it will not be called every time

see URL annotation.
tldMandatory default true
excludeLoopback default true

And yes you can do x-regexps. but maybe someone will add someday additional special cases and we need more patterns, then i will ask a similar question Do we really need to store x-fields? for all those patterns?
But maybe it is better to use the configure method to initiate only once the correct pattern and just use it in satisfy.

if (!excludeLoopback) localRegexFragments[4] = "(?!(?:10)(?:\\.\\d{1,3}){3})";
if (!tldMandatory) localRegexFragments[13] = "";
Pattern localUrlPattern = Pattern.compile(String.join("", localRegexFragments), Pattern.CASE_INSENSITIVE);
return localUrlPattern.matcher(value.toString()).matches();
} else {
return urlPattern.matcher(value.toString()).matches();
}
}

}
15 changes: 13 additions & 2 deletions framework/src/play/data/validation/Validation.java
Original file line number Diff line number Diff line change
Expand Up @@ -400,14 +400,25 @@ public ValidationResult email(Object o) {
return Validation.email(key, o);
}

public static ValidationResult url(String key, Object o) {
public static ValidationResult url(String key, Object o, boolean tldMandatory, boolean excludeLoopback) {
URLCheck check = new URLCheck();
check.tldMandatory = tldMandatory;
check.excludeLoopback = excludeLoopback;
return applyCheck(check, key, o);
}

public static ValidationResult url(String key, Object o) {
return url(key, o, true, true);
}

public ValidationResult url(Object o, boolean tldMandatory, boolean excludeLoopback ) {
String key = getLocalName(o);
return Validation.url(key, o, tldMandatory, excludeLoopback);
}

public ValidationResult url(Object o) {
String key = getLocalName(o);
return Validation.url(key, o);
return Validation.url(key, o, true, true);
}

public static ValidationResult phone(String key, Object o) {
Expand Down
89 changes: 89 additions & 0 deletions framework/test-src/play/data/validation/URLTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package play.data.validation;

import org.junit.Test;
import play.i18n.MessagesBuilder;

import static org.junit.Assert.assertEquals;

public class URLTest {
@Test
public void validURLValidationTest() {
new MessagesBuilder().build();
Validation.current.set(new Validation());

assertValidURL(true, "http://foo.com/blah_blah");
assertValidURL(true, "http://foo.com/blah_blah/");
assertValidURL(true, "http://foo.com/blah_blah_(wikipedia)");
assertValidURL(true, "http://foo.com/blah_blah_(wikipedia)_(again)");
assertValidURL(true, "http://www.example.com/wpstyle/?p=364");
assertValidURL(true, "https://www.example.com/foo/?bar=baz&inga=42&quux");
assertValidURL(true, "http://✪df.ws/123");
assertValidURL(true, "http://userid:[email protected]:8080");
assertValidURL(true, "http://userid:[email protected]:8080/");
assertValidURL(true, "http://[email protected]");
assertValidURL(true, "http://[email protected]/");
assertValidURL(true, "http://[email protected]:8080");
assertValidURL(true, "http://[email protected]:8080/");
assertValidURL(true, "http://userid:[email protected]");
assertValidURL(true, "http://userid:[email protected]/");
assertValidURL(true, "http://142.42.1.1/");
assertValidURL(true, "http://142.42.1.1:8080/");
assertValidURL(true, "http://➡.ws/䨹");
assertValidURL(true, "http://⌘.ws");
assertValidURL(true, "http://⌘.ws/");
assertValidURL(true, "http://foo.com/blah_(wikipedia)#cite-1");
assertValidURL(true, "http://foo.com/blah_(wikipedia)_blah#cite-1");
assertValidURL(true, "http://foo.com/unicode_(✪)_in_parens");
assertValidURL(true, "http://foo.com/(something)?after=parens");
assertValidURL(true, "http://☺.damowmow.com/");
assertValidURL(true, "http://code.google.com/events/#&product=browser");
assertValidURL(true, "http://j.mp");
assertValidURL(true, "ftp://foo.bar/baz");
assertValidURL(true, "http://foo.bar/?q=Test%20URL-encoded%20stuff");
assertValidURL(true, "http://مثال.إختبار");
assertValidURL(true, "http://例子.测试");
assertValidURL(true, "http://उदाहरण.परीक्षा");
assertValidURL(true, "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com");
assertValidURL(true, "http://1337.net");
assertValidURL(true, "http://a.b-c.de");
assertValidURL(true, "http://223.255.255.254");
}

/**
* see ticket https://play.lighthouseapp.com/projects/57987-play-framework/tickets/1764-url-regex-is-not-correct
*/
@Test
public void validURL_1764_Test() {
new MessagesBuilder().build();
Validation.current.set(new Validation());

assertValidURL(false, "http://localhost/");
assertValidURL(false, "http://LOCALHOST/");
assertValidURL(false, "http://localhost:80/");

assertValidURL(true, "http://localhost/", false, true);
assertValidURL(true, "http://LOCALHOST/", false, true);
assertValidURL(true, "https://LOCALHOST/", false, true);
assertValidURL(true, "ftp://LOCALHOST/", false, true);
assertValidURL(true, "sftp://LOCALHOST/", false, true);
assertValidURL(true, "rtsp://LOCALHOST/", false, true);
assertValidURL(true, "mms://LOCALHOST/", false, true);
assertValidURL(true, "http://localhost:80/", false, true);

assertValidURL(false, "http://127.0.0.1");
assertValidURL(false, "http://127.0.0.1:80");

assertValidURL(true, "http://127.0.0.1", true, false);
assertValidURL(true, "http://127.0.0.1:80", true, false);
}

private void assertValidURL(boolean valid, String url, boolean tldMandatory, boolean excludeLoopback) {
Validation.clear();
Validation.ValidationResult result = Validation.url("url", url, tldMandatory, excludeLoopback);
assertEquals("Validation url [" + url + "] should be " + valid, valid, result.ok);
}

private void assertValidURL(Boolean valid, String url) {
assertValidURL(valid, url, true, true);
}
}