35

I am validating URLs with the following regular expression. I also want google.com to validate, but it returns false. What can be changed in the regular expression below so that google.com validates?

// Results produced by the pattern in learnRegExp (defined below).
console.log(learnRegExp('http://www.google-com.123')); // false: last label must be three letters
console.log(learnRegExp('https://www.google-com.com')); // true
console.log(learnRegExp('http://google-com.com')); // true
console.log(learnRegExp('http://google.com')); // true
console.log(learnRegExp('google.com')); // false: the ftp/http(s) scheme prefix is required

/**
 * Checks whether a string looks like an ftp/http(s) URL.
 *
 * The pattern requires a scheme (ftp, http or https) followed by "://"
 * (extra slashes are tolerated), an optional "www." prefix, at least three
 * characters drawn from lowercase letters, digits, "-" and ".", and a final
 * label of exactly three lowercase letters.
 *
 * @param {string} url - Candidate URL to test.
 * @returns {boolean} True when the whole string matches the pattern.
 */
function learnRegExp(url) {
  // A named parameter replaces `learnRegExp.arguments[0]`: Function#arguments
  // is deprecated and throws a TypeError in strict mode and ES modules.
  return /^(ftp|https?):\/\/+(www\.)?[a-z0-9\-\.]{3,}\.[a-z]{3}$/.test(url);
}
4
  • Are you trying to validate all URLs, or do you only need it for 'google.com'? Commented Dec 29, 2011 at 11:14
  • 1
    Wrapping (ftp|https?):\/\/+(www\.)? with ( ... )? will do. However, does this regex really suit your needs? I mean: \.[a-z]{3}$ alone does exclude many valid URLs. Commented Dec 29, 2011 at 11:18
  • trying to validate all urls. ftp can be excluded. Commented Dec 29, 2011 at 11:29
  • possible duplicate of What is the best regular expression to check if a string is a valid URL? Commented Dec 29, 2011 at 12:07

6 Answers 6

113

This validates URLs in general

// Each call logs the candidate URL followed by the result of validateUrl (defined below).
console.log('http://www.google-com.123.com', validateUrl('http://www.google-com.123.com')); // true
console.log('http://www.google-com.123', validateUrl('http://www.google-com.123')); // false: the last label must be letters
console.log('https://www.google-com.com', validateUrl('https://www.google-com.com')); // true
console.log('http://google-com.com', validateUrl('http://google-com.com')); // true
console.log('http://google.com', validateUrl('http://google.com')); // true
console.log('google.com', validateUrl('google.com')); // false: no "//" prefix
console.log('http://www.gfh.', validateUrl('http://www.gfh.')); // false: trailing dot
console.log('http://www.gfh.c', validateUrl('http://www.gfh.c')); // false: last label is shorter than two letters
console.log('http://www.gfh:800000', validateUrl('http://www.gfh:800000')); // false: port has more than five digits
console.log('www.google.com ', validateUrl('www.google.com ')); // false: no "//" prefix, trailing space
console.log('http://google', validateUrl('http://google')); // false: no top-level domain
console.log('//cdnblabla.cloudfront.net/css/app.css', validateUrl('//cdnblabla.cloudfront.net/css/app.css')); // true: scheme-relative URL

/**
 * Tests a string against a general-purpose web URL pattern.
 *
 * Accepted shape, matched case-insensitively over the whole string:
 *   - an optional "http:", "https:" or "ftp:" scheme, then a mandatory "//";
 *   - optional "user[:password]@" credentials;
 *   - a host: either a dotted IPv4 address outside the ranges excluded by the
 *     look-aheads (10.x, 127.x, 169.254.x, 192.168.x, 172.16-31.x), or a
 *     host name whose last label is at least two non-digit characters;
 *   - an optional ":port" of two to five digits;
 *   - an optional path, query or fragment containing no whitespace.
 *
 * @param {string} value - Candidate URL.
 * @returns {boolean} True when the whole string matches the pattern.
 */
function validateUrl(value) {
  const urlPattern = /^(?:(?:(?:https?|ftp):)?\/\/)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:[/?#]\S*)?$/i;
  return urlPattern.test(value);
}

Should Match

// URLs that validateUrl is expected to accept; each is logged next to its result.
// The user-info cases are spelled out in full (userid[:password]@example.com):
// an obfuscated placeholder in their place would contain a space and could never match.
// forEach is used because the loop runs only for its logging side effect.
[
  "//www.google.com",
  "//cdnblabla.cloudfront.net/css/app.css",
  "http://✪df.ws/123",
  "http://userid:password@example.com:8080",
  "http://userid:password@example.com:8080/",
  "http://userid@example.com",
  "http://userid@example.com/",
  "http://userid@example.com:8080",
  "http://userid@example.com:8080/",
  "http://userid:password@example.com",
  "http://userid:password@example.com/",
  "http://142.42.1.1/",
  "http://142.42.1.1:8080/",
  "http://➡.ws/䨹",
  "http://⌘.ws",
  "http://⌘.ws/",
  "http://foo.com/blah_(wikipedia)#cite-1",
  "http://foo.com/blah_(wikipedia)_blah#cite-1",
  "http://foo.com/unicode_(✪)_in_parens",
  "http://foo.com/(something)?after=parens",
  "http://☺.damowmow.com/",
  "http://code.google.com/events/#&product=browser",
  "http://j.mp",
  "ftp://foo.bar/baz",
  "http://foo.bar/?q=Test%20URL-encoded%20stuff",
  "http://مثال.إختبار",
  "http://例子.测试"
].forEach(function(url) {
  console.log(url, validateUrl(url));
});

/**
 * Tests a string against a general-purpose web URL pattern.
 *
 * Accepted shape, matched case-insensitively over the whole string:
 *   - an optional "http:", "https:" or "ftp:" scheme, then a mandatory "//";
 *   - optional "user[:password]@" credentials;
 *   - a host: either a dotted IPv4 address outside the ranges excluded by the
 *     look-aheads (10.x, 127.x, 169.254.x, 192.168.x, 172.16-31.x), or a
 *     host name whose last label is at least two non-digit characters;
 *   - an optional ":port" of two to five digits;
 *   - an optional path, query or fragment containing no whitespace.
 *
 * @param {string} value - Candidate URL.
 * @returns {boolean} True when the whole string matches the pattern.
 */
function validateUrl(value) {
  const urlPattern = /^(?:(?:(?:https?|ftp):)?\/\/)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:[/?#]\S*)?$/i;
  return urlPattern.test(value);
}

Should Fail

// URLs that validateUrl is expected to reject; each is logged next to its result.
// forEach is used because the loop runs only for its logging side effect
// (map would build an array of undefined values that is thrown away).
[
  "http://",
  "http://.",
  "http://..",
  "http://../",
  "http://?",
  "http://??",
  "http://??/",
  "http://#",
  "http://##",
  "http://##/",
  "http://foo.bar?q=Spaces should be encoded",
  "//",
  "//a",
  "///a",
  "///",
  "http:///a",
  "foo.com",
  "rdar://1234",
  "h://test",
  "http:// shouldfail.com",
  ":// should fail",
  "http://foo.bar/foo(bar)baz quux",
  "ftps://foo.bar/",
  "http://-error-.invalid/",
  "http://-a.b.co",
  "http://a.b-.co",
  "http://0.0.0.0",
  "http://10.1.1.0",
  "http://10.1.1.255",
  "http://224.1.1.1",
  "http://1.1.1.1.1",
  "http://123.123.123",
  "http://3628126748",
  "http://.www.foo.bar/",
  "http://www.foo.bar./",
  "http://.www.foo.bar./",
  "http://10.1.1.1",
  "http://10.1.1.254"
].forEach(function(url) {
  console.log(url, validateUrl(url));
});

/**
 * Tests a string against a general-purpose web URL pattern.
 *
 * Accepted shape, matched case-insensitively over the whole string:
 *   - an optional "http:", "https:" or "ftp:" scheme, then a mandatory "//";
 *   - optional "user[:password]@" credentials;
 *   - a host: either a dotted IPv4 address outside the ranges excluded by the
 *     look-aheads (10.x, 127.x, 169.254.x, 192.168.x, 172.16-31.x), or a
 *     host name whose last label is at least two non-digit characters;
 *   - an optional ":port" of two to five digits;
 *   - an optional path, query or fragment containing no whitespace.
 *
 * @param {string} value - Candidate URL.
 * @returns {boolean} True when the whole string matches the pattern.
 */
function validateUrl(value) {
  const urlPattern = /^(?:(?:(?:https?|ftp):)?\/\/)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:[/?#]\S*)?$/i;
  return urlPattern.test(value);
}

How it works

// protocol identifier
"(?:(?:(?:https?|ftp):)?//)"
// user:pass authentication
"(?:\\S+(?::\\S*)?@)?"
"(?:"
// IP address exclusion
// private & local networks
"(?!(?:10|127)(?:\\.\\d{1,3}){3})"
"(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})"
"(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})"
// IP address dotted notation octets
// excludes loopback network 0.0.0.0
// excludes reserved space >= 224.0.0.0
// excludes network & broadcast addresses
// (first & last IP address of each class)
"(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])"
"(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}"
"(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))"
"|"
// host name
"(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)"
// domain name
"(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*"
// TLD identifier
"(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))"
// port number
"(?::\\d{2,5})?"
// resource path
"(?:[/?#]\\S*)?"

All of this comes from this gist; I hope it fills all your needs.

Sign up to request clarification or add additional context in comments.

9 Comments

I applaud you for the longest and most incomprehensible regex I've ever seen.
Also this fits the OP needs, it does not validate URL in general.
This fails for localhost and for addresses such as 127.0.0.1.
Doesn't match localhost. Do not use.
@V.J. then, convert url to lowerCase before validating. you could use .toLowerCase() in javascript on a string value
|
9

This is perfect for me. I hope it will be perfect for someone else! :)

/^((https?):\/\/)?([w|W]{3}\.)+[a-zA-Z0-9\-\.]{3,}\.[a-zA-Z]{2,}(\.[a-zA-Z]{2,})?$/

5 Comments

What does this answer provide that the earlier answers don't already provide?
@Louis which previous answers?
this is much better than the most voted answer which doesn't work cause accepts anything like ww.google or www.google or www.google.c ....but yours works perfectly....nice job
This also only accepts URLs in the www subdomain. URls such as developers.facebook.com will not pass this check. Replacing + with * fixes that though.
@Steve yes but then this would pass https://.google.com
2

Here you go: you need to make "ftp/http(s)://" optional rather than required. Use "?" for this.

/**
 * Checks whether a string looks like a URL, with the scheme made optional.
 *
 * The pattern accepts an optional "ftp://", "http://" or "https://" prefix,
 * an optional "www." prefix, at least three characters drawn from lowercase
 * letters, digits, "-" and ".", and a final label of exactly three lowercase
 * letters.
 *
 * @param {string} url - Candidate URL to test.
 * @returns {boolean} True when the whole string matches the pattern.
 */
function learnRegExp(url) {
  // `^` anchors the match at the start of the string. Without it the regex
  // may start matching anywhere, so any text that merely ends in a host-like
  // suffix (e.g. "not a url google.com") would be accepted.
  // A named parameter replaces `learnRegExp.arguments[0]`: Function#arguments
  // is deprecated and throws a TypeError in strict mode and ES modules.
  return /^((ftp|https?):\/\/)?(www\.)?[a-z0-9\-\.]{3,}\.[a-z]{3}$/.test(url);
}

3 Comments

As you can see, I only wrapped your (ftp|https?):\/\/ into ()?.
How do I make it not to match a http / https?
@San Replace (ftp|https?) with ftp
1
/(http|https):\/\/(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-\/]))?/

1 Comment

Looks like google.com is not validating. Added some code to validate it. :-)
0

The ^ symbol means "begins with", so the final log makes sense: the string does not begin with ftp or http(s). With ...$ you are also saying that the string must end with three letters, which again is why the case that fails (line 2) does so: it does not end like this. With some minor adjustments you should be there.

Comments

0

Why not have multiple regex for each case?

  1. Valid alphanumeric url: /^https?:\/\/([\w\d\-]+\.)+\w{2,}(\/.+)?$/

    Works with http://sub_do-main.a.co to things like https://a.a.a.a.aa/my-awesome_url?asd=12. You can try it at: https://regex101.com/r/oXFuGy/2

  2. IPv4, short and fast but not 100% accurate (it is used by validator.js): /^(\d{1,3}(\.|$)){4}/. It allows things like 999.999.999.999.

  3. IPv4, larger, slower but 100% accurate: ^(((25[0-5])|(2[0-4]\d)|(1\d{2})|(\d{1,2}))\.){3}(((25[0-5])|(2[0-4]\d)|(1\d{2})|(\d{1,2})))$. Found it here: https://stackoverflow.com/a/50650510/2862917.
  4. IPv6 (I don't know whether this is the best, most accurate or fastest approach): ^(([\da-fA-F]{0,4}:){1,7}[\da-fA-F]{0,4})$. Found it here: Regular expression that matches valid IPv6 addresses, whose first answer explains why it would be better to stop using regex to validate IPs (at least v6 ones).

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.