1.20.x link.module | link_validate_url($text) |
Validates a URL.
Accepts all URLs following RFC 1738 standard for URL formation and all e-mail addresses following the RFC 2368 standard for mailto address formation.
Parameters
string $text: The URL to be validated.
Return value
mixed: Returns boolean FALSE if the URL is not valid. On success, returns one of the LINK_(linktype) constants.
File
- modules/link/link.module, line 1057 - Defines simple link field types.
Code
/**
 * Validates a URL and classifies it as one of the LINK_* link types.
 *
 * The text is tested against a series of regular expressions in a fixed
 * order: the literal '<front>', mailto: addresses, news: URLs, internal paths,
 * external URLs and finally internal file paths. The first match wins, so the
 * order of the checks below is significant.
 *
 * @param string $text
 *   The URL to be validated.
 *
 * @return mixed
 *   FALSE if the text matches none of the patterns. Otherwise one of
 *   LINK_FRONT, LINK_EMAIL, LINK_NEWS, LINK_INTERNAL or LINK_EXTERNAL.
 */
function link_validate_url($text) {
  $allowed_protocols = settings_get('filter_allowed_protocols', array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal'));
  $cleaned_text = _link_escape_domain($text);

  // Protocol names come from site settings, so quote them before embedding
  // them in the expression. Otherwise a scheme such as "svn+ssh" would have
  // its "+" treated as a quantifier, and a "/" would end the pattern early.
  $quoted_protocols = array();
  foreach ($allowed_protocols as $allowed_protocol) {
    $quoted_protocols[] = preg_quote($allowed_protocol, '/');
  }
  $protocol = '((?:' . implode("|", $quoted_protocols) . '):\/\/)';

  // Building blocks for the authority part of an external URL.
  $user_pass = '(?:(?:[a-z0-9\-_\w\.\+!$&\'\(\)*\+,;=])|(?:%[0-9a-f]{2}))+';
  $authentication = '(?:' . $user_pass . '(?::' . $user_pass . ')?@)';
  $domain = '(?:(?:[a-z0-9](?:[a-z0-9\-_])*\.)*(?:(?:[a-z0-9][a-z0-9\-]{1,62}\.)(?:[a-z0-9][a-z0-9\-]{1,62})))';
  $ipv4 = '(?:[0-9]{1,3}(\.[0-9]{1,3}){3})';
  // Only the full, uncompressed eight-group IPv6 notation is recognised.
  $ipv6 = '(?:[0-9a-fA-F]{1,4}(\:[0-9a-fA-F]{1,4}){7})';
  $port = '(?::([0-9]{1,5}))';

  // Pattern specific to external links.
  $external_pattern = '/^' . $protocol . '?' . $authentication . '?(?P<domain>' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $port . '?';

  // Directories are loosely interpreted, only reserved characters *must* be
  // encoded: https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding_reserved_characters
  $directories = '(\/[^<>#"?]*)*';
  // Query strings are more strict and must be all URL encoded, ASCII-only.
  $query = '(\/?\?([?a-z0-9+_|\-\.~\/\\\\%=&,$\'!():;*@\[\]{} ]*))';
  // Anchors must also be encoded, ASCII-only.
  $anchor = '(#[a-z0-9_\-\.~+%=&,$\'():;*@\[\]\/\?]*)';

  // The rest of the path for a standard URL. This also closes the pattern and
  // applies the case-insensitive modifier.
  $end = $directories . '?' . $query . '?' . $anchor . '?$/i';

  // Add the end to the external pattern.
  $external_pattern .= $end;

  // Pattern specific to internal links.
  $internal_pattern = '/^([a-z0-9_\-+\[\] ]+)' . $end;
  $internal_pattern_file = '/^([a-z0-9_\-+\[\]\. \/\(\)][a-z0-9_\-+\[\]\. \(\)][a-z0-9_\-+\[\]\. \/\(\)]+)$/i';

  // Patterns for news: URLs (either a newsgroup name or a message ID).
  $message_id = '[^@].*@' . $domain;
  $newsgroup_name = '([0-9a-z+-]*\.)*[0-9a-z+-]*';
  $news_pattern = '/^news:(' . $newsgroup_name . '|' . $message_id . ')$/i';

  // Pattern for mailto: addresses. The "i" modifier is required because
  // $domain and $query only list lowercase letters. Without it an address
  // such as "mailto:foo@example.com?subject=Hello" fails here and is then
  // accepted by the external pattern, which reads "mailto:foo@" as
  // authentication, so the link is reported as LINK_EXTERNAL.
  $email_user = '([^ @\-][^ @]*)';
  $email_pattern = '/^mailto:' . $email_user . '@(' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $query . '?$/i';

  // The front page token is compared against the raw, unescaped input.
  if (strcmp($text, '<front>') === 0) {
    return LINK_FRONT;
  }
  if (in_array('mailto', $allowed_protocols) && preg_match($email_pattern, $cleaned_text)) {
    return LINK_EMAIL;
  }
  if (in_array('news', $allowed_protocols) && preg_match($news_pattern, $cleaned_text)) {
    return LINK_NEWS;
  }
  if (preg_match($internal_pattern, $cleaned_text)) {
    return LINK_INTERNAL;
  }
  if (preg_match($external_pattern, $cleaned_text)) {
    return LINK_EXTERNAL;
  }
  // Checked last: this looser pattern also allows dots and parentheses.
  if (preg_match($internal_pattern_file, $cleaned_text)) {
    return LINK_INTERNAL;
  }
  return FALSE;
}