1.20.x link.module link_validate_url($text)

Validates a URL.

Accepts all URLs that follow the RFC 1738 standard for URL formation, and all e-mail addresses that follow the RFC 2368 standard for mailto address formation.

Parameters

string $text: URL to be validated.

Return value

mixed: Returns boolean FALSE if the URL is not valid. On success, returns one of the LINK_(linktype) constants.

File

modules/link/link.module, line 1057
Defines simple link field types.

Code

/**
 * Validates a URL and classifies it by link type.
 *
 * The text is compared, in order, against: the literal '<front>' token, a
 * mailto pattern, a news pattern, an internal-path pattern, an external-URL
 * pattern and finally an internal file-path pattern. The first match decides
 * the return value.
 *
 * @param string $text
 *   URL to be validated.
 *
 * @return mixed
 *   Boolean FALSE if the text matches none of the patterns. Otherwise one of
 *   LINK_FRONT, LINK_EMAIL, LINK_NEWS, LINK_INTERNAL or LINK_EXTERNAL.
 */
function link_validate_url($text) {
  $allowed_protocols = settings_get('filter_allowed_protocols', array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal'));
  // NOTE(review): _link_escape_domain() is defined elsewhere; presumably it
  // normalizes the host part so the ASCII-only patterns below can be applied.
  $cleaned_text = _link_escape_domain($text);

  // Protocol names come from a setting, so quote them before embedding them in
  // the pattern: a name such as "svn+ssh" contains a regex metacharacter, and
  // a "/" would terminate the pattern delimiter early.
  $quoted_protocols = array();
  foreach ($allowed_protocols as $allowed_protocol) {
    $quoted_protocols[] = preg_quote($allowed_protocol, '/');
  }

  $protocol = '((?:' . implode('|', $quoted_protocols) . '):\/\/)';
  $user_pass = '(?:(?:[a-z0-9\-_\w\.\+!$&\'\(\)*\+,;=])|(?:%[0-9a-f]{2}))+';
  $authentication = '(?:' . $user_pass . '(?::' . $user_pass . ')?@)';
  $domain = '(?:(?:[a-z0-9](?:[a-z0-9\-_])*\.)*(?:(?:[a-z0-9][a-z0-9\-]{1,62}\.)(?:[a-z0-9][a-z0-9\-]{1,62})))';
  $ipv4 = '(?:[0-9]{1,3}(\.[0-9]{1,3}){3})';
  $ipv6 = '(?:[0-9a-fA-F]{1,4}(\:[0-9a-fA-F]{1,4}){7})';
  $port = '(?::([0-9]{1,5}))';

  // Pattern specific to external links.
  $external_pattern = '/^' . $protocol . '?' . $authentication . '?(?P<domain>' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $port . '?';

  // Directories are loosely interpreted, only reserved characters *must* be
  // encoded: https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding_reserved_characters
  $directories = '(\/[^<>#"?]*)*';

  // Query strings are more strict and must be all URL encoded, ASCII-only.
  $query = '(\/?\?([?a-z0-9+_|\-\.~\/\\\\%=&,$\'!():;*@\[\]{} ]*))';

  // Anchors must also be encoded, ASCII-only.
  $anchor = '(#[a-z0-9_\-\.~+%=&,$\'():;*@\[\]\/\?]*)';

  // The rest of the path for a standard URL. This also closes the pattern and
  // applies the case-insensitive modifier.
  $end = $directories . '?' . $query . '?' . $anchor . '?$/i';

  // Add the end to the external pattern.
  $external_pattern .= $end;

  // Pattern specific to internal links.
  $internal_pattern = '/^([a-z0-9_\-+\[\] ]+)' . $end;
  $internal_pattern_file = '/^([a-z0-9_\-+\[\]\. \/\(\)][a-z0-9_\-+\[\]\. \(\)][a-z0-9_\-+\[\]\. \/\(\)]+)$/i';

  $message_id = '[^@].*@' . $domain;
  $newsgroup_name = '([0-9a-z+-]*\.)*[0-9a-z+-]*';
  $news_pattern = '/^news:(' . $newsgroup_name . '|' . $message_id . ')$/i';

  // The e-mail pattern is case-insensitive like every other pattern here.
  // $domain only lists lower-case letters, so without the "i" modifier an
  // address such as "mailto:user@Example.com" would fail this check and could
  // instead be picked up by the (case-insensitive) external pattern, where
  // "mailto:user@" is accepted as "user:password@" authentication.
  $email_user = '([^ @\-][^ @]*)';
  $email_pattern = '/^mailto:' . $email_user . '@(' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $query . '?$/i';

  // The front-page token is compared against the raw text, not the cleaned
  // text.
  if ($text === '<front>') {
    return LINK_FRONT;
  }
  if (in_array('mailto', $allowed_protocols, TRUE) && preg_match($email_pattern, $cleaned_text)) {
    return LINK_EMAIL;
  }
  if (in_array('news', $allowed_protocols, TRUE) && preg_match($news_pattern, $cleaned_text)) {
    return LINK_NEWS;
  }
  // Internal paths are tested before external URLs, so the order of these
  // checks is significant.
  if (preg_match($internal_pattern, $cleaned_text)) {
    return LINK_INTERNAL;
  }
  if (preg_match($external_pattern, $cleaned_text)) {
    return LINK_EXTERNAL;
  }
  // Last resort: paths containing dots, slashes or parentheses that neither
  // pattern above accepted.
  if (preg_match($internal_pattern_file, $cleaned_text)) {
    return LINK_INTERNAL;
  }

  return FALSE;
}