r90222 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r90221‎ | r90222 | r90223 >
Date:19:09, 16 June 2011
Author:ialex
Status:ok
Tags:
Comment:
Grouped URL-related functions
Modified paths:
  • /trunk/phase3/includes/GlobalFunctions.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/GlobalFunctions.php
@@ -312,6 +312,243 @@
313313 }
314314
315315 /**
 316+ * This function takes two arrays as input, and returns a CGI-style string, e.g.
 317+ * "days=7&limit=100". Options in the first array override options in the second.
 318+ * Options set to "" will not be output.
 319+ *
 320+ * @param $array1 Array( String|Array )
 321+ * @param $array2 Array( String|Array )
 322+ * @return String
 323+ */
 324+function wfArrayToCGI( $array1, $array2 = null ) {
 325+ if ( !is_null( $array2 ) ) {
 326+ $array1 = $array1 + $array2;
 327+ }
 328+
 329+ $cgi = '';
 330+ foreach ( $array1 as $key => $value ) {
 331+ if ( $value !== '' ) {
 332+ if ( $cgi != '' ) {
 333+ $cgi .= '&';
 334+ }
 335+ if ( is_array( $value ) ) {
 336+ $firstTime = true;
 337+ foreach ( $value as $v ) {
 338+ $cgi .= ( $firstTime ? '' : '&') .
 339+ urlencode( $key . '[]' ) . '=' .
 340+ urlencode( $v );
 341+ $firstTime = false;
 342+ }
 343+ } else {
 344+ if ( is_object( $value ) ) {
 345+ $value = $value->__toString();
 346+ }
 347+ $cgi .= urlencode( $key ) . '=' .
 348+ urlencode( $value );
 349+ }
 350+ }
 351+ }
 352+ return $cgi;
 353+}
 354+
 355+/**
 356+ * This is the logical opposite of wfArrayToCGI(): it accepts a query string as
 357+ * its argument and returns the same string in array form. This allows compa-
 358+ * tibility with legacy functions that accept raw query strings instead of nice
 359+ * arrays. Of course, keys and values are urldecode()d. Don't try passing in-
 360+ * valid query strings, or it will explode.
 361+ *
 362+ * @param $query String: query string
 363+ * @return array Array version of input
 364+ */
 365+function wfCgiToArray( $query ) {
 366+ if( isset( $query[0] ) && $query[0] == '?' ) {
 367+ $query = substr( $query, 1 );
 368+ }
 369+ $bits = explode( '&', $query );
 370+ $ret = array();
 371+ foreach( $bits as $bit ) {
 372+ if( $bit === '' ) {
 373+ continue;
 374+ }
 375+ list( $key, $value ) = explode( '=', $bit );
 376+ $key = urldecode( $key );
 377+ $value = urldecode( $value );
 378+ $ret[$key] = $value;
 379+ }
 380+ return $ret;
 381+}
 382+
 383+/**
 384+ * Append a query string to an existing URL, which may or may not already
 385+ * have query string parameters already. If so, they will be combined.
 386+ *
 387+ * @param $url String
 388+ * @param $query Mixed: string or associative array
 389+ * @return string
 390+ */
 391+function wfAppendQuery( $url, $query ) {
 392+ if ( is_array( $query ) ) {
 393+ $query = wfArrayToCGI( $query );
 394+ }
 395+ if( $query != '' ) {
 396+ if( false === strpos( $url, '?' ) ) {
 397+ $url .= '?';
 398+ } else {
 399+ $url .= '&';
 400+ }
 401+ $url .= $query;
 402+ }
 403+ return $url;
 404+}
 405+
 406+/**
 407+ * Expand a potentially local URL to a fully-qualified URL. Assumes $wgServer
 408+ * is correct.
 409+ *
 410+ * @todo this won't work with current-path-relative URLs
 411+ * like "subdir/foo.html", etc.
 412+ *
 413+ * @param $url String: either fully-qualified or a local path + query
 414+ * @return string Fully-qualified URL
 415+ */
 416+function wfExpandUrl( $url ) {
 417+ global $wgServer;
 418+ if( substr( $url, 0, 2 ) == '//' ) {
 419+ $bits = wfParseUrl( $wgServer );
 420+ $scheme = $bits ? $bits['scheme'] : 'http';
 421+ return $scheme . ':' . $url;
 422+ } elseif( substr( $url, 0, 1 ) == '/' ) {
 423+ return $wgServer . $url;
 424+ } else {
 425+ return $url;
 426+ }
 427+}
 428+
 429+/**
 430+ * Returns a regular expression of url protocols
 431+ *
 432+ * @return String
 433+ */
 434+function wfUrlProtocols() {
 435+ global $wgUrlProtocols;
 436+
 437+ static $retval = null;
 438+ if ( !is_null( $retval ) ) {
 439+ return $retval;
 440+ }
 441+
 442+ // Support old-style $wgUrlProtocols strings, for backwards compatibility
 443+ // with LocalSettings files from 1.5
 444+ if ( is_array( $wgUrlProtocols ) ) {
 445+ $protocols = array();
 446+ foreach ( $wgUrlProtocols as $protocol ) {
 447+ $protocols[] = preg_quote( $protocol, '/' );
 448+ }
 449+
 450+ $retval = implode( '|', $protocols );
 451+ } else {
 452+ $retval = $wgUrlProtocols;
 453+ }
 454+ return $retval;
 455+}
 456+
 457+/**
 458+ * parse_url() work-alike, but non-broken. Differences:
 459+ *
 460+ * 1) Does not raise warnings on bad URLs (just returns false)
 461+ * 2) Handles protocols that don't use :// (e.g., mailto: and news:) correctly
 462+ * 3) Adds a "delimiter" element to the array, either '://' or ':' (see (2))
 463+ *
 464+ * @param $url String: a URL to parse
 465+ * @return Array: bits of the URL in an associative array, per PHP docs
 466+ */
 467+function wfParseUrl( $url ) {
 468+ global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php
 469+ wfSuppressWarnings();
 470+ $bits = parse_url( $url );
 471+ wfRestoreWarnings();
 472+ if ( !$bits ) {
 473+ return false;
 474+ }
 475+
 476+ // most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
 477+ if ( in_array( $bits['scheme'] . '://', $wgUrlProtocols ) ) {
 478+ $bits['delimiter'] = '://';
 479+ } elseif ( in_array( $bits['scheme'] . ':', $wgUrlProtocols ) ) {
 480+ $bits['delimiter'] = ':';
 481+ // parse_url detects for news: and mailto: the host part of an url as path
 482+ // We have to correct this wrong detection
 483+ if ( isset( $bits['path'] ) ) {
 484+ $bits['host'] = $bits['path'];
 485+ $bits['path'] = '';
 486+ }
 487+ } else {
 488+ return false;
 489+ }
 490+
 491+ /* Provide an empty host for eg. file:/// urls (see bug 28627) */
 492+ if ( !isset( $bits['host'] ) ) {
 493+ $bits['host'] = '';
 494+
 495+ /* parse_url loses the third / for file:///c:/ urls (but not on variants) */
 496+ if ( substr( $bits['path'], 0, 1 ) !== '/' ) {
 497+ $bits['path'] = '/' . $bits['path'];
 498+ }
 499+ }
 500+ return $bits;
 501+}
 502+
 503+/**
 504+ * Make a URL index, appropriate for the el_index field of externallinks.
 505+ *
 506+ * @param $url String
 507+ * @return String
 508+ */
 509+function wfMakeUrlIndex( $url ) {
 510+ $bits = wfParseUrl( $url );
 511+
 512+ // Reverse the labels in the hostname, convert to lower case
 513+ // For emails reverse domainpart only
 514+ if ( $bits['scheme'] == 'mailto' ) {
 515+ $mailparts = explode( '@', $bits['host'], 2 );
 516+ if ( count( $mailparts ) === 2 ) {
 517+ $domainpart = strtolower( implode( '.', array_reverse( explode( '.', $mailparts[1] ) ) ) );
 518+ } else {
 519+ // No domain specified, don't mangle it
 520+ $domainpart = '';
 521+ }
 522+ $reversedHost = $domainpart . '@' . $mailparts[0];
 523+ } else {
 524+ $reversedHost = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) );
 525+ }
 526+ // Add an extra dot to the end
 527+ // Why? Is it in wrong place in mailto links?
 528+ if ( substr( $reversedHost, -1, 1 ) !== '.' ) {
 529+ $reversedHost .= '.';
 530+ }
 531+ // Reconstruct the pseudo-URL
 532+ $prot = $bits['scheme'];
 533+ $index = $prot . $bits['delimiter'] . $reversedHost;
 534+ // Leave out user and password. Add the port, path, query and fragment
 535+ if ( isset( $bits['port'] ) ) {
 536+ $index .= ':' . $bits['port'];
 537+ }
 538+ if ( isset( $bits['path'] ) ) {
 539+ $index .= $bits['path'];
 540+ } else {
 541+ $index .= '/';
 542+ }
 543+ if ( isset( $bits['query'] ) ) {
 544+ $index .= '?' . $bits['query'];
 545+ }
 546+ if ( isset( $bits['fragment'] ) ) {
 547+ $index .= '#' . $bits['fragment'];
 548+ }
 549+ return $index;
 550+}
 551+
 552+/**
316553 * Sends a line to the debug log if enabled or, optionally, to a comment in output.
317554 * In normal operation this is a NOP.
318555 *
@@ -1481,120 +1718,6 @@
14821719 }
14831720
14841721 /**
1485 - * This function takes two arrays as input, and returns a CGI-style string, e.g.
1486 - * "days=7&limit=100". Options in the first array override options in the second.
1487 - * Options set to "" will not be output.
1488 - *
1489 - * @param $array1 Array( String|Array )
1490 - * @param $array2 Array( String|Array )
1491 - * @return String
1492 - */
1493 -function wfArrayToCGI( $array1, $array2 = null ) {
1494 - if ( !is_null( $array2 ) ) {
1495 - $array1 = $array1 + $array2;
1496 - }
1497 -
1498 - $cgi = '';
1499 - foreach ( $array1 as $key => $value ) {
1500 - if ( $value !== '' ) {
1501 - if ( $cgi != '' ) {
1502 - $cgi .= '&';
1503 - }
1504 - if ( is_array( $value ) ) {
1505 - $firstTime = true;
1506 - foreach ( $value as $v ) {
1507 - $cgi .= ( $firstTime ? '' : '&') .
1508 - urlencode( $key . '[]' ) . '=' .
1509 - urlencode( $v );
1510 - $firstTime = false;
1511 - }
1512 - } else {
1513 - if ( is_object( $value ) ) {
1514 - $value = $value->__toString();
1515 - }
1516 - $cgi .= urlencode( $key ) . '=' .
1517 - urlencode( $value );
1518 - }
1519 - }
1520 - }
1521 - return $cgi;
1522 -}
1523 -
1524 -/**
1525 - * This is the logical opposite of wfArrayToCGI(): it accepts a query string as
1526 - * its argument and returns the same string in array form. This allows compa-
1527 - * tibility with legacy functions that accept raw query strings instead of nice
1528 - * arrays. Of course, keys and values are urldecode()d. Don't try passing in-
1529 - * valid query strings, or it will explode.
1530 - *
1531 - * @param $query String: query string
1532 - * @return array Array version of input
1533 - */
1534 -function wfCgiToArray( $query ) {
1535 - if( isset( $query[0] ) && $query[0] == '?' ) {
1536 - $query = substr( $query, 1 );
1537 - }
1538 - $bits = explode( '&', $query );
1539 - $ret = array();
1540 - foreach( $bits as $bit ) {
1541 - if( $bit === '' ) {
1542 - continue;
1543 - }
1544 - list( $key, $value ) = explode( '=', $bit );
1545 - $key = urldecode( $key );
1546 - $value = urldecode( $value );
1547 - $ret[$key] = $value;
1548 - }
1549 - return $ret;
1550 -}
1551 -
1552 -/**
1553 - * Append a query string to an existing URL, which may or may not already
1554 - * have query string parameters already. If so, they will be combined.
1555 - *
1556 - * @param $url String
1557 - * @param $query Mixed: string or associative array
1558 - * @return string
1559 - */
1560 -function wfAppendQuery( $url, $query ) {
1561 - if ( is_array( $query ) ) {
1562 - $query = wfArrayToCGI( $query );
1563 - }
1564 - if( $query != '' ) {
1565 - if( false === strpos( $url, '?' ) ) {
1566 - $url .= '?';
1567 - } else {
1568 - $url .= '&';
1569 - }
1570 - $url .= $query;
1571 - }
1572 - return $url;
1573 -}
1574 -
1575 -/**
1576 - * Expand a potentially local URL to a fully-qualified URL. Assumes $wgServer
1577 - * is correct.
1578 - *
1579 - * @todo this won't work with current-path-relative URLs
1580 - * like "subdir/foo.html", etc.
1581 - *
1582 - * @param $url String: either fully-qualified or a local path + query
1583 - * @return string Fully-qualified URL
1584 - */
1585 -function wfExpandUrl( $url ) {
1586 - global $wgServer;
1587 - if( substr( $url, 0, 2 ) == '//' ) {
1588 - $bits = wfParseUrl( $wgServer );
1589 - $scheme = $bits ? $bits['scheme'] : 'http';
1590 - return $scheme . ':' . $url;
1591 - } elseif( substr( $url, 0, 1 ) == '/' ) {
1592 - return $wgServer . $url;
1593 - } else {
1594 - return $url;
1595 - }
1596 -}
1597 -
1598 -/**
15991722 * Windows-compatible version of escapeshellarg()
16001723 * Windows doesn't recognise single-quotes in the shell, but the escapeshellarg()
16011724 * function puts single quotes in regardless of OS.
@@ -2449,34 +2572,6 @@
24502573 }
24512574
24522575 /**
2453 - * Returns a regular expression of url protocols
2454 - *
2455 - * @return String
2456 - */
2457 -function wfUrlProtocols() {
2458 - global $wgUrlProtocols;
2459 -
2460 - static $retval = null;
2461 - if ( !is_null( $retval ) ) {
2462 - return $retval;
2463 - }
2464 -
2465 - // Support old-style $wgUrlProtocols strings, for backwards compatibility
2466 - // with LocalSettings files from 1.5
2467 - if ( is_array( $wgUrlProtocols ) ) {
2468 - $protocols = array();
2469 - foreach ( $wgUrlProtocols as $protocol ) {
2470 - $protocols[] = preg_quote( $protocol, '/' );
2471 - }
2472 -
2473 - $retval = implode( '|', $protocols );
2474 - } else {
2475 - $retval = $wgUrlProtocols;
2476 - }
2477 - return $retval;
2478 -}
2479 -
2480 -/**
24812576 * Safety wrapper around ini_get() for boolean settings.
24822577 * The values returned from ini_get() are pre-normalized for settings
24832578 * set via php.ini or php_flag/php_admin_flag... but *not*
@@ -2766,101 +2861,6 @@
27672862 }
27682863
27692864 /**
2770 - * parse_url() work-alike, but non-broken. Differences:
2771 - *
2772 - * 1) Does not raise warnings on bad URLs (just returns false)
2773 - * 2) Handles protocols that don't use :// (e.g., mailto: and news:) correctly
2774 - * 3) Adds a "delimiter" element to the array, either '://' or ':' (see (2))
2775 - *
2776 - * @param $url String: a URL to parse
2777 - * @return Array: bits of the URL in an associative array, per PHP docs
2778 - */
2779 -function wfParseUrl( $url ) {
2780 - global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php
2781 - wfSuppressWarnings();
2782 - $bits = parse_url( $url );
2783 - wfRestoreWarnings();
2784 - if ( !$bits ) {
2785 - return false;
2786 - }
2787 -
2788 - // most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
2789 - if ( in_array( $bits['scheme'] . '://', $wgUrlProtocols ) ) {
2790 - $bits['delimiter'] = '://';
2791 - } elseif ( in_array( $bits['scheme'] . ':', $wgUrlProtocols ) ) {
2792 - $bits['delimiter'] = ':';
2793 - // parse_url detects for news: and mailto: the host part of an url as path
2794 - // We have to correct this wrong detection
2795 - if ( isset( $bits['path'] ) ) {
2796 - $bits['host'] = $bits['path'];
2797 - $bits['path'] = '';
2798 - }
2799 - } else {
2800 - return false;
2801 - }
2802 -
2803 - /* Provide an empty host for eg. file:/// urls (see bug 28627) */
2804 - if ( !isset( $bits['host'] ) ) {
2805 - $bits['host'] = '';
2806 -
2807 - /* parse_url loses the third / for file:///c:/ urls (but not on variants) */
2808 - if ( substr( $bits['path'], 0, 1 ) !== '/' ) {
2809 - $bits['path'] = '/' . $bits['path'];
2810 - }
2811 - }
2812 - return $bits;
2813 -}
2814 -
2815 -/**
2816 - * Make a URL index, appropriate for the el_index field of externallinks.
2817 - *
2818 - * @param $url String
2819 - * @return String
2820 - */
2821 -function wfMakeUrlIndex( $url ) {
2822 - $bits = wfParseUrl( $url );
2823 -
2824 - // Reverse the labels in the hostname, convert to lower case
2825 - // For emails reverse domainpart only
2826 - if ( $bits['scheme'] == 'mailto' ) {
2827 - $mailparts = explode( '@', $bits['host'], 2 );
2828 - if ( count( $mailparts ) === 2 ) {
2829 - $domainpart = strtolower( implode( '.', array_reverse( explode( '.', $mailparts[1] ) ) ) );
2830 - } else {
2831 - // No domain specified, don't mangle it
2832 - $domainpart = '';
2833 - }
2834 - $reversedHost = $domainpart . '@' . $mailparts[0];
2835 - } else {
2836 - $reversedHost = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) );
2837 - }
2838 - // Add an extra dot to the end
2839 - // Why? Is it in wrong place in mailto links?
2840 - if ( substr( $reversedHost, -1, 1 ) !== '.' ) {
2841 - $reversedHost .= '.';
2842 - }
2843 - // Reconstruct the pseudo-URL
2844 - $prot = $bits['scheme'];
2845 - $index = $prot . $bits['delimiter'] . $reversedHost;
2846 - // Leave out user and password. Add the port, path, query and fragment
2847 - if ( isset( $bits['port'] ) ) {
2848 - $index .= ':' . $bits['port'];
2849 - }
2850 - if ( isset( $bits['path'] ) ) {
2851 - $index .= $bits['path'];
2852 - } else {
2853 - $index .= '/';
2854 - }
2855 - if ( isset( $bits['query'] ) ) {
2856 - $index .= '?' . $bits['query'];
2857 - }
2858 - if ( isset( $bits['fragment'] ) ) {
2859 - $index .= '#' . $bits['fragment'];
2860 - }
2861 - return $index;
2862 -}
2863 -
2864 -/**
28652865 * Do any deferred updates and clear the list
28662866 *
28672867 * @param $commit String: set to 'commit' to commit after every update to

Status & tagging log