r55969 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r55968‎ | r55969 | r55970 >
Date:14:36, 7 September 2009
Author:jojo
Status:deferred
Tags:
Comment:
Fall back to a cheaper algorithm for >50 pages. Turn off for >100.
Modified paths:
  • /trunk/extensions/Collection/Collection.php (modified) (history)
  • /trunk/extensions/Collection/Collection.suggest.php (modified) (history)

Diff [purge]

Index: trunk/extensions/Collection/Collection.php
@@ -101,6 +101,10 @@
102102
103103 $wgCollectionMaxSuggestions = 10;
104104
 105+$wgCollectionSuggestCheapWeightThreshhold = 50;
 106+
 107+$wgCollectionSuggestThreshhold = 100;
 108+
105109 # ==============================================================================
106110
107111 $wgExtensionCredits['specialpage'][] = array(
Index: trunk/extensions/Collection/Collection.suggest.php
@@ -158,6 +158,7 @@
159159 private static function getCollectionSuggestTemplate( $mode, $param ) {
160160 global $wgCollectionMaxSuggestions;
161161
 162+
162163 switch($mode) {
163164 case 'add':
164165 SpecialCollection::addArticleFromName(NS_MAIN, $param);
@@ -326,10 +327,14 @@
327328
328329 // Check if all articles from the book are in $mLinkList
329330 private function addCollectionArticles() {
330 - global $wgCollectionSuggestCountWordsThreshold;
 331+ global $wgCollectionSuggestThreshhold;
331332
332333 $numItems = count( $this->mColl['items'] );
333334
 335+ if ( $numItems > $wgCollectionSuggestThreshhold ) {
 336+ return;
 337+ }
 338+
334339 foreach( $this->mColl['items'] as $item ) {
335340 if ( $this->searchEntry( $item['title'], $this->mLinkList ) === false
336341 && $item['type'] == 'article'
@@ -344,7 +349,7 @@
345350
346351 $this->mLinkList[] = array(
347352 'name' => $articleName,
348 - 'links' => $this->getWeightedLinks( $article->getContent() ),
 353+ 'links' => $this->getWeightedLinks( $numItems, $article->getContent() ),
349354 );
350355 }
351356 }
@@ -380,7 +385,9 @@
381386 * @param wikitext: article text
382387 * @return an array with links and their weights
383388 */
384 - private function getWeightedLinks( $wikitext ) {
 389+ private function getWeightedLinks( $num_articles, $wikitext ) {
 390+ global $wgCollectionSuggestCheapWeightThreshhold;
 391+
385392 $allLinks = array();
386393 preg_match_all(
387394 '/\[\[(.+?)\]\]/',
@@ -428,43 +435,56 @@
429436 }
430437
431438 $linkcount = array();
432 - foreach ( $linkmap as $alias => $linked ) {
433 - $matches = array();
434 - preg_match_all(
435 - '/\W' . preg_quote( $alias, '/' ) . '\W/i',
436 - $wikitext,
437 - $matches
438 - );
439 - $num = count( $matches[0] );
 439+ if ( $num_articles < $wgCollectionSuggestCheapWeightThreshhold ) {
 440+ // more expensive algorithm: count words
 441+ foreach ( $linkmap as $alias => $linked ) {
 442+ $matches = array();
 443+ preg_match_all(
 444+ '/\W' . preg_quote( $alias, '/' ) . '\W/i',
 445+ $wikitext,
 446+ $matches
 447+ );
 448+ $num = count( $matches[0] );
440449
441 - foreach ( $linked as $link => $dummy ) {
442 - if ( isset( $linkcount[$link] ) ) {
443 - $linkcount[$link] += $num;
444 - } else {
445 - $linkcount[$link] = $num;
 450+ foreach ( $linked as $link => $dummy ) {
 451+ if ( isset( $linkcount[$link] ) ) {
 452+ $linkcount[$link] += $num;
 453+ } else {
 454+ $linkcount[$link] = $num;
 455+ }
446456 }
447457 }
448 - }
 458+
 459+ if ( count( $linkcount ) == 0 ) {
 460+ return array();
 461+ }
449462
450 - if ( count( $linkcount ) == 0 ) {
451 - return array();
452 - }
 463+ // normalize:
 464+ $lc_max = 0;
 465+ foreach ( $linkcount as $link => $count ) {
 466+ if ( $count > $lc_max) {
 467+ $lc_max = $count;
 468+ }
 469+ }
 470+ $norm = log( $lc_max );
 471+ $result = array();
 472+ if ( $norm > 0 ) {
 473+ foreach ( $linkcount as $link => $count ) {
 474+ $result[$link] = 1 + 0.5*log($count)/$norm;
 475+ }
 476+ }
453477
454 - // normalize:
455 - $lc_max = 0;
456 - foreach ( $linkcount as $link => $count ) {
457 - if ( $num > $lc_max) {
458 - $lc_max = $count;
 478+ return $result;
 479+ } else {
 480+ // cheaper algorithm: just count links
 481+ foreach ( $linkmap as $alias => $linked ) {
 482+ foreach ( $linked as $link => $dummy) {
 483+ $linkcount[$link] = 1;
 484+ }
459485 }
 486+
 487+ return $linkcount;
460488 }
461 - $norm = log( $lc_max );
462 - $result = array();
463 - if ( $norm > 0 ) {
464 - foreach ( $linkcount as $link => $count ) {
465 - $result[$link] = 1 + 0.5*log($count)/$norm;
466 - }
467 - }
468 - return $result;
469489 }
470490
471491 // Calculate the $mPropList from $mLinkList and $mBanList

Status & tagging log