Index: trunk/extensions/Collection/Collection.php |
— | — | @@ -101,6 +101,10 @@ |
102 | 102 | |
103 | 103 | $wgCollectionMaxSuggestions = 10; |
104 | 104 | |
| 105 | +$wgCollectionSuggestCheapWeightThreshhold = 50; |
| 106 | + |
| 107 | +$wgCollectionSuggestThreshhold = 100; |
| 108 | + |
105 | 109 | # ============================================================================== |
106 | 110 | |
107 | 111 | $wgExtensionCredits['specialpage'][] = array( |
Index: trunk/extensions/Collection/Collection.suggest.php |
— | — | @@ -158,6 +158,7 @@ |
159 | 159 | private static function getCollectionSuggestTemplate( $mode, $param ) { |
160 | 160 | global $wgCollectionMaxSuggestions; |
161 | 161 | |
| 162 | + |
162 | 163 | switch($mode) { |
163 | 164 | case 'add': |
164 | 165 | SpecialCollection::addArticleFromName(NS_MAIN, $param); |
— | — | @@ -326,10 +327,14 @@ |
327 | 328 | |
328 | 329 | // Check if all articles from the book are in $mLinkList |
329 | 330 | private function addCollectionArticles() { |
330 | | - global $wgCollectionSuggestCountWordsThreshold; |
| 331 | + global $wgCollectionSuggestThreshhold; |
331 | 332 | |
332 | 333 | $numItems = count( $this->mColl['items'] ); |
333 | 334 | |
| 335 | + if ( $numItems > $wgCollectionSuggestThreshhold ) { |
| 336 | + return; |
| 337 | + } |
| 338 | + |
334 | 339 | foreach( $this->mColl['items'] as $item ) { |
335 | 340 | if ( $this->searchEntry( $item['title'], $this->mLinkList ) === false |
336 | 341 | && $item['type'] == 'article' |
— | — | @@ -344,7 +349,7 @@ |
345 | 350 | |
346 | 351 | $this->mLinkList[] = array( |
347 | 352 | 'name' => $articleName, |
348 | | - 'links' => $this->getWeightedLinks( $article->getContent() ), |
| 353 | + 'links' => $this->getWeightedLinks( $numItems, $article->getContent() ), |
349 | 354 | ); |
350 | 355 | } |
351 | 356 | } |
— | — | @@ -380,7 +385,9 @@ |
381 | 386 | * @param wikitext: article text |
382 | 387 | * @return an array with links and their weights |
383 | 388 | */ |
384 | | - private function getWeightedLinks( $wikitext ) { |
| 389 | + private function getWeightedLinks( $num_articles, $wikitext ) { |
| 390 | + global $wgCollectionSuggestCheapWeightThreshhold; |
| 391 | + |
385 | 392 | $allLinks = array(); |
386 | 393 | preg_match_all( |
387 | 394 | '/\[\[(.+?)\]\]/', |
— | — | @@ -428,43 +435,56 @@ |
429 | 436 | } |
430 | 437 | |
431 | 438 | $linkcount = array(); |
432 | | - foreach ( $linkmap as $alias => $linked ) { |
433 | | - $matches = array(); |
434 | | - preg_match_all( |
435 | | - '/\W' . preg_quote( $alias, '/' ) . '\W/i', |
436 | | - $wikitext, |
437 | | - $matches |
438 | | - ); |
439 | | - $num = count( $matches[0] ); |
| 439 | + if ( $num_articles < $wgCollectionSuggestCheapWeightThreshhold ) { |
| 440 | + // more expensive algorithm: count words |
| 441 | + foreach ( $linkmap as $alias => $linked ) { |
| 442 | + $matches = array(); |
| 443 | + preg_match_all( |
| 444 | + '/\W' . preg_quote( $alias, '/' ) . '\W/i', |
| 445 | + $wikitext, |
| 446 | + $matches |
| 447 | + ); |
| 448 | + $num = count( $matches[0] ); |
440 | 449 | |
441 | | - foreach ( $linked as $link => $dummy ) { |
442 | | - if ( isset( $linkcount[$link] ) ) { |
443 | | - $linkcount[$link] += $num; |
444 | | - } else { |
445 | | - $linkcount[$link] = $num; |
| 450 | + foreach ( $linked as $link => $dummy ) { |
| 451 | + if ( isset( $linkcount[$link] ) ) { |
| 452 | + $linkcount[$link] += $num; |
| 453 | + } else { |
| 454 | + $linkcount[$link] = $num; |
| 455 | + } |
446 | 456 | } |
447 | 457 | } |
448 | | - } |
| 458 | + |
| 459 | + if ( count( $linkcount ) == 0 ) { |
| 460 | + return array(); |
| 461 | + } |
449 | 462 | |
450 | | - if ( count( $linkcount ) == 0 ) { |
451 | | - return array(); |
452 | | - } |
| 463 | + // normalize: |
| 464 | + $lc_max = 0; |
| 465 | + foreach ( $linkcount as $link => $count ) { |
| 466 | + if ( $count > $lc_max) { |
| 467 | + $lc_max = $count; |
| 468 | + } |
| 469 | + } |
| 470 | + $norm = log( $lc_max ); |
| 471 | + $result = array(); |
| 472 | + if ( $norm > 0 ) { |
| 473 | + foreach ( $linkcount as $link => $count ) { |
| 474 | + $result[$link] = 1 + 0.5*log($count)/$norm; |
| 475 | + } |
| 476 | + } |
453 | 477 | |
454 | | - // normalize: |
455 | | - $lc_max = 0; |
456 | | - foreach ( $linkcount as $link => $count ) { |
457 | | - if ( $num > $lc_max) { |
458 | | - $lc_max = $count; |
| 478 | + return $result; |
| 479 | + } else { |
| 480 | + // cheaper algorithm: just count links |
| 481 | + foreach ( $linkmap as $alias => $linked ) { |
| 482 | + foreach ( $linked as $link => $dummy) { |
| 483 | + $linkcount[$link] = 1; |
| 484 | + } |
459 | 485 | } |
| 486 | + |
| 487 | + return $linkcount; |
460 | 488 | } |
461 | | - $norm = log( $lc_max ); |
462 | | - $result = array(); |
463 | | - if ( $norm > 0 ) { |
464 | | - foreach ( $linkcount as $link => $count ) { |
465 | | - $result[$link] = 1 + 0.5*log($count)/$norm; |
466 | | - } |
467 | | - } |
468 | | - return $result; |
469 | 489 | } |
470 | 490 | |
471 | 491 | // Calculate the $mPropList from $mLinkList and $mBanList |