Index: trunk/extensions/Wikilog/Wikilog.i18n.magic.php |
— | — | @@ -21,6 +21,7 @@ |
22 | 22 | 'wlk-subtitle' => array( 0, 'subtitle' ), |
23 | 23 | 'wlk-summary' => array( 1, 'summary' ), |
24 | 24 | 'wlk-hidden' => array( 0, 'hidden' ), |
| 25 | + 'wlk-more' => array( 0, 'more' ), |
25 | 26 | ); |
26 | 27 | |
27 | 28 | $magicWords['ml'] = array( |
— | — | @@ -40,6 +41,7 @@ |
41 | 42 | 'wlk-subtitle' => array( '0', 'subtítulo', 'subtitle' ), |
42 | 43 | 'wlk-summary' => array( '1', 'resumo', 'summary' ), |
43 | 44 | 'wlk-hidden' => array( '0', 'oculto', 'hidden' ), |
| 45 | + 'wlk-more' => array( '0', 'mais', 'more' ), |
44 | 46 | ); |
45 | 47 | |
46 | 48 | /** |
— | — | @@ -54,4 +56,5 @@ |
55 | 57 | 'wlk-subtitle' => array( '0', 'subtítulo', 'subtitle' ), |
56 | 58 | 'wlk-summary' => array( '1', 'resumo', 'summary' ), |
57 | 59 | 'wlk-hidden' => array( '0', 'oculto', 'hidden' ), |
| 60 | + 'wlk-more' => array( '0', 'mais', 'more' ), |
58 | 61 | ); |
Index: trunk/extensions/Wikilog/Wikilog.php |
— | — | @@ -127,8 +127,9 @@ |
128 | 128 | // WikilogParser hooks |
129 | 129 | $wgHooks['ParserFirstCallInit'][] = 'WikilogParser::FirstCallInit'; |
130 | 130 | $wgHooks['ParserClearState'][] = 'WikilogParser::ClearState'; |
131 | | -$wgHooks['ParserBeforeInternalParse'][] = 'WikilogParser::BeforeInternalParse'; |
| 131 | +$wgHooks['ParserBeforeStrip'][] = 'WikilogParser::BeforeStrip'; |
132 | 132 | $wgHooks['ParserAfterTidy'][] = 'WikilogParser::AfterTidy'; |
| 133 | +$wgHooks['InternalParseBeforeLinks'][] = 'WikilogParser::InternalParseBeforeLinks'; |
133 | 134 | $wgHooks['GetLocalURL'][] = 'WikilogParser::GetLocalURL'; |
134 | 135 | $wgHooks['GetFullURL'][] = 'WikilogParser::GetFullURL'; |
135 | 136 | |
Index: trunk/extensions/Wikilog/RELEASE-NOTES |
— | — | @@ -13,6 +13,10 @@ |
14 | 14 | others replaced by wikilog-summary-header/footer, wikilog-entry-header/footer |
15 | 15 | and others, with better support for plural and gender inflection, separate |
16 | 16 | date and time, and also support for category and tag lists. |
| 17 | +* (wl:issue 2) Improved article summary extraction. Now a "--more--" marker |
| 18 | + on a line by itself separates the summary (above) from the rest of the |
| 19 | + article (below). The <summary> tag takes precedence over this new marker. |
| 20 | + If neither is found, the first section is used. |
17 | 21 | |
18 | 22 | === Bug fixes === |
19 | 23 | |
Index: trunk/extensions/Wikilog/WikilogParser.php |
— | — | @@ -35,6 +35,11 @@ |
36 | 36 | class WikilogParser |
37 | 37 | { |
38 | 38 | /** |
| 39 | + * Anchor printed when a --more-- separator is substituted. |
| 40 | + */ |
| 41 | + const MORE_ANCHOR = "<span id=\"wl-more\"></span>"; |
| 42 | + |
| 43 | + /** |
39 | 44 | * True if parsing articles with feed output specific settings. |
40 | 45 | * This is a horrible hack needed because of many MediaWiki misdesigns. |
41 | 46 | */ |
— | — | @@ -73,7 +78,10 @@ |
74 | 79 | * ParserClearState hook handler function. |
75 | 80 | */ |
76 | 81 | public static function ClearState( &$parser ) { |
| 82 | + # These two parser attributes contain our private information. |
| 83 | + # They take a piggyback ride on the parser object. |
77 | 84 | $parser->mExtWikilog = new WikilogParserOutput; |
| 85 | + $parser->mExtWikilogInfo = NULL; |
78 | 86 | |
79 | 87 | # Disable TOC in feeds. |
80 | 88 | if ( self::$feedParsing ) { |
— | — | @@ -83,9 +91,9 @@ |
84 | 92 | } |
85 | 93 | |
86 | 94 | /** |
87 | | - * ParserBeforeInternalParse hook handler function. |
| 95 | + * ParserBeforeStrip hook handler function. |
88 | 96 | */ |
89 | | - public static function BeforeInternalParse( &$parser, &$text, &$stripState ) { |
| 97 | + public static function BeforeStrip( &$parser, &$text, &$stripState ) { |
90 | 98 | global $wgUser; |
91 | 99 | |
92 | 100 | # Do nothing if a title is not set. |
— | — | @@ -93,13 +101,13 @@ |
94 | 102 | return true; |
95 | 103 | |
96 | 104 | # Do nothing if it is not a wikilog article. |
97 | | - if ( ! ( $wi = Wikilog::getWikilogInfo( $parser->getTitle() ) ) ) |
| 105 | + if ( ! ( $parser->mExtWikilogInfo = Wikilog::getWikilogInfo( $title ) ) ) |
98 | 106 | return true; |
99 | 107 | |
100 | | - if ( $wi->isItem() ) { |
| 108 | + if ( $parser->mExtWikilogInfo->isItem() ) { |
101 | 109 | # By default, use the item name as the default sort in categories. |
102 | 110 | # This can be overridden by {{DEFAULTSORT:...}} if the user wants. |
103 | | - $parser->setDefaultSort( $wi->getItemName() ); |
| 111 | + $parser->setDefaultSort( $parser->mExtWikilogInfo->getItemName() ); |
104 | 112 | } |
105 | 113 | |
106 | 114 | return true; |
— | — | @@ -114,10 +122,52 @@ |
115 | 123 | } |
116 | 124 | |
117 | 125 | /** |
| 126 | + * InternalParseBeforeLinks hook handler function. Called after nowiki, |
| 127 | + * comments and templates are treated. |
| 128 | + * For wikilog pages, look for the "--more--" marker and extract the |
| 129 | + * article summary before it. If not found, look for the first heading |
| 130 | + * and use the text before it (intro section). |
| 131 | + */ |
| 132 | + public static function InternalParseBeforeLinks( &$parser, &$text, &$stripState ) { |
| 133 | + if ( $parser->mExtWikilogInfo && $parser->mExtWikilogInfo->isItem() ) { |
| 134 | + static $moreRegex = false; |
| 135 | + if ( $moreRegex === false ) { |
| 136 | + $mwMore =& MagicWord::get( 'wlk-more' ); |
| 137 | + $words = $mwMore->getBaseRegex(); |
| 138 | + $flags = $mwMore->getRegexCase(); |
| 139 | + $moreRegex = "/(?<=^|\\n)--+ *(?:$words) *--+\s*/$flags"; |
| 140 | + } |
| 141 | + |
| 142 | + # Find and replace the --more-- marker. Extract summary. |
| 143 | + # We do it anyway even if the summary is already set, in order |
| 144 | + # to replace the marker with an invisible anchor. |
| 145 | + $p = preg_split( $moreRegex, $text, 2 ); |
| 146 | + if ( count( $p ) > 1 ) { |
| 147 | + self::trySetSummary( $parser, trim( $p[0] ) ); |
| 148 | + $anchor = $parser->insertStripItem( self::MORE_ANCHOR ); |
| 149 | + $text = $p[0] . $anchor . $p[1]; |
| 150 | + } else if ( !$parser->mExtWikilog->mSummary ) { |
| 151 | + # Otherwise, make a summary from the intro section. |
| 152 | + # Why don't we use $parser->getSection()? Because it has the |
| 153 | + # side-effect of clearing the parser state, which is bad here |
| 154 | + # since this hook happens during parsing. Instead, we |
| 155 | + # anticipate the $parser->doHeadings() call and extract the |
| 156 | + # text before the first heading. |
| 157 | + $text = $parser->doHeadings( $text ); |
| 158 | + $p = preg_split( '/<(h[1-6])\\b.*?>.*?<\\/\\1\\s*>/i', $text, 2 ); |
| 159 | + if ( count( $p ) > 1 ) { |
| 160 | + self::trySetSummary( $parser, trim( $p[0] ) ); |
| 161 | + } |
| 162 | + } |
| 163 | + } |
| 164 | + return true; |
| 165 | + } |
| 166 | + |
| 167 | + /** |
118 | 168 | * GetLocalURL hook handler function. |
119 | 169 | * Expands local URL @a $url if self::$expandingUrls is true. |
120 | 170 | */ |
121 | | - static function GetLocalURL( &$title, &$url, $query ) { |
| 171 | + public static function GetLocalURL( &$title, &$url, $query ) { |
122 | 172 | if ( self::$expandingUrls ) { |
123 | 173 | $url = wfExpandUrl( $url ); |
124 | 174 | } |
— | — | @@ -132,7 +182,7 @@ |
133 | 183 | * from Title::getLocalURL() in situations where action != 'render'. |
134 | 184 | * @todo Report this bug to MediaWiki bugzilla. |
135 | 185 | */ |
136 | | - static function GetFullURL( &$title, &$url, $query ) { |
| 186 | + public static function GetFullURL( &$title, &$url, $query ) { |
137 | 187 | global $wgServer; |
138 | 188 | if ( self::$expandingUrls ) { |
139 | 189 | $l = strlen( $wgServer ); |
— | — | @@ -155,14 +205,8 @@ |
156 | 206 | |
157 | 207 | # Remove extra space to make block rendering easier. |
158 | 208 | $text = trim( $text ); |
| 209 | + self::trySetSummary( $parser, $text ); |
159 | 210 | |
160 | | - if ( !$parser->mExtWikilog->mSummary ) { |
161 | | - $popt = $parser->getOptions(); |
162 | | - $popt->enableLimitReport( false ); |
163 | | - $output = $parser->parse( $text, $parser->getTitle(), $popt, true, false ); |
164 | | - $parser->mExtWikilog->mSummary = $output->getText(); |
165 | | - } |
166 | | - |
167 | 211 | $hidden = WikilogUtils::arrayMagicKeyGet( $params, $mwHidden ); |
168 | 212 | return $hidden ? '<!-- -->' : $parser->recursiveTagParse( $text ); |
169 | 213 | } |
— | — | @@ -401,6 +445,23 @@ |
402 | 446 | # |
403 | 447 | |
404 | 448 | /** |
| 449 | + * Set the article summary, ignore if already set. |
| 450 | + * @return True if set, false otherwise. |
| 451 | + */ |
| 452 | + private static function trySetSummary( &$parser, $text ) { |
| 453 | + if ( !$parser->mExtWikilog->mSummary ) { |
| 454 | + $popt = clone $parser->getOptions(); |
| 455 | + $popt->enableLimitReport( false ); |
| 456 | + $output = $parser->parse( $text, $parser->getTitle(), $popt, true, false ); |
| 457 | + $parser->mExtWikilog->mSummary = $output->getText(); |
| 458 | +// wfDebug( "Wikilog summary set to:\n----\n" . $parser->mExtWikilog->mSummary . "\n----\n" ); |
| 459 | + return true; |
| 460 | + } else { |
| 461 | + return false; |
| 462 | + } |
| 463 | + } |
| 464 | + |
| 465 | + /** |
405 | 466 | * Adds an author to the current article. If too many authors, warns. |
406 | 467 | * @return False on overflow, true otherwise. |
407 | 468 | */ |
Index: trunk/extensions/Wikilog/WikilogUtils.php |
— | — | @@ -237,31 +237,39 @@ |
238 | 238 | } |
239 | 239 | |
240 | 240 | /** |
241 | | - * Split summary of a wikilog post from the contents. |
242 | | - * If summary was provided in <summary>...</summary> tags, use it, |
243 | | - * otherwise, use some heuristics to find it in the content. |
| 241 | + * Split summary of a wikilog article from the contents. |
| 242 | + * If summary is part of the parser output, use it; otherwise, try to |
| 243 | + * extract it from the content text (section zero, before the first |
| 244 | + * heading). |
| 245 | + * |
| 246 | + * @param $parserOutput ParserOutput object. |
| 247 | + * @return Two-element array with summary and content. Summary may be |
| 248 | + * NULL if nonexistent. |
244 | 249 | */ |
245 | 250 | public static function splitSummaryContent( $parserOutput ) { |
| 251 | + global $wgUseTidy; |
| 252 | + |
246 | 253 | $content = Sanitizer::removeHTMLcomments( $parserOutput->getText() ); |
247 | 254 | |
248 | 255 | if ( isset( $parserOutput->mExtWikilog ) && $parserOutput->mExtWikilog->mSummary ) { |
| 256 | + # Parser output contains wikilog output and summary, use it. |
249 | 257 | $summary = Sanitizer::removeHTMLcomments( $parserOutput->mExtWikilog->mSummary ); |
250 | 258 | } else { |
251 | | - $blocks = preg_split( '/< (h[1-6]) .*? > .*? <\\/\\1>/ix', $content ); |
252 | | - |
| 259 | + # Try to extract summary from the content text. |
| 260 | + $blocks = preg_split( '/<(h[1-6]).*?>.*?<\\/\\1>/i', $content, 2 ); |
253 | 261 | if ( count( $blocks ) > 1 ) { |
254 | | - # Long article, get only the first paragraph. |
255 | | - $pextr = '/<(p) |
256 | | - ( \\s+ (?: [^\'"\\/>] | \'[^\']*\' | "[^"]*" )* )? |
257 | | - (?: > .*? <\\/\\1\\s*> | \\/> )/isx'; |
258 | | - |
259 | | - if ( preg_match_all( $pextr, $blocks[0], $m ) ) { |
260 | | - $summary = implode( "\n", $m[0] ); |
261 | | - } else { |
262 | | - $summary = NULL; |
| 262 | + # Long article with multiple sections, use only the first one. |
| 263 | + $summary = $blocks[0]; |
| 264 | + # It is possible for the regex to split on a heading that is |
| 265 | + # not a child of the root element (e.g. <div><h2>...</h2> |
| 266 | + # </div> leaving an open <div> tag). In order to handle such |
| 267 | + # cases, we pass the summary through tidy if it is available. |
| 268 | + if ( $wgUseTidy ) { |
| 269 | + $summary = MWTidy::tidy( $summary ); |
263 | 270 | } |
264 | 271 | } else { |
265 | | - # Short article, no summary. |
| 272 | + # Short article with a single section, use no summary and |
| 273 | + # leave to the caller to decide what to do. |
266 | 274 | $summary = NULL; |
267 | 275 | } |
268 | 276 | } |