r46756 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r46755‎ | r46756 | r46757 >
Date:04:58, 3 February 2009
Author:werdna
Status:deferred (Comments)
Tags:
Comment:
Re-implementation of r46725 (caching of Cite <references /> output).

This time, I've written a Parser method called serialiseHalfParsedText, which, as the name implies, grabs some half-parsed text, and fixes up all of the strip
markers, and link comments, and makes it safe to import some other time with unserialiseHalfParsedText. I tested it by live-hacking the cache key to be a constant,
and then putting <references /> on a completely different page, where it worked perfectly.
Modified paths:
  • /trunk/extensions/Cite/Cite_body.php (modified) (history)
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/parser/Parser.php
@@ -4764,6 +4764,102 @@
47654765 }
47664766 return $out;
47674767 }
 4768+
 4769+ function serialiseHalfParsedText( $text ) {
 4770+ $data = array();
 4771+ $data['text'] = $text;
 4772+
 4773+ // First, find all strip markers, and store their
 4774+ // data in an array.
 4775+ $stripState = new StripState;
 4776+ $pos = 0;
 4777+ while( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) ) && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) ) {
 4778+ $end_pos += strlen( self::MARKER_SUFFIX );
 4779+ $marker = substr( $text, $start_pos, $end_pos-$start_pos );
 4780+
 4781+ if ( !empty( $this->mStripState->general->data[$marker] ) ) {
 4782+ $replaceArray = $stripState->general;
 4783+ $stripText = $this->mStripState->general->data[$marker];
 4784+ } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) {
 4785+ $replaceArray = $stripState->nowiki;
 4786+ $stripText = $this->mStripState->nowiki->data[$marker];
 4787+ } else {
 4788+ throw new MWException( "Hanging strip marker: '$marker'." );
 4789+ }
 4790+
 4791+ $replaceArray->setPair( $marker, $stripText );
 4792+ $pos = $end_pos;
 4793+ }
 4794+ $data['stripstate'] = $stripState;
 4795+
 4796+ // Now, find all of our links, and store THEIR
 4797+ // data in an array! :)
 4798+ $links = array( 'internal' => array(), 'interwiki' => array() );
 4799+ $pos = 0;
 4800+
 4801+ // Internal links
 4802+ while( ( $start_pos = strpos( $text, '<!--LINK ', $pos ) ) ) {
 4803+ list( $ns, $trail ) = explode( ':', substr( $text, $start_pos + strlen( '<!--LINK ' ) ), 2 );
 4804+
 4805+ $ns = trim($ns);
 4806+ if (empty( $links['internal'][$ns] )) {
 4807+ $links['internal'][$ns] = array();
 4808+ }
 4809+
 4810+ $key = trim( substr( $trail, 0, strpos( $trail, '-->' ) ) );
 4811+ $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key];
 4812+ $pos = $start_pos + strlen( "<!--LINK $ns:$key-->" );
 4813+ }
 4814+
 4815+ $pos = 0;
 4816+
 4817+ // Interwiki links
 4818+ while( ( $start_pos = strpos( $text, '<!--IWLINK ', $pos ) ) ) {
 4819+ $data = substr( $text, $start_pos );
 4820+ $key = trim( substr( $data, 0, strpos( $data, '-->' ) ) );
 4821+ $links['interwiki'][] = $this->mLinkHolders->interwiki[$key];
 4822+ $pos = $start_pos + strlen( "<!--IWLINK $key-->" );
 4823+ }
 4824+
 4825+ $data['linkholder'] = $links;
 4826+
 4827+ return $data;
 4828+ }
 4829+
 4830+ function unserialiseHalfParsedText( $data, $intPrefix = null /* Unique identifying prefix */ ) {
 4831+ if (!$intPrefix)
 4832+ $intPrefix = $this->getRandomString();
 4833+
 4834+ // First, extract the strip state.
 4835+ $stripState = $data['stripstate'];
 4836+ $this->mStripState->general->merge( $stripState->general );
 4837+ $this->mStripState->nowiki->merge( $stripState->nowiki );
 4838+
 4839+ // Now, extract the text, and renumber links
 4840+ $text = $data['text'];
 4841+ $links = $data['linkholder'];
 4842+
 4843+ // Internal...
 4844+ foreach( $links['internal'] as $ns => $nsLinks ) {
 4845+ foreach( $nsLinks as $key => $entry ) {
 4846+ $newKey = $intPrefix . '-' . $key;
 4847+ $this->mLinkHolders->internals[$ns][$newKey] = $entry;
 4848+
 4849+ $text = str_replace( "<!--LINK $ns:$key-->", "<!--LINK $ns:$newKey-->", $text );
 4850+ }
 4851+ }
 4852+
 4853+ // Interwiki...
 4854+ foreach( $links['interwiki'] as $key => $entry ) {
 4855+ $newKey = "$intPrefix-$key";
 4856+ $this->mLinkHolders->interwikis[$newKey] = $entry;
 4857+
 4858+ $text = str_replace( "<!--IWLINK $key-->", "<!--IWLINK $newKey-->", $text );
 4859+ }
 4860+
 4861+ // Should be good to go.
 4862+ return $text;
 4863+ }
47684864 }
47694865
47704866 /**
Index: trunk/extensions/Cite/Cite_body.php
@@ -369,17 +369,20 @@
370370 wfProfileOut( __METHOD__ .'-entries' );
371371
372372 wfProfileIn( __METHOD__.'-cache-get' );
373 - $ret = $wgMemc->get( $cacheKey );
 373+ $data = $wgMemc->get( $cacheKey );
374374 wfProfileOut( __METHOD__.'-cache-get' );
375375
376 - if ( !$ret ) {
 376+ if ( !$data ) {
377377 wfProfileIn( __METHOD__ .'-parse' );
378378
379379 // Live hack: parse() adds two newlines on WM, can't reproduce it locally -ævar
380380 $ret = rtrim( $this->parse( $parserInput ), "\n" );
381 - $wgMemc->set( $cacheKey, $ret, 86400 );
 381+ $serData = $this->mParser->serialiseHalfParsedText( $ret );
 382+ $wgMemc->set( $cacheKey, $serData, 86400 );
382383
383384 wfProfileOut( __METHOD__ .'-parse' );
 385+ } else {
 386+ $ret = $this->mParser->unserialiseHalfParsedText( $data );
384387 }
385388
386389 wfProfileOut( __METHOD__ );

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r46725Cache cite <references /> output by contents. That way, changes that don't af...werdna19:29, 2 February 2009

Comments

#Comment by Werdna (talk | contribs)   18:30, 12 February 2009

Config option added in r47190.

#Comment by Tim Starling (talk | contribs)   02:32, 23 February 2011

Please use the spelling "serialize" in future, for consistency with the PHP function name.

Status & tagging log