Index: trunk/extensions/ActiveAbstract/AbstractFilter.php |
— | — | @@ -1,5 +1,4 @@ |
2 | 2 | <?php |
3 | | - |
4 | 3 | /** |
5 | 4 | * Generate XML feed for Yahoo's Active Abstracts project |
6 | 5 | * Plugin for dumpBackup.php; call as eg: |
— | — | @@ -29,60 +28,60 @@ |
30 | 29 | $dumper->registerFilter( 'abstract', 'AbstractFilter' ); |
31 | 30 | $dumper->registerFilter( 'noredirect', 'NoredirectFilter' ); |
32 | 31 | } |
33 | | - |
34 | | - function AbstractFilter( &$sink, $params='' ) { |
| 32 | + |
| 33 | + function AbstractFilter( &$sink, $params = '' ) { |
35 | 34 | $this->sink =& $sink; |
36 | | - |
| 35 | + |
37 | 36 | $bits = explode( '=', $params, 2 ); |
38 | | - if( count( $bits ) == 2 && $bits[0] == 'variant' ) { |
| 37 | + if ( count( $bits ) == 2 && $bits[0] == 'variant' ) { |
39 | 38 | $this->variant = $bits[1]; |
40 | 39 | } else { |
41 | 40 | $this->variant = false; |
42 | 41 | } |
43 | 42 | } |
44 | | - |
| 43 | + |
45 | 44 | function writeOpenStream( $string ) { |
46 | 45 | $this->sink->writeOpenStream( "<feed>\n" ); |
47 | 46 | } |
48 | | - |
| 47 | + |
49 | 48 | function writeCloseStream( $string ) { |
50 | 49 | $this->sink->writeCloseStream( "</feed>\n" ); |
51 | 50 | } |
52 | | - |
| 51 | + |
53 | 52 | function writeOpenPage( $page, $string ) { |
54 | 53 | global $wgSitename; |
55 | 54 | $this->title = Title::makeTitle( $page->page_namespace, $page->page_title ); |
56 | 55 | $title = $wgSitename . wfMsg( 'colon-separator' ) . $this->title->getPrefixedText(); |
57 | | - |
| 56 | + |
58 | 57 | $xml = "<doc>\n"; |
59 | 58 | $xml .= Xml::element( 'title', null, $this->_variant( $title ) ) . "\n"; |
60 | 59 | $xml .= Xml::element( 'url', null, $this->title->getFullUrl() ) . "\n"; |
61 | | - |
| 60 | + |
62 | 61 | // add abstract and links when we have revision data... |
63 | 62 | $this->revision = null; |
64 | | - |
| 63 | + |
65 | 64 | $this->sink->writeOpenPage( $page, $xml ); |
66 | 65 | } |
67 | | - |
| 66 | + |
68 | 67 | function writeClosePage( $string ) { |
69 | 68 | $xml = ''; |
70 | | - if( $this->revision ) { |
| 69 | + if ( $this->revision ) { |
71 | 70 | $xml .= Xml::element( 'abstract', null, |
72 | 71 | $this->_variant( |
73 | 72 | $this->_abstract( $this->revision ) ) ) . "\n"; |
74 | 73 | $xml .= "<links>\n"; |
75 | | - |
| 74 | + |
76 | 75 | $links = $this->_sectionLinks( $this->revision ); |
77 | | - if( empty( $links ) ) { |
| 76 | + if ( empty( $links ) ) { |
78 | 77 | // If no TOC, they want us to fall back to categories. |
79 | 78 | $links = $this->_categoryLinks( $this->revision ); |
80 | 79 | } |
81 | | - foreach( $links as $anchor => $url ) { |
| 80 | + foreach ( $links as $anchor => $url ) { |
82 | 81 | $xml .= $this->_formatLink( $url, $anchor, 'nav' ); |
83 | 82 | } |
84 | | - |
| 83 | + |
85 | 84 | // @todo: image links |
86 | | - |
| 85 | + |
87 | 86 | $xml .= "</links>\n"; |
88 | 87 | } |
89 | 88 | $xml .= "</doc>\n"; |
— | — | @@ -90,12 +89,12 @@ |
91 | 90 | $this->title = null; |
92 | 91 | $this->revision = null; |
93 | 92 | } |
94 | | - |
| 93 | + |
95 | 94 | function writeRevision( $rev, $string ) { |
96 | 95 | // Only use one revision's worth of data to output |
97 | 96 | $this->revision = $rev; |
98 | 97 | } |
99 | | - |
| 98 | + |
100 | 99 | /** |
101 | 100 | * Extract an abstract from the page |
102 | 101 | * @param object $rev Database rows with revision data |
— | — | @@ -104,14 +103,14 @@ |
105 | 104 | */ |
106 | 105 | function _abstract( $rev ) { |
107 | 106 | $text = Revision::getRevisionText( $rev ); // FIXME cache this |
108 | | - |
| 107 | + |
109 | 108 | $stripped = $this->_stripMarkup( $text ); |
110 | 109 | $extract = $this->_extractStart( $stripped ); |
111 | 110 | $clipped = substr( $extract, 0, 1024 ); // not too long pls |
112 | | - |
| 111 | + |
113 | 112 | return UtfNormal::cleanUp( $clipped ); |
114 | 113 | } |
115 | | - |
| 114 | + |
116 | 115 | /** |
117 | 116 | * Convert text to the preferred output language variant, if set. |
118 | 117 | * @param string $text |
— | — | @@ -119,14 +118,14 @@ |
120 | 119 | * @access private |
121 | 120 | */ |
122 | 121 | function _variant( $text ) { |
123 | | - if( $this->variant ) { |
| 122 | + if ( $this->variant ) { |
124 | 123 | global $wgContLang; |
125 | 124 | return $wgContLang->mConverter->translate( $text, $this->variant ); |
126 | 125 | } else { |
127 | 126 | return $text; |
128 | 127 | } |
129 | 128 | } |
130 | | - |
| 129 | + |
131 | 130 | /** |
132 | 131 | * Strip markup to show plaintext |
133 | 132 | * @param string $text |
— | — | @@ -135,9 +134,9 @@ |
136 | 135 | */ |
137 | 136 | function _stripMarkup( $text ) { |
138 | 137 | global $wgContLang; |
139 | | - |
| 138 | + |
140 | 139 | $text = substr( $text, 0, 4096 ); // don't bother with long text... |
141 | | - |
| 140 | + |
142 | 141 | $image = preg_quote( $wgContLang->getNsText( NS_IMAGE ), '#' ); |
143 | 142 | $text = str_replace( "'''", "", $text ); |
144 | 143 | $text = str_replace( "''", "", $text ); |
— | — | @@ -163,7 +162,7 @@ |
164 | 163 | $text = Sanitizer::decodeCharReferences( $text ); |
165 | 164 | return trim( $text ); |
166 | 165 | } |
167 | | - |
| 166 | + |
168 | 167 | /** |
169 | 168 | * Extract the first two sentences, if detectable, from the text. |
170 | 169 | * @param string $text |
— | — | @@ -177,13 +176,13 @@ |
178 | 177 | '.', '!', '?', // double-width roman forms |
179 | 178 | '。', // half-width ideographic full stop |
180 | 179 | ); |
181 | | - |
| 180 | + |
182 | 181 | $endgroup = implode( '', array_map( 'preg_quote', $endchars ) ); |
183 | 182 | $end = "[$endgroup]"; |
184 | 183 | $sentence = ".*?$end+"; |
185 | 184 | $firsttwo = "/^($sentence$sentence)/u"; |
186 | | - |
187 | | - if( preg_match( $firsttwo, $text, $matches ) ) { |
| 185 | + |
| 186 | + if ( preg_match( $firsttwo, $text, $matches ) ) { |
188 | 187 | return $matches[1]; |
189 | 188 | } else { |
190 | 189 | // Just return the first line |
— | — | @@ -191,7 +190,7 @@ |
192 | 191 | return trim( $lines[0] ); |
193 | 192 | } |
194 | 193 | } |
195 | | - |
| 194 | + |
196 | 195 | /** |
197 | 196 | * Extract a list of TOC links |
198 | 197 | * @param object $rev Database rows with revision data |
— | — | @@ -206,11 +205,11 @@ |
207 | 206 | $secs = |
208 | 207 | preg_split( |
209 | 208 | '/(^=+.+?=+|^<h[1-6].*?' . '>.*?<\/h[1-6].*?' . '>)(?!\S)/mi', |
210 | | - $text, -1, |
| 209 | + $text, - 1, |
211 | 210 | PREG_SPLIT_DELIM_CAPTURE ); |
212 | | - |
| 211 | + |
213 | 212 | $headers = array(); |
214 | | - for( $i = 1; $i < count( $secs ); $i += 2 ) { |
| 213 | + for ( $i = 1; $i < count( $secs ); $i += 2 ) { |
215 | 214 | $inside = preg_replace( '/^=+\s*(.*?)\s*=+/', '$1', $secs[$i] ); |
216 | 215 | $stripped = $this->_stripMarkup( $inside ); // strip internal markup and <h[1-6]> |
217 | 216 | $header = UtfNormal::cleanUp( $stripped ); |
— | — | @@ -220,7 +219,7 @@ |
221 | 220 | } |
222 | 221 | return $headers; |
223 | 222 | } |
224 | | - |
| 223 | + |
225 | 224 | /** |
226 | 225 | * Fetch the list of category links for this page |
227 | 226 | * @param object $rev Database rows with revision data |
— | — | @@ -234,17 +233,17 @@ |
235 | 234 | array( 'cl_to' ), |
236 | 235 | array( 'cl_from' => $id ), |
237 | 236 | 'AbstractFilter::_categoryLinks' ); |
238 | | - |
| 237 | + |
239 | 238 | $links = array(); |
240 | | - while( $row = $dbr->fetchObject( $result ) ) { |
| 239 | + while ( $row = $dbr->fetchObject( $result ) ) { |
241 | 240 | $category = Title::makeTitle( NS_CATEGORY, $row->cl_to ); |
242 | 241 | $links[$category->getText()] = $category->getFullUrl(); |
243 | 242 | } |
244 | 243 | $dbr->freeResult( $result ); |
245 | | - |
| 244 | + |
246 | 245 | return $links; |
247 | 246 | } |
248 | | - |
| 247 | + |
249 | 248 | /** |
250 | 249 | * Format a <sublink> element, like so: |
251 | 250 | * <sublink linktype="nav"> |
— | — | @@ -265,7 +264,6 @@ |
266 | 265 | Xml::element( 'link', null, substr( $url, 0, $maxUrlLength ) ) . |
267 | 266 | Xml::closeElement( 'sublink' ) . "\n"; |
268 | 267 | } |
269 | | - |
270 | 268 | } |
271 | 269 | |
272 | 270 | class NoredirectFilter extends DumpFilter { |
— | — | @@ -273,4 +271,3 @@ |
274 | 272 | return !$page->page_is_redirect; |
275 | 273 | } |
276 | 274 | } |
277 | | - |
Index: trunk/extensions/ActiveAbstract/GoogleCoopFilter.php |
— | — | @@ -1,10 +1,9 @@ |
2 | 2 | <?php |
| 3 | +require_once( 'AbstractFilter.php' ); |
3 | 4 | |
4 | | -require_once('AbstractFilter.php'); |
5 | | - |
6 | 5 | /** |
7 | 6 | * Dump filter for creation of a Google Coop 'Subscribed Links' file |
8 | | - * |
| 7 | + * |
9 | 8 | * Usage: |
10 | 9 | * |
11 | 10 | * HOSTNAME=kamelopedia.mormo.org php dumpBackup.php \ |
— | — | @@ -36,6 +35,7 @@ |
37 | 36 | * @addtogroup maintenance |
38 | 37 | * |
39 | 38 | */ |
| 39 | + |
40 | 40 | class GoogleCoopFilter extends AbstractFilter { |
41 | 41 | /** |
42 | 42 | * Register the filter function with the dump manager |
— | — | @@ -72,7 +72,7 @@ |
73 | 73 | $wgSitename . ':' . $this->title->getPrefixedText() ) . "\n"; |
74 | 74 | $xml .= ' ' . Xml::element( 'Output', array( 'name' => 'more_url' ), |
75 | 75 | $this->title->getFullUrl() ) . "\n"; |
76 | | - |
| 76 | + |
77 | 77 | // add abstract and links when we have revision data... |
78 | 78 | $this->revision = null; |
79 | 79 | |
— | — | @@ -87,9 +87,9 @@ |
88 | 88 | $text = '-'; |
89 | 89 | } |
90 | 90 | $lines = $this->_threeLines( $text ); |
91 | | - for( $i=1; $i<4; $i++ ) { |
| 91 | + for ( $i = 1; $i < 4; $i++ ) { |
92 | 92 | if ( $lines[$i] != '' ) { |
93 | | - $xml .= ' ' . Xml::element( 'Output', array( 'name' => 'text'.$i ), $lines[$i] ) . "\n"; |
| 93 | + $xml .= ' ' . Xml::element( 'Output', array( 'name' => 'text' . $i ), $lines[$i] ) . "\n"; |
94 | 94 | } |
95 | 95 | } |
96 | 96 | } |
— | — | @@ -105,14 +105,14 @@ |
106 | 106 | |
107 | 107 | /** |
108 | 108 | * Returns an array of three strings, each string of the array has no more than |
109 | | - * 79 characters. The three strings are the first three 'lines' of the text |
| 109 | + * 79 characters. The three strings are the first three 'lines' of the text |
110 | 110 | * given in $str. |
111 | 111 | * |
112 | 112 | * Lines are split at the last blank before position 79. |
113 | 113 | * If there's no blank before position 79, the entire string is returned as first |
114 | 114 | * element of the result array. |
115 | 115 | * |
116 | | - * This code needs a cleanup, it became rather ugly after adding exception |
| 116 | + * This code needs a cleanup, it became rather ugly after adding exception |
117 | 117 | * handling :-( |
118 | 118 | */ |
119 | 119 | function _threeLines( $str ) { |
— | — | @@ -153,6 +153,4 @@ |
154 | 154 | |
155 | 155 | return $s; |
156 | 156 | } |
157 | | - |
158 | 157 | } |
159 | | - |