r42547 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r42546 | r42547 | r42548 >
Date: 14:01, 25 October 2008
Author: tstarling
Status: old
Tags:
Comment:
Removed scary YAML parsing code, the function of which is to load from a user input string, except if the string has no line breaks in it, in which case an arbitrary local file/URL is loaded and handed to the attacker on a nicely encapsulated silver platter. Doesn't appear to be used either by core or extensions, but you never know when someone might try.
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/api/ApiFormatYaml_spyc.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/api/ApiFormatYaml_spyc.php
@@ -9,44 +9,6 @@
10 10 */
11 11
12 12 /**
13 - * A node, used by Spyc for parsing YAML.
14 - * @ingroup API
15 - */
16 - class YAMLNode {
17 - /**#@+
18 - * @access public
19 - * @var string
20 - */
21 - var $parent;
22 - var $id;
23 - /**#@-*/
24 - /**
25 - * @access public
26 - * @var mixed
27 - */
28 - var $data;
29 - /**
30 - * @access public
31 - * @var int
32 - */
33 - var $indent;
34 - /**
35 - * @access public
36 - * @var bool
37 - */
38 - var $children = false;
39 -
40 - /**
41 - * The constructor assigns the node a unique ID.
42 - * @access public
43 - * @return void
44 - */
45 - function YAMLNode() {
46 - $this->id = uniqid('');
47 - }
48 - }
49 -
50 - /**
51 13 * The Simple PHP YAML Class.
52 14 *
53 15 * This class can be used to read a YAML file and convert its contents
@@ -63,26 +25,6 @@
64 26 class Spyc {
65 27
66 28 /**
67 - * Load YAML into a PHP array statically
68 - *
69 - * The load method, when supplied with a YAML stream (string or file),
70 - * will do its best to convert YAML in a file into a PHP array. Pretty
71 - * simple.
72 - * Usage:
73 - * <code>
74 - * $array = Spyc::YAMLLoad('lucky.yml');
75 - * print_r($array);
76 - * </code>
77 - * @access public
78 - * @return array
79 - * @param string $input Path of YAML file or string containing YAML
80 - */
81 - function YAMLLoad($input) {
82 - $spyc = new Spyc;
83 - return $spyc->load($input);
84 - }
85 -
86 - /**
87 29 * Dump YAML from PHP array statically
88 30 *
89 31 * The dump method, when supplied with an array, will do its best
@@ -109,176 +51,6 @@
110 52 }
111 53
112 54 /**
113 - * Load YAML into a PHP array from an instantiated object
114 - *
115 - * The load method, when supplied with a YAML stream (string or file path),
116 - * will do its best to convert the YAML into a PHP array. Pretty simple.
117 - * Usage:
118 - * <code>
119 - * $parser = new Spyc;
120 - * $array = $parser->load('lucky.yml');
121 - * print_r($array);
122 - * </code>
123 - * @access public
124 - * @return array
125 - * @param string $input Path of YAML file or string containing YAML
126 - */
127 - function load($input) {
128 - // See what type of input we're talking about
129 - // If it's not a file, assume it's a string
130 - if (!empty($input) && (strpos($input, "\n") === false)
131 - && file_exists($input)) {
132 - $yaml = file($input);
133 - } else {
134 - $yaml = explode("\n",$input);
135 - }
136 - // Initiate some objects and values
137 - $base = new YAMLNode;
138 - $base->indent = 0;
139 - $this->_lastIndent = 0;
140 - $this->_lastNode = $base->id;
141 - $this->_inBlock = false;
142 - $this->_isInline = false;
143 -
144 - foreach ($yaml as $linenum => $line) {
145 - $ifchk = trim($line);
146 -
147 - // If the line starts with a tab (instead of a space), throw a fit.
148 - if (preg_match('/^(\t)+(\w+)/', $line)) {
149 - $err = 'ERROR: Line '. ($linenum + 1) .' in your input YAML begins'.
150 - ' with a tab. YAML only recognizes spaces. Please reformat.';
151 - die($err);
152 - }
153 -
154 - if ($this->_inBlock === false && empty($ifchk)) {
155 - continue;
156 - } elseif ($this->_inBlock == true && empty($ifchk)) {
157 - $last =& $this->_allNodes[$this->_lastNode];
158 - $last->data[key($last->data)] .= "\n";
159 - } elseif ($ifchk{0} != '#' && substr($ifchk,0,3) != '---') {
160 - // Create a new node and get its indent
161 - $node = new YAMLNode;
162 - $node->indent = $this->_getIndent($line);
163 -
164 - // Check where the node lies in the hierarchy
165 - if ($this->_lastIndent == $node->indent) {
166 - // If we're in a block, add the text to the parent's data
167 - if ($this->_inBlock === true) {
168 - $parent =& $this->_allNodes[$this->_lastNode];
169 - $parent->data[key($parent->data)] .= trim($line).$this->_blockEnd;
170 - } else {
171 - // The current node's parent is the same as the previous node's
172 - if (isset($this->_allNodes[$this->_lastNode])) {
173 - $node->parent = $this->_allNodes[$this->_lastNode]->parent;
174 - }
175 - }
176 - } elseif ($this->_lastIndent < $node->indent) {
177 - if ($this->_inBlock === true) {
178 - $parent =& $this->_allNodes[$this->_lastNode];
179 - $parent->data[key($parent->data)] .= trim($line).$this->_blockEnd;
180 - } elseif ($this->_inBlock === false) {
181 - // The current node's parent is the previous node
182 - $node->parent = $this->_lastNode;
183 -
184 - // If the value of the last node's data was > or | we need to
185 - // start blocking i.e. taking in all lines as a text value until
186 - // we drop our indent.
187 - $parent =& $this->_allNodes[$node->parent];
188 - $this->_allNodes[$node->parent]->children = true;
189 - if (is_array($parent->data)) {
190 - $chk = $parent->data[key($parent->data)];
191 - if ($chk === '>') {
192 - $this->_inBlock = true;
193 - $this->_blockEnd = ' ';
194 - $parent->data[key($parent->data)] =
195 - str_replace('>','',$parent->data[key($parent->data)]);
196 - $parent->data[key($parent->data)] .= trim($line).' ';
197 - $this->_allNodes[$node->parent]->children = false;
198 - $this->_lastIndent = $node->indent;
199 - } elseif ($chk === '|') {
200 - $this->_inBlock = true;
201 - $this->_blockEnd = "\n";
202 - $parent->data[key($parent->data)] =
203 - str_replace('|','',$parent->data[key($parent->data)]);
204 - $parent->data[key($parent->data)] .= trim($line)."\n";
205 - $this->_allNodes[$node->parent]->children = false;
206 - $this->_lastIndent = $node->indent;
207 - }
208 - }
209 - }
210 - } elseif ($this->_lastIndent > $node->indent) {
211 - // Any block we had going is dead now
212 - if ($this->_inBlock === true) {
213 - $this->_inBlock = false;
214 - if ($this->_blockEnd = "\n") {
215 - $last =& $this->_allNodes[$this->_lastNode];
216 - $last->data[key($last->data)] =
217 - trim($last->data[key($last->data)]);
218 - }
219 - }
220 -
221 - // We don't know the parent of the node so we have to find it
222 - // foreach ($this->_allNodes as $n) {
223 - foreach ($this->_indentSort[$node->indent] as $n) {
224 - if ($n->indent == $node->indent) {
225 - $node->parent = $n->parent;
226 - }
227 - }
228 - }
229 -
230 - if ($this->_inBlock === false) {
231 - // Set these properties with information from our current node
232 - $this->_lastIndent = $node->indent;
233 - // Set the last node
234 - $this->_lastNode = $node->id;
235 - // Parse the YAML line and return its data
236 - $node->data = $this->_parseLine($line);
237 - // Add the node to the master list
238 - $this->_allNodes[$node->id] = $node;
239 - // Add a reference to the node in an indent array
240 - $this->_indentSort[$node->indent][] =& $this->_allNodes[$node->id];
241 - // Add a reference to the node in a References array if this node
242 - // has a YAML reference in it.
243 - if (
244 - ( (is_array($node->data)) &&
245 - isset($node->data[key($node->data)]) &&
246 - (!is_array($node->data[key($node->data)])) )
247 - &&
248 - ( (preg_match('/^&([^ ]+)/',$node->data[key($node->data)]))
249 - ||
250 - (preg_match('/^\*([^ ]+)/',$node->data[key($node->data)])) )
251 - ) {
252 - $this->_haveRefs[] =& $this->_allNodes[$node->id];
253 - } elseif (
254 - ( (is_array($node->data)) &&
255 - isset($node->data[key($node->data)]) &&
256 - (is_array($node->data[key($node->data)])) )
257 - ) {
258 - // Incomplete reference making code. Ugly, needs cleaned up.
259 - foreach ($node->data[key($node->data)] as $d) {
260 - if ( !is_array($d) &&
261 - ( (preg_match('/^&([^ ]+)/',$d))
262 - ||
263 - (preg_match('/^\*([^ ]+)/',$d)) )
264 - ) {
265 - $this->_haveRefs[] =& $this->_allNodes[$node->id];
266 - }
267 - }
268 - }
269 - }
270 - }
271 - }
272 - unset($node);
273 -
274 - // Here we travel through node-space and pick out references (& and *)
275 - $this->_linkReferences();
276 -
277 - // Build the PHP array out of node-space
278 - $trunk = $this->_buildArray();
279 - return $trunk;
280 - }
281 -
282 - /**
283 55 * Dump PHP array to YAML
284 56 *
285 57 * The dump method, when supplied with an array, will do its best
@@ -476,408 +248,4 @@
477 249 }
478 250 return $value;
479 251 }
480 -
481 - /* Methods used in loading */
482 -
483 - /**
484 - * Finds and returns the indentation of a YAML line
485 - * @access private
486 - * @return int
487 - * @param string $line A line from the YAML file
488 - */
489 - function _getIndent($line) {
490 - $match = array();
491 - preg_match('/^\s{1,}/',$line,$match);
492 - if (!empty($match[0])) {
493 - $indent = substr_count($match[0],' ');
494 - } else {
495 - $indent = 0;
496 - }
497 - return $indent;
498 - }
499 -
500 - /**
501 - * Parses YAML code and returns an array for a node
502 - * @access private
503 - * @return array
504 - * @param string $line A line from the YAML file
505 - */
506 - function _parseLine($line) {
507 - $line = trim($line);
508 -
509 - $array = array();
510 -
511 - if (preg_match('/^-(.*):$/',$line)) {
512 - // It's a mapped sequence
513 - $key = trim(substr(substr($line,1),0,-1));
514 - $array[$key] = '';
515 - } elseif ($line[0] == '-' && substr($line,0,3) != '---') {
516 - // It's a list item but not a new stream
517 - if (strlen($line) > 1) {
518 - $value = trim(substr($line,1));
519 - // Set the type of the value. Int, string, etc
520 - $value = $this->_toType($value);
521 - $array[] = $value;
522 - } else {
523 - $array[] = array();
524 - }
525 - } elseif (preg_match('/^(.+):/',$line,$key)) {
526 - // It's a key/value pair most likely
527 - // If the key is in double quotes pull it out
528 - $matches = array();
529 - if (preg_match('/^(["\'](.*)["\'](\s)*:)/',$line,$matches)) {
530 - $value = trim(str_replace($matches[1],'',$line));
531 - $key = $matches[2];
532 - } else {
533 - // Do some guesswork as to the key and the value
534 - $explode = explode(':',$line);
535 - $key = trim($explode[0]);
536 - array_shift($explode);
537 - $value = trim(implode(':',$explode));
538 - }
539 -
540 - // Set the type of the value. Int, string, etc
541 - $value = $this->_toType($value);
542 - if (empty($key)) {
543 - $array[] = $value;
544 - } else {
545 - $array[$key] = $value;
546 - }
547 - }
548 - return $array;
549 - }
550 -
551 - /**
552 - * Finds the type of the passed value, returns the value as the new type.
553 - * @access private
554 - * @param string $value
555 - * @return mixed
556 - */
557 - function _toType($value) {
558 - $matches = array();
559 - if (preg_match('/^("(.*)"|\'(.*)\')/',$value,$matches)) {
560 - $value = (string)preg_replace('/(\'\'|\\\\\')/',"'",end($matches));
561 - $value = preg_replace('/\\\\"/','"',$value);
562 - } elseif (preg_match('/^\\[(.+)\\]$/',$value,$matches)) {
563 - // Inline Sequence
564 -
565 - // Take out strings sequences and mappings
566 - $explode = $this->_inlineEscape($matches[1]);
567 -
568 - // Propogate value array
569 - $value = array();
570 - foreach ($explode as $v) {
571 - $value[] = $this->_toType($v);
572 - }
573 - } elseif (strpos($value,': ')!==false && !preg_match('/^{(.+)/',$value)) {
574 - // It's a map
575 - $array = explode(': ',$value);
576 - $key = trim($array[0]);
577 - array_shift($array);
578 - $value = trim(implode(': ',$array));
579 - $value = $this->_toType($value);
580 - $value = array($key => $value);
581 - } elseif (preg_match("/{(.+)}$/",$value,$matches)) {
582 - // Inline Mapping
583 -
584 - // Take out strings sequences and mappings
585 - $explode = $this->_inlineEscape($matches[1]);
586 -
587 - // Propogate value array
588 - $array = array();
589 - foreach ($explode as $v) {
590 - $array = $array + $this->_toType($v);
591 - }
592 - $value = $array;
593 - } elseif (strtolower($value) == 'null' or $value === '' or $value == '~') {
594 - $value = NULL;
595 - } elseif (ctype_digit($value)) {
596 - $value = (int)$value;
597 - } elseif (in_array(strtolower($value),
598 - array('true', 'on', '+', 'yes', 'y'))) {
599 - $value = TRUE;
600 - } elseif (in_array(strtolower($value),
601 - array('false', 'off', '-', 'no', 'n'))) {
602 - $value = FALSE;
603 - } elseif (is_numeric($value)) {
604 - $value = (float)$value;
605 - } else {
606 - // Just a normal string, right?
607 - $value = trim(preg_replace('/#(.+)$/','',$value));
608 - }
609 -
610 - return $value;
611 - }
612 -
613 - /**
614 - * Used in inlines to check for more inlines or quoted strings
615 - * @access private
616 - * @return array
617 - */
618 - function _inlineEscape($inline) {
619 - // There's gotta be a cleaner way to do this...
620 - // While pure sequences seem to be nesting just fine,
621 - // pure mappings and mappings with sequences inside can't go very
622 - // deep. This needs to be fixed.
623 -
624 - // Check for strings
625 - $regex = '/(?:(")|(?:\'))((?(1)[^"]+|[^\']+))(?(1)"|\')/';
626 - $strings = array();
627 - if (preg_match_all($regex,$inline,$strings)) {
628 - $saved_strings[] = $strings[0][0];
629 - $inline = preg_replace($regex,'YAMLString',$inline);
630 - }
631 - unset($regex);
632 -
633 - // Check for sequences
634 - $seqs = array();
635 - if (preg_match_all('/\[(.+)\]/U',$inline,$seqs)) {
636 - $inline = preg_replace('/\[(.+)\]/U','YAMLSeq',$inline);
637 - $seqs = $seqs[0];
638 - }
639 -
640 - // Check for mappings
641 - $maps = array();
642 - if (preg_match_all('/{(.+)}/U',$inline,$maps)) {
643 - $inline = preg_replace('/{(.+)}/U','YAMLMap',$inline);
644 - $maps = $maps[0];
645 - }
646 -
647 - $explode = explode(', ',$inline);
648 -
649 - // Re-add the strings
650 - if (!empty($saved_strings)) {
651 - $i = 0;
652 - foreach ($explode as $key => $value) {
653 - if (strpos($value,'YAMLString')) {
654 - $explode[$key] = str_replace('YAMLString',$saved_strings[$i],$value);
655 - ++$i;
656 - }
657 - }
658 - }
659 -
660 - // Re-add the sequences
661 - if (!empty($seqs)) {
662 - $i = 0;
663 - foreach ($explode as $key => $value) {
664 - if (strpos($value,'YAMLSeq') !== false) {
665 - $explode[$key] = str_replace('YAMLSeq',$seqs[$i],$value);
666 - ++$i;
667 - }
668 - }
669 - }
670 -
671 - // Re-add the mappings
672 - if (!empty($maps)) {
673 - $i = 0;
674 - foreach ($explode as $key => $value) {
675 - if (strpos($value,'YAMLMap') !== false) {
676 - $explode[$key] = str_replace('YAMLMap',$maps[$i],$value);
677 - ++$i;
678 - }
679 - }
680 - }
681 -
682 - return $explode;
683 - }
684 -
685 - /**
686 - * Builds the PHP array from all the YAML nodes we've gathered
687 - * @access private
688 - * @return array
689 - */
690 - function _buildArray() {
691 - $trunk = array();
692 -
693 - if (!isset($this->_indentSort[0])) {
694 - return $trunk;
695 - }
696 -
697 - foreach ($this->_indentSort[0] as $n) {
698 - if (empty($n->parent)) {
699 - $this->_nodeArrayizeData($n);
700 - // Check for references and copy the needed data to complete them.
701 - $this->_makeReferences($n);
702 - // Merge our data with the big array we're building
703 - $trunk = $this->_array_kmerge($trunk,$n->data);
704 - }
705 - }
706 -
707 - return $trunk;
708 - }
709 -
710 - /**
711 - * Traverses node-space and sets references (& and *) accordingly
712 - * @access private
713 - * @return bool
714 - */
715 - function _linkReferences() {
716 - if (is_array($this->_haveRefs)) {
717 - foreach ($this->_haveRefs as $node) {
718 - if (!empty($node->data)) {
719 - $key = key($node->data);
720 - // If it's an array, don't check.
721 - if (is_array($node->data[$key])) {
722 - foreach ($node->data[$key] as $k => $v) {
723 - $this->_linkRef($node,$key,$k,$v);
724 - }
725 - } else {
726 - $this->_linkRef($node,$key);
727 - }
728 - }
729 - }
730 - }
731 - return true;
732 - }
733 -
734 - function _linkRef(&$n,$key,$k = NULL,$v = NULL) {
735 - if (empty($k) && empty($v)) {
736 - // Look for &refs
737 - $matches = array();
738 - if (preg_match('/^&([^ ]+)/',$n->data[$key],$matches)) {
739 - // Flag the node so we know it's a reference
740 - $this->_allNodes[$n->id]->ref = substr($matches[0],1);
741 - $this->_allNodes[$n->id]->data[$key] =
742 - substr($n->data[$key],strlen($matches[0])+1);
743 - // Look for *refs
744 - } elseif (preg_match('/^\*([^ ]+)/',$n->data[$key],$matches)) {
745 - $ref = substr($matches[0],1);
746 - // Flag the node as having a reference
747 - $this->_allNodes[$n->id]->refKey = $ref;
748 - }
749 - } elseif (!empty($k) && !empty($v)) {
750 - if (preg_match('/^&([^ ]+)/',$v,$matches)) {
751 - // Flag the node so we know it's a reference
752 - $this->_allNodes[$n->id]->ref = substr($matches[0],1);
753 - $this->_allNodes[$n->id]->data[$key][$k] =
754 - substr($v,strlen($matches[0])+1);
755 - // Look for *refs
756 - } elseif (preg_match('/^\*([^ ]+)/',$v,$matches)) {
757 - $ref = substr($matches[0],1);
758 - // Flag the node as having a reference
759 - $this->_allNodes[$n->id]->refKey = $ref;
760 - }
761 - }
762 - }
763 -
764 - /**
765 - * Finds the children of a node and aids in the building of the PHP array
766 - * @access private
767 - * @param int $nid The id of the node whose children we're gathering
768 - * @return array
769 - */
770 - function _gatherChildren($nid) {
771 - $return = array();
772 - $node =& $this->_allNodes[$nid];
773 - foreach ($this->_allNodes as $z) {
774 - if ($z->parent == $node->id) {
775 - // We found a child
776 - $this->_nodeArrayizeData($z);
777 - // Check for references
778 - $this->_makeReferences($z);
779 - // Merge with the big array we're returning
780 - // The big array being all the data of the children of our parent node
781 - $return = $this->_array_kmerge($return,$z->data);
782 - }
783 - }
784 - return $return;
785 - }
786 -
787 - /**
788 - * Turns a node's data and its children's data into a PHP array
789 - *
790 - * @access private
791 - * @param array $node The node which you want to arrayize
792 - * @return boolean
793 - */
794 - function _nodeArrayizeData(&$node) {
795 - if (is_array($node->data) && $node->children == true) {
796 - // This node has children, so we need to find them
797 - $childs = $this->_gatherChildren($node->id);
798 - // We've gathered all our children's data and are ready to use it
799 - $key = key($node->data);
800 - $key = empty($key) ? 0 : $key;
801 - // If it's an array, add to it of course
802 - if (is_array($node->data[$key])) {
803 - $node->data[$key] = $this->_array_kmerge($node->data[$key],$childs);
804 - } else {
805 - $node->data[$key] = $childs;
806 - }
807 - } elseif (!is_array($node->data) && $node->children == true) {
808 - // Same as above, find the children of this node
809 - $childs = $this->_gatherChildren($node->id);
810 - $node->data = array();
811 - $node->data[] = $childs;
812 - }
813 -
814 - // We edited $node by reference, so just return true
815 - return true;
816 - }
817 -
818 - /**
819 - * Traverses node-space and copies references to / from this object.
820 - * @access private
821 - * @param object $z A node whose references we wish to make real
822 - * @return bool
823 - */
824 - function _makeReferences(&$z) {
825 - // It is a reference
826 - if (isset($z->ref)) {
827 - $key = key($z->data);
828 - // Copy the data to this object for easy retrieval later
829 - $this->ref[$z->ref] =& $z->data[$key];
830 - // It has a reference
831 - } elseif (isset($z->refKey)) {
832 - if (isset($this->ref[$z->refKey])) {
833 - $key = key($z->data);
834 - // Copy the data from this object to make the node a real reference
835 - $z->data[$key] =& $this->ref[$z->refKey];
836 - }
837 - }
838 - return true;
839 - }
840 -
841 -
842 - /**
843 - * Merges arrays and maintains numeric keys.
844 - *
845 - * An ever-so-slightly modified version of the array_kmerge() function posted
846 - * to php.net by mail at nospam dot iaindooley dot com on 2004-04-08.
847 - *
848 - * http://www.php.net/manual/en/function.array-merge.php#41394
849 - *
850 - * @access private
851 - * @param array $arr1
852 - * @param array $arr2
853 - * @return array
854 - */
855 - function _array_kmerge($arr1,$arr2) {
856 - if(!is_array($arr1))
857 - $arr1 = array();
858 -
859 - if(!is_array($arr2))
860 - $arr2 = array();
861 -
862 - $keys1 = array_keys($arr1);
863 - $keys2 = array_keys($arr2);
864 - $keys = array_merge($keys1,$keys2);
865 - $vals1 = array_values($arr1);
866 - $vals2 = array_values($arr2);
867 - $vals = array_merge($vals1,$vals2);
868 - $ret = array();
869 -
870 - foreach($keys as $key) {
871 - list( /* unused */ ,$val) = each($vals);
872 - // This is the good part! If a key already exists, but it's part of a
873 - // sequence (an int), just keep addin numbers until we find a fresh one.
874 - if (isset($ret[$key]) and is_int($key)) {
875 - while (array_key_exists($key, $ret)) {
876 - $key++;
877 - }
878 - }
879 - $ret[$key] = $val;
880 - }
881 -
882 - return $ret;
883 - }
884 252 }
Index: trunk/phase3/includes/AutoLoader.php
@@ -290,7 +290,6 @@
291 291 'Services_JSON_Error' => 'includes/api/ApiFormatJson_json.php',
292 292 'Spyc' => 'includes/api/ApiFormatYaml_spyc.php',
293 293 'UsageException' => 'includes/api/ApiMain.php',
294 - 'YAMLNode' => 'includes/api/ApiFormatYaml_spyc.php',
295 294
296 295 # includes/db
297 296 'Blob' => 'includes/db/Database.php',

Status & tagging log