Index: trunk/extensions/OAI/OAIRepo_body.php |
— | — | @@ -459,22 +459,17 @@ |
460 | 460 | } |
461 | 461 | } |
462 | 462 | |
463 | | - $writer = new OAIDumpWriter(); |
464 | | - |
465 | 463 | # Fetch one extra row to check if we need a resumptionToken |
466 | | - $resultSet = $this->fetchRows( $from, $until, $this->chunkSize() + 1, $resume, $metadataPrefix ); |
| 464 | + $resultSet = $this->fetchRows( $from, $until, $this->chunkSize() + 1, $resume ); |
467 | 465 | $count = min( $resultSet->numRows(), $this->chunkSize() ); |
468 | 466 | if( $count ) { |
469 | 467 | echo "<$verb>\n"; |
| 468 | + // buffer everything up |
| 469 | + $rows = array(); |
470 | 470 | $this->_lastSequence = null; |
471 | 471 | for( $i = 0; $i < $count; $i++ ) { |
472 | 472 | $row = $resultSet->fetchObject(); |
473 | | - $item = new WikiOAIRecord( $row, $writer ); |
474 | | - if( $withData ) { |
475 | | - echo $item->renderRecord( $metadataPrefix, $this->timeGranularity() ); |
476 | | - } else { |
477 | | - echo $item->renderHeader( $this->timeGranularity() ); |
478 | | - } |
| 473 | + $rows[] = $row; |
479 | 474 | $this->_lastSequence = $row->up_sequence; |
480 | 475 | } |
481 | 476 | if( $row = $resultSet->fetchObject() ) { |
— | — | @@ -483,21 +478,34 @@ |
484 | 479 | $token = "$metadataPrefix:$row->up_sequence:$limit"; |
485 | 480 | else |
486 | 481 | $token = "$metadataPrefix:$row->up_sequence"; |
| 482 | + } |
| 483 | + $resultSet->free(); |
| 484 | + // init writer |
| 485 | + $writer = $this->makeWriter($metadataPrefix,$rows); |
| 486 | + // render |
| 487 | + foreach( $rows as $row ) { |
| 488 | + $item = new WikiOAIRecord( $row, $writer ); |
| 489 | + if( $withData ) { |
| 490 | + echo $item->renderRecord( $metadataPrefix, $this->timeGranularity() ); |
| 491 | + } else { |
| 492 | + echo $item->renderHeader( $this->timeGranularity() ); |
| 493 | + } |
| 494 | + } |
| 495 | + if( isset($token) ) { |
487 | 496 | echo oaiTag( 'resumptionToken', array(), $token ) . "\n"; |
488 | 497 | } |
489 | 498 | echo "</$verb>\n"; |
490 | 499 | } else { |
491 | 500 | $this->addError( 'noRecordsMatch', 'No records available match the request.' ); |
492 | 501 | } |
493 | | - $resultSet->free(); |
494 | 502 | } |
495 | 503 | |
496 | 504 | function getRecord() { |
497 | | - $writer = new OAIDumpWriter(); |
498 | 505 | $metadataPrefix = $this->validateMetadata( 'metadataPrefix' ); |
499 | 506 | if( !$this->errorCondition() ) { |
500 | | - $row = $this->getRecordItem( $this->_request['identifier'], $metadataPrefix ); |
| 507 | + $row = $this->getRecordItem( $this->_request['identifier']); |
501 | 508 | if( !$this->errorCondition() ) { |
| 509 | + $writer = $this->makeWriter($metadataPrefix,array($row)); |
502 | 510 | $item = new WikiOAIRecord( $row, $writer ); |
503 | 511 | echo "<GetRecord>\n"; |
504 | 512 | echo $item->renderRecord( $metadataPrefix, $this->timeGranularity() ); |
— | — | @@ -506,10 +514,10 @@ |
507 | 515 | } |
508 | 516 | } |
509 | 517 | |
510 | | - function getRecordItem( $identifier, $metadataPrefix ) { |
| 518 | + function getRecordItem( $identifier) { |
511 | 519 | $pageid = $this->stripIdentifier( $identifier ); |
512 | 520 | if( $pageid ) { |
513 | | - $resultSet = $this->fetchRecord( $pageid, $metadataPrefix ); |
| 521 | + $resultSet = $this->fetchRecord( $pageid); |
514 | 522 | $row = $resultSet->fetchObject(); |
515 | 523 | $resultSet->free(); |
516 | 524 | if( $row ) { |
— | — | @@ -556,16 +564,25 @@ |
557 | 565 | wfDebugDieBacktrace( 'Bogus result.' ); |
558 | 566 | } |
559 | 567 | } |
| 568 | + /** Pick the dump writer for $metadataPrefix; 'lsearch' gets a writer preloaded with reference data for $rows. */ |
| 569 | + function makeWriter($metadataPrefix, $rows) { |
| 570 | + if( $metadataPrefix != 'lsearch' ) { |
| 571 | + return new OAIDumpWriter; |
| 572 | + } |
| 573 | + $referenceData = $this->fetchReferenceData( $rows ); |
| 574 | + $lsearchWriter = new OAILSearchWriter( $referenceData ); |
| 575 | + $referenceData->free(); // released once the writer has been constructed from it |
| 576 | + return $lsearchWriter; |
| 577 | + } |
560 | 578 | |
561 | 579 | function newSchema() { |
562 | 580 | global $wgVersion; |
563 | 581 | return version_compare( $wgVersion, '1.5alpha', 'ge' ); |
564 | 582 | } |
565 | 583 | |
566 | | - function fetchRecord( $pageid, $type ) { |
567 | | - extract( $this->_db->tableNames( 'updates', 'cur', 'page', 'revision', 'text', 'pagelinks' ) ); |
568 | | - if( $type == 'lsearch' ){ |
569 | | - $sql = "SELECT up_page,page_id,up_timestamp,up_action,up_sequence, |
| 584 | + function fetchRecord( $pageid ) { |
| 585 | + extract( $this->_db->tableNames( 'updates', 'page', 'revision', 'text' ) ); |
| 586 | + $sql = "SELECT up_page,page_id,up_timestamp,up_action,up_sequence, |
570 | 587 | page_namespace, |
571 | 588 | page_title, |
572 | 589 | old_text, |
— | — | @@ -577,29 +594,6 @@ |
578 | 595 | rev_user_text, |
579 | 596 | rev_timestamp, |
580 | 597 | page_restrictions, |
581 | | - rev_minor_edit, |
582 | | - COUNT(pl_from) as num_page_ref |
583 | | - FROM $updates |
584 | | - LEFT JOIN $page ON page_id=up_page |
585 | | - LEFT JOIN $revision ON page_latest=rev_id |
586 | | - LEFT JOIN $text ON rev_text_id=old_id |
587 | | - LEFT JOIN $pagelinks ON page_namespace=pl_namespace AND page_title=pl_title |
588 | | - WHERE up_page=" . IntVal( $pageid ) . " |
589 | | - GROUP BY up_page LIMIT 1"; |
590 | | - } else{ |
591 | | - if( $this->newSchema() ) { |
592 | | - $sql = "SELECT up_page,page_id,up_timestamp,up_action,up_sequence, |
593 | | - page_namespace, |
594 | | - page_title, |
595 | | - old_text, |
596 | | - old_flags, |
597 | | - rev_id, |
598 | | - rev_deleted, |
599 | | - rev_comment, |
600 | | - rev_user, |
601 | | - rev_user_text, |
602 | | - rev_timestamp, |
603 | | - page_restrictions, |
604 | 598 | rev_minor_edit |
605 | 599 | FROM $updates,$page,$revision,$text |
606 | 600 | WHERE up_page=" . IntVal( $pageid ) . ' |
— | — | @@ -607,34 +601,15 @@ |
608 | 602 | AND page_latest=rev_id |
609 | 603 | AND rev_text_id=old_id |
610 | 604 | LIMIT 1'; |
611 | | - } else { // FIXME: this will work only with dublin core? |
612 | | - $sql = "SELECT page_id,up_timestamp,up_action,up_sequence, |
613 | | - cur_namespace AS namespace, |
614 | | - cur_title AS title, |
615 | | - cur_text AS text, |
616 | | - '' AS flags, |
617 | | - cur_comment AS comment, |
618 | | - cur_user AS user, |
619 | | - cur_user_text AS user_text, |
620 | | - cur_timestamp AS timestamp, |
621 | | - cur_restrictions AS restrictions, |
622 | | - cur_minor_edit AS minor_edit |
623 | | - FROM $updates LEFT JOIN $cur ON cur_id=up_page |
624 | | - WHERE up_page=" . IntVal( $pageid ) . |
625 | | - ' LIMIT 1'; |
626 | | - } |
627 | | - } |
628 | 605 | |
629 | 606 | return $this->_db->resultObject( $this->_db->query( $sql ) ); |
630 | 607 | } |
631 | 608 | |
632 | | - function fetchRows( $from, $until, $chunk, $token = null, $type ) { |
633 | | - extract( $this->_db->tableNames( 'updates', 'cur', 'page', 'revision', 'text', 'pagelinks' ) ); |
| 609 | + function fetchRows( $from, $until, $chunk, $token = null ) { |
| 610 | + extract( $this->_db->tableNames( 'updates', 'page', 'revision', 'text' ) ); |
634 | 611 | $chunk = IntVal( $chunk ); |
635 | 612 | |
636 | | - // lucene-search output: joins pagelinks table to get page ranks |
637 | | - if( $type == "lsearch" ){ |
638 | | - $sql = "SELECT up_page,page_id,up_timestamp,up_action,up_sequence, |
| 613 | + $sql = "SELECT up_page,page_id,up_timestamp,up_action,up_sequence, |
639 | 614 | page_namespace, |
640 | 615 | page_title, |
641 | 616 | old_text, |
— | — | @@ -646,47 +621,12 @@ |
647 | 622 | rev_user_text, |
648 | 623 | rev_timestamp, |
649 | 624 | page_restrictions, |
650 | | - rev_minor_edit, |
651 | | - COUNT(pl_from) as num_page_ref |
652 | | - FROM $updates |
653 | | - LEFT JOIN $page ON page_id=up_page |
654 | | - LEFT JOIN $revision ON page_latest=rev_id |
655 | | - LEFT JOIN $text ON rev_text_id=old_id |
656 | | - LEFT JOIN $pagelinks ON page_namespace=pl_namespace AND page_title=pl_title"; |
657 | | - } else{ |
658 | | - if( $this->newSchema() ) { |
659 | | - $sql = "SELECT up_page,page_id,up_timestamp,up_action,up_sequence, |
660 | | - page_namespace, |
661 | | - page_title, |
662 | | - old_text, |
663 | | - old_flags, |
664 | | - rev_id, |
665 | | - rev_deleted, |
666 | | - rev_comment, |
667 | | - rev_user, |
668 | | - rev_user_text, |
669 | | - rev_timestamp, |
670 | | - page_restrictions, |
671 | 625 | rev_minor_edit |
672 | 626 | FROM $updates |
673 | 627 | LEFT JOIN $page ON page_id=up_page |
674 | 628 | LEFT JOIN $revision ON page_latest=rev_id |
675 | 629 | LEFT JOIN $text ON rev_text_id=old_id "; |
676 | | - } else { // FIXME: this will only work with dublin core? |
677 | | - $sql = "SELECT page_id,up_timestamp,up_action,up_sequence, |
678 | | - cur_namespace AS namespace, |
679 | | - cur_title AS title, |
680 | | - cur_text AS text, |
681 | | - '' AS flags, |
682 | | - cur_comment AS comment, |
683 | | - cur_user AS user, |
684 | | - cur_user_text AS user_text, |
685 | | - cur_timestamp AS timestamp, |
686 | | - cur_restrictions AS restrictions, |
687 | | - cur_minor_edit AS minor_edit |
688 | | - FROM $updates LEFT JOIN $cur ON cur_id=up_page "; |
689 | | - } |
690 | | - } |
| 630 | + |
691 | 631 | $where = array(); |
692 | 632 | if( $token ) { |
693 | 633 | $where[] = 'up_sequence >= ' . IntVal( $token ); |
— | — | @@ -703,12 +643,40 @@ |
704 | 644 | if( !empty( $where ) ) { |
705 | 645 | $sql .= ' WHERE ' . implode( ' AND ', $where ); |
706 | 646 | } |
707 | | - if($type == 'lsearch') |
708 | | - $sql .= " GROUP BY up_page"; |
709 | 647 | $sql .= " ORDER BY $order LIMIT $chunk"; |
710 | 648 | |
711 | 649 | return $this->_db->resultObject( $this->_db->query( $sql ) ); |
712 | 650 | } |
| 651 | + /** Query link counts (num_page_ref) and linking redirect pages for the up_page ids in $rows; returns a DB result object. */ |
| 652 | + function fetchReferenceData( $rows ) { |
| 653 | + $page_ids = array(); |
| 654 | + foreach($rows as $row){ |
| 655 | + $page_ids[] = intval( $row->up_page ); // cast: the ids are spliced into the SQL below |
| 656 | + } |
| 657 | + // NOTE(review): $rows must not be empty, otherwise the query ends up with an invalid "IN ()" |
| 658 | + if(count($page_ids) == 1) |
| 659 | + $pages_where = " AND up_page = $page_ids[0] "; |
| 660 | + else |
| 661 | + $pages_where = " AND up_page IN (".implode(",",$page_ids).") "; |
| 662 | + // use the table names resolved by the database layer rather than bare literals |
| 663 | + extract( $this->_db->tableNames( 'updates', 'page', 'pagelinks' ) ); |
| 664 | + $sql = "SELECT up_page,up_sequence, |
| 665 | + r.page_namespace AS page_namespace, |
| 666 | + r.page_title AS page_title, |
| 667 | + COUNT(pl.pl_from) AS num_page_ref |
| 668 | + FROM $updates |
| 669 | + LEFT JOIN $page AS p ON p.page_id=up_page |
| 670 | + LEFT JOIN $pagelinks AS pl ON p.page_namespace=pl.pl_namespace AND p.page_title=pl.pl_title |
| 671 | + LEFT JOIN $page AS ns ON pl.pl_from=ns.page_id |
| 672 | + LEFT JOIN $page AS r ON pl.pl_from=r.page_id AND r.page_is_redirect=1 |
| 673 | + LEFT JOIN $pagelinks AS rpl ON r.page_namespace=rpl.pl_namespace AND r.page_title=rpl.pl_title |
| 674 | + WHERE ns.page_namespace = p.page_namespace |
| 675 | + $pages_where |
| 676 | + GROUP BY up_page,r.page_id"; |
| 677 | + |
| 678 | + return $this->_db->resultObject( $this->_db->query( $sql ) ); |
| 679 | + } |
| 680 | + |
713 | 681 | |
714 | 682 | function identifyInfo() { |
715 | 683 | global $wgSitename; |
— | — | @@ -737,8 +705,8 @@ |
738 | 706 | 'namespace' => 'http://www.mediawiki.org/xml/export-0.3/', |
739 | 707 | 'schema' => 'http://www.mediawiki.org/xml/export-0.3.xsd' ) , |
740 | 708 | 'lsearch' => array( |
741 | | - 'namespace' => 'http://www.mediawiki.org/xml/export-0.3/', |
742 | | - 'schema' => 'http://www.mediawiki.org/xml/export-0.3.xsd' ) ); |
| 709 | + 'namespace' => 'http://www.mediawiki.org/xml/lsearch-0.1/', |
| 710 | + 'schema' => 'http://www.mediawiki.org/xml/lsearch-0.1.xsd' ) ); |
743 | 711 | } |
744 | 712 | |
745 | 713 | } |
— | — | @@ -841,10 +809,12 @@ |
842 | 810 | case 'oai_dc': |
843 | 811 | $data = $this->renderDublinCore(); |
844 | 812 | break; |
845 | | - case 'lsearch': |
846 | 813 | case 'mediawiki': |
847 | 814 | $data = $this->renderMediaWiki(); |
848 | 815 | break; |
| 816 | + case 'lsearch': |
| 817 | + $data = $this->renderLSearch(); |
| 818 | + break; |
849 | 819 | default: |
850 | 820 | wfDebugDieBacktrace( 'Unsupported metadata format.' ); |
851 | 821 | } |
— | — | @@ -887,8 +857,24 @@ |
888 | 858 | $out .= $this->_writer->closePage().$this->_writer->closeStream(); |
889 | 859 | |
890 | 860 | return $out; |
891 | | - } |
| 861 | + } |
892 | 862 | |
| 863 | + function renderLSearch() { |
| 864 | + // lsearch rendering: stream/page header, redirects, revision, optional upload, closing tags |
| 865 | + $title = Title::makeTitle( $this->_row->page_namespace, $this->_row->page_title ); |
| 866 | + |
| 867 | + $out = $this->_writer->openStream(); |
| 868 | + $out .= $this->_writer->openPage( $this->_row ); |
| 869 | + $out .= $this->_writer->writeRedirects( $this->_row ); |
| 870 | + $out .= $this->_writer->writeRevision( $this->_row ); |
| 871 | + |
| 872 | + // pages in the image namespace additionally carry their upload record |
| 873 | + if( $title->getNamespace() == NS_IMAGE ) { |
| 874 | + $out .= $this->renderUpload(); |
| 875 | + } |
| 876 | + return $out . $this->_writer->closePage() . $this->_writer->closeStream(); |
| 877 | + } |
| 878 | + |
893 | 879 | function renderUpload() { |
894 | 880 | $fname = 'WikiOAIRecord::renderUpload'; |
895 | 881 | $db =& wfGetDB( DB_SLAVE ); |
— | — | @@ -957,7 +943,69 @@ |
958 | 944 | } else |
959 | 945 | return ""; |
960 | 946 | } |
| 947 | +} |
961 | 948 | |
| 949 | +/** |
| 950 | + * Extends the MW import/export format with the lsearch syntax, |
| 951 | + * i.e. schema lsearch-0.1 |
| 952 | + */ |
| 953 | +class OAILSearchWriter extends OAIDumpWriter { |
| 954 | + // Builds two lookups keyed by up_page from $resultSet: _redirects (lists of rows) and _references (one row). |
| 955 | + function __construct($resultSet){ |
| 956 | + parent::__construct(); |
| 957 | + $this->_redirects = array(); |
| 958 | + $this->_references = array(); |
| 959 | + for( $n = 0; $n < $resultSet->numRows(); $n++ ) { |
| 960 | + $entry = $resultSet->fetchObject(); |
| 961 | + // rows exposing page_title are filed as redirects, all others as the page's own row |
| 962 | + if( !isset( $entry->page_title ) ) |
| 963 | + $this->_references[$entry->up_page] = $entry; |
| 964 | + else |
| 965 | + $this->_redirects[$entry->up_page][] = $entry; |
| 966 | + } |
| 967 | + } |
| 968 | + // Opening <mediawiki> tag declared against the lsearch namespace/schema, followed by the site info. |
| 969 | + function openStream() { |
| 970 | + global $wgContLanguageCode; |
| 971 | + $ver = "0.1"; |
| 972 | + $ns = "http://www.mediawiki.org/xml/lsearch-$ver/"; |
| 973 | + $attribs = array( |
| 974 | + 'xmlns' => $ns, |
| 975 | + 'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance", |
| 976 | + 'xsi:schemaLocation' => "$ns " . "http://www.mediawiki.org/xml/lsearch-$ver.xsd", |
| 977 | + 'version' => $ver, |
| 978 | + 'xml:lang' => $wgContLanguageCode ); |
| 979 | + // the root element is created with null contents |
| 980 | + $root = wfElement( 'mediawiki', $attribs, null ); |
| 981 | + return $root . "\n" . $this->siteInfo(); |
| 982 | + } |
| 983 | + // Parent page header plus a <references> element; the count is 0 when none was loaded for the page. |
| 984 | + function openPage( $row ) { |
| 985 | + $xml = parent::openPage( $row ); |
| 986 | + $linkCount = 0; |
| 987 | + // isset() on the whole chain also covers a missing _references entry |
| 988 | + if( isset( $this->_references[$row->up_page]->num_page_ref ) ) { |
| 989 | + $linkCount = $this->_references[$row->up_page]->num_page_ref; |
| 990 | + } |
| 991 | + return $xml . ' ' . wfElement( 'references', array(), strval( $linkCount ) ) . "\n"; |
| 992 | + } |
| 993 | + // One <redirect> element (title, optional references) per redirect row recorded for $row->up_page. |
| 994 | + function writeRedirects($row){ |
| 995 | + $xml = ''; |
| 996 | + $known = isset( $this->_redirects[$row->up_page] ) ? $this->_redirects[$row->up_page] : array(); |
| 997 | + foreach( $known as $redirect ) { |
| 998 | + $title = Title::makeTitle( $redirect->page_namespace, $redirect->page_title ); |
| 999 | + $xml .= " <redirect>\n"; |
| 1000 | + $xml .= ' ' . wfElementClean( 'title', array(), $title->getPrefixedText() ) . "\n"; |
| 1001 | + if( isset( $redirect->num_page_ref ) ) { |
| 1002 | + $xml .= ' ' . wfElement( 'references', array(), strval( $redirect->num_page_ref ) ) . "\n"; |
| 1003 | + } |
| 1004 | + $xml .= " </redirect>\n"; |
| 1005 | + } |
| 1006 | + return $xml; |
| 1007 | + } |
| 1008 | + |
962 | 1009 | } |
963 | 1010 | |
| 1011 | + |
964 | 1012 | ?> |
Index: trunk/extensions/OAI/lsearch-0.1.xsd |
— | — | @@ -0,0 +1,171 @@ |
| 2 | +<?xml version="1.0" encoding="UTF-8" ?> |
| 3 | +<!-- |
| 4 | + This is an XML Schema description of the format |
| 5 | + used by MediaWiki's Lucene-Search extension. |
| 6 | + |
| 7 | + Version 0.1 is based on MediaWiki import/export format 0.3 |
| 8 | + (i.e. export-0.3.xsd), with the addition of a references |
| 9 | + property and redirect elements in page. |
| 10 | + |
| 11 | + The canonical URL to the schema document is: |
| 12 | + http://www.mediawiki.org/xml/lsearch-0.1.xsd |
| 13 | + |
| 14 | + Use the namespace: |
| 15 | + http://www.mediawiki.org/xml/lsearch-0.1/ |
| 16 | +--> |
| 17 | +<schema xmlns="http://www.w3.org/2001/XMLSchema" |
| 18 | + xmlns:mw="http://www.mediawiki.org/xml/lsearch-0.1/" |
| 19 | + targetNamespace="http://www.mediawiki.org/xml/lsearch-0.1/" |
| 20 | + elementFormDefault="qualified"> |
| 21 | + |
| 22 | + <annotation> |
| 23 | + <documentation xml:lang="en"> |
| 24 | + MediaWiki's page export format |
| 25 | + </documentation> |
| 26 | + </annotation> |
| 27 | + |
| 28 | + <!-- Need this to reference xml:lang --> |
| 29 | + <import namespace="http://www.w3.org/XML/1998/namespace" |
| 30 | + schemaLocation="http://www.w3.org/2001/xml.xsd"/> |
| 31 | + |
| 32 | + <!-- Our root element --> |
| 33 | + <element name="mediawiki" type="mw:MediaWikiType"/> |
| 34 | + |
| 35 | + <complexType name="MediaWikiType"> |
| 36 | + <sequence> |
| 37 | + <element name="siteinfo" type="mw:SiteInfoType" |
| 38 | + minOccurs="0" maxOccurs="1"/> |
| 39 | + <element name="page" type="mw:PageType" |
| 40 | + minOccurs="0" maxOccurs="unbounded"/> |
| 41 | + </sequence> |
| 42 | + <attribute name="version" type="string" use="required"/> |
| 43 | + <attribute ref="xml:lang" use="required"/> |
| 44 | + </complexType> |
| 45 | + |
| 46 | + <complexType name="SiteInfoType"> |
| 47 | + <sequence> |
| 48 | + <element name="sitename" type="string" minOccurs="0" /> |
| 49 | + <element name="base" type="anyURI" minOccurs="0" /> |
| 50 | + <element name="generator" type="string" minOccurs="0" /> |
| 51 | + <element name="case" type="mw:CaseType" minOccurs="0" /> |
| 52 | + <element name="namespaces" type="mw:NamespacesType" minOccurs="0" /> |
| 53 | + </sequence> |
| 54 | + </complexType> |
| 55 | + |
| 56 | + <simpleType name="CaseType"> |
| 57 | + <restriction base="NMTOKEN"> |
| 58 | + <!-- Cannot have two titles differing only by case of first letter. --> |
| 59 | + <!-- Default behavior through 1.5, $wgCapitalLinks = true --> |
| 60 | + <enumeration value="first-letter" /> |
| 61 | + |
| 62 | + <!-- Complete title is case-sensitive --> |
| 63 | + <!-- Behavior when $wgCapitalLinks = false --> |
| 64 | + <enumeration value="case-sensitive" /> |
| 65 | + |
| 66 | + <!-- Cannot have two titles differing only by case. --> |
| 67 | + <!-- Not yet implemented as of MediaWiki 1.5 --> |
| 68 | + <enumeration value="case-insensitive" /> |
| 69 | + </restriction> |
| 70 | + </simpleType> |
| 71 | + |
| 72 | + <complexType name="NamespacesType"> |
| 73 | + <sequence> |
| 74 | + <element name="namespace" type="mw:NamespaceType" |
| 75 | + minOccurs="0" maxOccurs="unbounded" /> |
| 76 | + </sequence> |
| 77 | + </complexType> |
| 78 | + |
| 79 | + <complexType name="NamespaceType"> |
| 80 | + <simpleContent> |
| 81 | + <extension base="string"> |
| 82 | + <attribute name="key" type="integer" /> |
| 83 | + </extension> |
| 84 | + </simpleContent> |
| 85 | + </complexType> |
| 86 | + |
| 87 | + <complexType name="RedirectType"> |
| 88 | + <sequence> |
| 89 | + <!-- Title in text form. (Using spaces, not underscores; with namespace ) --> |
| 90 | + <element name="title" type="string"/> |
| 91 | + |
| 92 | + <!-- optional page ID number --> |
| 93 | + <element name="id" type="positiveInteger" minOccurs="0"/> |
| 94 | + |
| 95 | + <!-- optional: number of pages that link to this page --> |
| 96 | + <element name="references" type="positiveInteger" minOccurs="0"/> |
| 97 | + </sequence> |
| 98 | + </complexType> |
| 99 | + |
| 100 | + <complexType name="PageType"> |
| 101 | + <sequence> |
| 102 | + <!-- Title in text form. (Using spaces, not underscores; with namespace ) --> |
| 103 | + <element name="title" type="string"/> |
| 104 | + |
| 105 | + <!-- optional page ID number --> |
| 106 | + <element name="id" type="positiveInteger" minOccurs="0"/> |
| 107 | + |
| 108 | + <!-- optional: number of pages that link to this page (without redirects); the writer emits 0 when there are none --> |
| 109 | + <element name="references" type="nonNegativeInteger" minOccurs="0"/> |
| 110 | + |
| 111 | + <!-- 0 or more redirects to this page --> |
| 112 | + <element name="redirect" type="mw:RedirectType" minOccurs="0" maxOccurs="unbounded"/> |
| 113 | + |
| 114 | + <!-- comma-separated list of string tokens, if present --> |
| 115 | + <element name="restrictions" type="string" minOccurs="0"/> |
| 116 | + |
| 117 | + <!-- Zero or more sets of revision or upload data --> |
| 118 | + <choice minOccurs="0" maxOccurs="unbounded"> |
| 119 | + <element name="revision" type="mw:RevisionType" /> |
| 120 | + <element name="upload" type="mw:UploadType" /> |
| 121 | + </choice> |
| 122 | + </sequence> |
| 123 | + </complexType> |
| 124 | + |
| 125 | + <complexType name="RevisionType"> |
| 126 | + <sequence> |
| 127 | + <element name="id" type="positiveInteger" minOccurs="0"/> |
| 128 | + <element name="timestamp" type="dateTime"/> |
| 129 | + <element name="contributor" type="mw:ContributorType"/> |
| 130 | + <element name="minor" minOccurs="0" /> |
| 131 | + <element name="comment" type="string" minOccurs="0"/> |
| 132 | + <element name="text" type="mw:TextType" /> |
| 133 | + </sequence> |
| 134 | + </complexType> |
| 135 | + |
| 136 | + <complexType name="TextType"> |
| 137 | + <simpleContent> |
| 138 | + <extension base="string"> |
| 139 | + <attribute ref="xml:space" use="optional" default="preserve" /> |
| 140 | + </extension> |
| 141 | + </simpleContent> |
| 142 | + </complexType> |
| 143 | + |
| 144 | + <complexType name="ContributorType"> |
| 145 | + <sequence> |
| 146 | + <element name="username" type="string" minOccurs="0"/> |
| 147 | + <element name="id" type="positiveInteger" minOccurs="0" /> |
| 148 | + |
| 149 | + <element name="ip" type="string" minOccurs="0"/> |
| 150 | + </sequence> |
| 151 | + </complexType> |
| 152 | + |
| 153 | + <complexType name="UploadType"> |
| 154 | + <sequence> |
| 155 | + <!-- Revision-style data... --> |
| 156 | + <element name="timestamp" type="dateTime"/> |
| 157 | + <element name="contributor" type="mw:ContributorType"/> |
| 158 | + <element name="comment" type="string" minOccurs="0"/> |
| 159 | + |
| 160 | + <!-- Filename. (Using underscores, not spaces. No 'Image:' namespace marker.) --> |
| 161 | + <element name="filename" type="string"/> |
| 162 | + |
| 163 | + <!-- URI at which this resource can be obtained --> |
| 164 | + <element name="src" type="anyURI"/> |
| 165 | + |
| 166 | + <element name="size" type="positiveInteger" /> |
| 167 | + |
| 168 | + <!-- TODO: add other metadata fields --> |
| 169 | + </sequence> |
| 170 | + </complexType> |
| 171 | + |
| 172 | +</schema> |