r7989 pywikipedia - Code Review archive

Repository:pywikipedia
Revision:r7988‎ | r7989 | r7990 >
Date:19:16, 11 March 2010
Author:russblau
Status:old
Tags:
Comment:
Get magic word localizations from the API instead of storing them in family files.
Modified paths:
  • /branches/rewrite/pywikibot/family.py (modified) (history)
  • /branches/rewrite/pywikibot/site.py (modified) (history)

Diff [purge]

Index: branches/rewrite/pywikibot/site.py
@@ -261,17 +261,22 @@
262262 normalizeNamespace = ns_normalize # for backwards-compatibility
263263
264264 def redirect(self, default=True):
265 - """Return the localized redirect tag for the site.
 265+ """Return list of localized redirect tags for the site.
266266
267267 If default is True, falls back to 'REDIRECT' if the site has no
268268 special redirect tag.
269269
270270 """
271 - if default:
272 - return self.family.redirect.get(self.code, [u"REDIRECT"])[0]
273 - else:
274 - return self.family.redirect.get(self.code, None)
 271+ return [u"REDIRECT"]
275272
 273+ def pagenamecodes(self, default=True):
 274+ """Return list of localized PAGENAME tags for the site."""
 275+ return [u"PAGENAME"]
 276+
 277+ def pagename2codes(self, default=True):
 278+ """Return list of localized PAGENAMEE tags for the site."""
 279+ return [u"PAGENAMEE"]
 280+
276281 def lock_page(self, page, block=True):
277282 """Lock page for writing. Must be called before writing any page.
278283
@@ -335,22 +340,14 @@
336341 """
337342 return (pywikibot.Link(s, self).site != self)
338343
339 - def redirectRegex(self):
 344+ def redirectRegex(self, pattern=None):
340345 """Return a compiled regular expression matching on redirect pages.
341346
342347 Group 1 in the regex match object will be the target title.
343348
344349 """
345 - #TODO: is this needed, since the API identifies redirects?
346 - # (maybe, the API can give false positives)
347 - default = 'REDIRECT'
348 - try:
349 - keywords = set(self.family.redirect[self.code])
350 - keywords.add(default)
351 - pattern = r'(?:' + '|'.join(keywords) + ')'
352 - except KeyError:
353 - # no localized keyword for redirects
354 - pattern = r'%s' % default
 350+ if pattern is None:
 351+ pattern = "REDIRECT"
355352 # A redirect starts with hash (#), followed by a keyword, then
356353 # arbitrary stuff, then a wikilink. The wikilink may contain
357354 # a label, although this is not useful.
@@ -847,6 +844,70 @@
848845 ts = self.getcurrenttimestamp()
849846 return pywikibot.Timestamp.fromtimestampformat(ts)
850847
 848+ def getmagicwords(self, word):
 849+ """Return list of localized "word" magic words for the site."""
 850+ if not hasattr(self, "_magicwords"):
 851+ sirequest = api.Request(
 852+ site=self,
 853+ action="query",
 854+ meta="siteinfo",
 855+ siprop="magicwords"
 856+ )
 857+ try:
 858+ sidata = sirequest.submit()
 859+ assert 'query' in sidata, \
 860+ "API siteinfo response lacks 'query' key"
 861+ sidata = sidata['query']
 862+ assert 'magicwords' in sidata, \
 863+ "API siteinfo response lacks 'magicwords' key"
 864+ self._magicwords = dict((item["name"], item["aliases"])
 865+ for item in sidata["magicwords"])
 866+
 867+ except api.APIError:
 868+ # hack for older sites that don't support 1.13 properties
 869+ # probably should delete if we're not going to support pre-1.13
 870+ self._magicwords = {}
 871+
 872+ if word in self._magicwords:
 873+ return self._magicwords[word]
 874+ else:
 875+ return [word]
 876+
 877+ def redirect(self, default=True):
 878+ """Return the preferred localized #REDIRECT keyword.
 879+
 880+ Argument is ignored (but maintained for backwards-compatibility).
 881+
 882+ """
 883+ # return the magic word without the preceding '#' character
 884+ return self.getmagicwords("redirect")[0].lstrip("#")
 885+
 886+ def redirectRegex(self):
 887+ """Return a compiled regular expression matching on redirect pages.
 888+
 889+ Group 1 in the regex match object will be the target title.
 890+
 891+ """
 892+ #TODO: is this needed, since the API identifies redirects?
 893+ # (maybe, the API can give false positives)
 894+ try:
 895+ keywords = set(s.lstrip("#")
 896+ for s in self.getmagicwords("redirect"))
 897+ keywords.add("REDIRECT") # just in case
 898+ pattern = "(?:" + "|".join(keywords) + ")"
 899+ except KeyError:
 900+ # no localized keyword for redirects
 901+ pattern = None
 902+ return BaseSite.redirectRegex(self, pattern)
 903+
 904+ def pagenamecodes(self, default=True):
 905+ """Return list of localized PAGENAME tags for the site."""
 906+ return self.getmagicwords("pagename")
 907+
 908+ def pagename2codes(self, default=True):
 909+ """Return list of localized PAGENAMEE tags for the site."""
 910+ return self.getmagicwords("pagenamee")
 911+
851912 def _getsiteinfo(self):
852913 """Retrieve siteinfo and namespaces from site."""
853914 sirequest = api.Request(
Index: branches/rewrite/pywikibot/family.py
@@ -490,7 +490,9 @@
491491
492492 # A list with the name for cross-project cookies.
493493 # default for wikimedia centralAuth extensions.
494 - self.cross_projects_cookies = ['centralauth_Session', 'centralauth_Token', 'centralauth_User']
 494+ self.cross_projects_cookies = ['centralauth_Session',
 495+ 'centralauth_Token',
 496+ 'centralauth_User']
495497 self.cross_projects_cookie_username = 'centralauth_User'
496498
497499 # A list with the name in the cross-language flag permissions
@@ -676,226 +678,9 @@
677679 return self.disambiguationTemplates[fallback]
678680 else:
679681 raise KeyError(
680 - "ERROR: title for disambig template in language %(language_code)s unknown"
 682+"ERROR: title for disambig template in language %(language_code)s unknown"
681683 % {'language_code': code})
682684
683 - # Localised magic words for language code 'xyz' can be found in
684 - # the MediaWiki source code in the file
685 - # /mediawiki/trunk/phase3/languages/messages/MessagesXyz.php
686 - # in the 'magicwords' array
687 -
688 - # Localised redirect codes
689 -
690 - # Note that redirect codes are case-insensitive, so it is enough
691 - # to enter the code in lowercase here.
692 -
693 - # When creating a redirect page, only the first item is looked for.
694 - # When matching for redirects, default 'redirect' is always inserted
695 - # => if default redirect keyword used for a language is not 'redirect',
696 - # it is not necessary to add 'redirect' at the end of the list
697 - redirect = {
698 - 'ab': [u'перенаправление', u'перенапр', u'redirect'],
699 - 'ace': [u'alih'],
700 - 'af': [u'aanstuur'],
701 - 'aln': [u'ridrejto'],
702 - 'als': [u'weiterleitung'],
703 - 'an': [u'redirección'],
704 - 'ar': [u'تحويل'],
705 - 'arn': [u'redirección'],
706 - 'arz': [u'تحويل'],
707 - 'av': [u'перенаправление', u'перенапр'],
708 - 'ay': [u'redirección'],
709 - 'ba': [u'перенаправление', u'перенапр'],
710 - 'bar': [u'weiterleitung'],
711 - 'bat-smg': [u'peradresavimas'],
712 - 'bcc': [u'تغییرمسیر'],
713 - 'be-tarask': [u'перанакіраваньне'],
714 - 'be-x-old': [u'перанакіраваньне'],
715 - 'bg': [u'виж', u'пренасочване'],
716 - 'bm': [u'redirection'],
717 - 'bqi': [u'تغییرمسیر'],
718 - 'br': [u'adkas'],
719 - 'bug': [u'alih'],
720 - 'bs': [u'preusmjeri'],
721 - 'cbk-zam': [u'redirección'],
722 - 'ce': [u'перенаправление', u'перенапр'],
723 - 'cs': [u'přesměruj'],
724 - 'cu': [u'прѣнаправлєниѥ'],
725 - 'cv': [u'перенаправление', u'перенапр'],
726 - 'cy': [u'ail-cyfeirio', u'ailgyfeirio'],
727 - 'de': [u'weiterleitung'],
728 - 'de-at': [u'weiterleitung'],
729 - 'de-ch': [u'weiterleitung'],
730 - 'de-formal': [u'weiterleitung'],
731 - 'dsb': [u'weiterleitung'],
732 - 'el': [u'ανακατευθυνση'],
733 - 'eml': [u'rinvia', u'rinvio'],
734 - 'eo': [u'alidirektu'],
735 - 'es': [u'redirección'],
736 - 'et': [u'suuna'],
737 - 'eu': [u'birzuzendu'],
738 - 'fa': [u'تغییرمسیر'],
739 - 'ff': [u'redirection'],
740 - 'fi': [u'ohjaus', u'uudelleenohjaus'],
741 - 'fiu-vro': [u'saadaq'],
742 - 'fr': [u'redirection'],
743 - 'frp': [u'redirèccion', u'redirection'],
744 - 'fur': [u'rinvia', u'rinvio'],
745 - 'ga': [u'athsheoladh'],
746 - 'gag': [u'yönlendirme'],
747 - 'gl': [u'redirección'],
748 - 'glk': [u'تغییرمسیر'],
749 - 'gn': [u'redirección'],
750 - 'gsw': [u'weiterleitung'],
751 - 'he': [u'הפניה'],
752 - 'hr': [u'preusmjeri'],
753 - 'hsb': [u'weiterleitung'],
754 - 'ht': [u'redirection'],
755 - 'hu': [u'átirányítás'],
756 - 'hy': [u'վերահղում'],
757 - 'id': [u'alih'],
758 - 'inh': [u'перенаправление', u'перенапр'],
759 - 'is': [u'tilvísun'],
760 - 'it': [u'rinvia', u'rinvio'],
761 - 'ja': [u'転送', u'リダイレクト'],
762 - 'jv': [u'alih'],
763 - 'ka': [u'გადამისამართება'],
764 - 'kaa': [u'aýdaw', u'айдау'],
765 - 'kk': [u'айдау'],
766 - 'kk-arab': [u'ايداۋ'],
767 - 'kk-cyrl': [u'АЙДАУ'],
768 - 'kk-latn': [u'aýdaw', u'айдау'],
769 - 'km': [u'\u1794\u1789\u17d2\u1787\u17bc\u1793\u1794\u1793\u17d2\u178f',
770 - u'\u1794\u17d2\u178f\u17bc\u179a\u1791\u17b8\u178f\u17b6\u17c6\u1784',
771 - u'\u1794\u17d2\u178a\u17bc\u179a\u1785\u17c6\u178e\u1784\u1787\u17be\u1784',
772 - u'ប្តូរទីតាំងទៅ'],
773 - 'ko': [u'넘겨주기'],
774 - 'ksh': [u'ömleide op', u'ömleidung'],
775 - 'kv': [u'перенаправление', u'перенапр'],
776 - 'lad': [u'redirección'],
777 - 'lb': [u'weiterleitung'],
778 - 'lbe': [u'перенаправление', u'перенапр'],
779 - 'li': [u'doorverwijzing'],
780 - 'lij': [u'rinvia', u'rinvio'],
781 - 'lld': [u'rinvia', u'rinvio'],
782 - 'lmo': [u'rinvia', u'rinvio'],
783 - 'ln': [u'redirection'],
784 - 'lt': [u'peradresavimas'],
785 - 'map-bms': [u'alih'],
786 - 'mg': [u'redirection'],
787 - 'mhr': [u'перенаправление', u'перенапр'],
788 - 'mk': [u'пренасочување', u'види'],
789 - 'ml': [u'തിരിച്ചുവിടുക', u'തിരിച്ചുവിടല്‍'],
790 - 'mo': [u'redirecteaza'],
791 - 'mr': [u'पुनर्निर्देशन'],
792 - 'mt': [u'rindirizza'],
793 - 'mwl': [u'ancaminar'],
794 - 'myv': [u'перенаправление', u'перенапр'],
795 - 'mzn': [u'تغییرمسیر'],
796 - 'nah': [u'redirección'],
797 - 'nap': [u'rinvia'],
798 - 'nds': [u'wiederleiden', u'weiterleitung'],
799 - 'nds-nl': [u'deurverwiezing', u'doorverwijzing'],
800 - 'new': [u'पुनर्निर्देश'],
801 - 'nl': [u'doorverwijzing'],
802 - 'nn': [u'omdiriger'],
803 - 'no': [u'omdirigering'],
804 - 'oc': [u'redireccion'],
805 - 'os': [u'рарвыст', u'перенаправление', u'перенапр'],
806 - 'pdc': [u'weiterleitung'],
807 - 'pl': [u'patrz', u'przekieruj', u'tam'],
808 - 'pms': [u'rinvia', u'rinvio'],
809 - 'pt': [u'redirecionamento'],
810 - 'pt-br': [u'redirecionamento'],
811 - 'qu': [u'pusapuna', u'redirección'],
812 - 'rmy': [u'redirecteaza'],
813 - 'ro': [u'redirecteaza'],
814 - 'ru': [u'перенаправление', u'перенапр'],
815 - 'sa': [u'पुनर्निदेशन'],
816 - 'sah': [u'перенаправление', u'перенапр'],
817 - 'scn': [u'rinvia', u'rinvio'],
818 - 'sd': [u'چوريو'],
819 - 'sg': [u'redirection'],
820 - 'shi': [u'تحويل'],
821 - 'si': [u'යළියොමුව'],
822 - 'sk': [u'presmeruj'],
823 - 'sl': [u'preusmeritev'],
824 - 'sli': [u'weiterleitung'],
825 - 'sq': [u'ridrejto'],
826 - 'sr': [u'преусмери', u'преусмери'],
827 - 'sr-ec': [u'преусмери'],
828 - 'sr-el': [u'preusmeri'],
829 - 'srn': [u'stir', u'doorverwijzing'],
830 - 'stq': [u'weiterleitung'],
831 - 'su': [u'alih'],
832 - 'sv': [u'omdirigering'],
833 - 'szl': [u'patrz', u'przekieruj', u'tam'],
834 - 'ta': [u'வழிமாற்று'],
835 - 'te': [u'దారిమార్పు'],
836 - 'th': [u'เปลี่ยนทาง'],
837 - 'tr': [u'yönlendirme'],
838 - 'tt': [u'yünältü'],
839 - 'tt-latn': [u'yünältü'],
840 - 'tt-cyrl': [u'перенаправление', u'перенапр'],
841 - 'ty': [u'redirection'],
842 - 'udm': [u'перенаправление', u'перенапр'],
843 - 'uk': [u'перенаправлення', u'перенаправление', u'перенапр'],
844 - 'vec': [u'rinvia', u'rinvio'],
845 - 'vep': [u'suuna'],
846 - 'vi': [u'đổi', u'đổi'],
847 - 'vls': [u'doorverwijzing'],
848 - 'vro': [u'saadaq', u'suuna'],
849 - 'wa': [u'redirection'],
850 - 'wo': [u'redirection'],
851 - 'yi': [u'ווייטערפירן', u'הפניה'],
852 - 'zea': [u'doorverwijzing']
853 - }
854 -
855 - # So can be pagename code
856 - pagename = {
857 - 'bg': [u'СТРАНИЦА'],
858 - 'he': [u'שם הדף'],
859 - 'kk': [u'БЕТАТАУЫ'],
860 - 'nn': ['SIDENAMN', 'SIDENAVN'],
861 - 'ru': [u'НАЗВАНИЕСТРАНИЦЫ'],
862 - 'sr': [u'СТРАНИЦА'],
863 - 'tt': [u'BİTİSEME']
864 - }
865 -
866 - pagenamee = {
867 - 'he': [u'שם הדף מקודד'],
868 - 'kk': [u'БЕТАТАУЫ2'],
869 - 'nn': ['SIDENAMNE', 'SIDENAVNE'],
870 - 'ru': [u'НАЗВАНИЕСТРАНИЦЫ2'],
871 - 'sr': [u'СТРАНИЦЕ']
872 - }
873 -
874 - def pagenamecodes(self, code):
875 - pos = ['PAGENAME']
876 - pos2 = []
877 - if code in self.pagename:
878 - pos = pos + self.pagename[code]
879 - elif code == 'als':
880 - return self.pagenamecodes('de')
881 - elif code == 'bm':
882 - return self.pagenamecodes('fr')
883 - for p in pos:
884 - pos2 += [p, p.lower()]
885 - return pos2
886 -
887 - def pagename2codes(self, code):
888 - pos = ['PAGENAME']
889 - pos2 = []
890 - if code in self.pagenamee:
891 - pos = pos + self.pagenamee[code]
892 - elif code == 'als':
893 - return self.pagename2codes('de')
894 - elif code == 'bm':
895 - return self.pagename2codes('fr')
896 - for p in pos:
897 - pos2 += [p, p.lower()]
898 - return pos2
899 -
900685 # Methods
901686 def protocol(self, code):
902687 """

Status & tagging log