r95802 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r95801‎ | r95802 | r95803 >
Date:18:36, 30 August 2011
Author:swalker
Status:deferred
Tags:
Comment:
Initial commit - script to link WikiProjects to categories
Modified paths:
  • /trunk/tools/wsor/wikiprojects/categorylinks_wikiprojects.py (added) (history)

Diff [purge]

Index: trunk/tools/wsor/wikiprojects/categorylinks_wikiprojects.py
@@ -0,0 +1,105 @@
 2+
 3+import os, sys, MySQLdb, re
 4+
# Two separate connections to the same database are opened with identical
# settings: `conn` feeds the streaming category cursor further down, while
# `conn2` is handed to the lookup helper and used for the inserts.
_CONNECT_ARGS = {
    'host': 'db42',
    'db': 'shawn',
    'read_default_file': os.path.expanduser("~/.my.cnf"),
}

conn = MySQLdb.connect(**_CONNECT_ARGS)
conn2 = MySQLdb.connect(**_CONNECT_ARGS)
 16+
 17+
def getWikiProjectPage(conn, page_title):
    """Look up the page id of a namespace-4 page by exact title.

    Runs a parameterized query against ``enwiki.page`` restricted to
    ``page_namespace = 4`` and reads the first row of the result.

    Args:
        conn: open MySQLdb connection on which the cursor is created.
        page_title: title compared for equality against ``page_title``.

    Returns:
        The ``page_id`` from the first matching row, or ``None`` when the
        query returns no row.
    """
    cursor = conn.cursor(MySQLdb.cursors.SSCursor)
    try:
        cursor.execute("""SELECT page_id FROM enwiki.page WHERE page_title = %(page_title)s AND page_namespace = 4
            """,
            {
                'page_title': page_title
            }
        )
        row = cursor.fetchone()
    finally:
        # SSCursor is unbuffered: its result set stays attached to the
        # connection until it is released.  Close explicitly instead of
        # relying on garbage collection, so the next query on `conn` is
        # never issued against a connection with a pending result.
        cursor.close()

    if row:
        return row[0]
    return None
 36+
# Markers that separate a WikiProject name from the rest of a category
# name, in priority order: the first pattern that matches anywhere in the
# title wins, even if a later marker occurs further to the left.
_WIKIPROJECT_MARKER_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'_article.*$',
        r'_page.*$',
        r'_member.*$',
        r'_participant.*$',
    )
)


# Given a categorylink, return the WikiProject name by removing anything
# after _article, _page, _member, or _participant.
def parseWikiProject(conn, page_title):
    """Strip a category name down to its WikiProject prefix.

    Args:
        conn: unused; kept so existing callers that pass a connection
            keep working.
        page_title: category name, e.g. ``WikiProject_Biology_articles``.

    Returns:
        ``page_title`` with the first matching marker and everything after
        it removed (this may be the empty string), or ``None`` when none of
        the markers occurs in ``page_title``.
    """
    for pattern in _WIKIPROJECT_MARKER_PATTERNS:
        # subn both strips the suffix and reports whether it matched, so
        # each pattern is evaluated once instead of search + sub.
        wp, replaced = pattern.subn('', page_title)
        if replaced:
            return wp
    return None
 58+
 59+
 60+
# Stream the distinct category names over `conn` with an unbuffered cursor.
# All lookups and inserts below go through `conn2`, so they never collide
# with this still-open result set.
ucursor = conn.cursor(MySQLdb.cursors.SSCursor)
ucursor.execute("""
    SELECT DISTINCT
        cl_to
    FROM categorylinks
    """
)

# Page id the script treats as "no real match".
# NOTE(review): presumably the id of the generic page that every name
# collapses to once trimmed far enough -- confirm against enwiki.page.
FALLBACK_PAGE_ID = 33631

for cat_link in ucursor:
    category = cat_link[0]
    wp_page_name = category

    wikiproject = parseWikiProject(conn2, category)
    if wikiproject:
        # Was assigned to the misspelled `wp_page_nume`, so the parsed
        # name was silently discarded and never used for the lookup.
        wp_page_name = wikiproject

    wp_page_id = getWikiProjectPage(conn2, wp_page_name)

    # No match for the current name: drop the last `_`-separated word and
    # retry until a page is found or nothing is left to trim.
    while not wp_page_id:
        cut = wp_page_name.rfind('_')
        if cut <= 0:
            # rfind() returned -1 (no underscore) or 0 (only a leading
            # one).  Slicing with -1 used to shave a single character per
            # pass and could loop forever on the empty string.
            break
        wp_page_name = wp_page_name[:cut]
        wp_page_id = getWikiProjectPage(conn2, wp_page_name)

    if not wp_page_id or wp_page_id == FALLBACK_PAGE_ID:
        # Covers both the fallback page and "nothing found at all"; a
        # missing id must not be inserted.
        sys.stdout.write("-- NOT found after trimming: %s - %s - %s\n\n" % (wp_page_name, category, wp_page_id))
        sys.stdout.write("-")
        continue

    sys.stdout.write("++ FOUND after trimming: %s - %s - %s\n\n" % (wp_page_name, category, wp_page_id))
    sys.stdout.write("+")
    cursor3 = conn2.cursor(MySQLdb.cursors.SSCursor)
    try:
        cursor3.execute("""INSERT INTO categorylinks_wp values (%(wp_id)s, %(category)s)
            """,
            {
                'wp_id': wp_page_id,
                'category': category
            }
        )
    finally:
        cursor3.close()

ucursor.close()

# MySQLdb connections do not autocommit by default; without this the
# inserted rows are rolled back on a transactional table when the script
# exits.
conn2.commit()
\ No newline at end of file

Status & tagging log