r44887 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r44886‎ | r44887 | r44888 >
Date:03:31, 22 December 2008
Author:ipye
Status:deferred
Tags:
Comment:
Merge branch 'ian-luca'
Modified paths:
  • /trunk/extensions/WikiTrust/README (modified) (history)
  • /trunk/extensions/WikiTrust/analysis/Makefile (modified) (history)
  • /trunk/extensions/WikiTrust/analysis/eval_online_wiki.ml (modified) (history)
  • /trunk/extensions/WikiTrust/analysis/online_command_line.ml (added) (history)
  • /trunk/extensions/WikiTrust/analysis/online_db.ml (modified) (history)
  • /trunk/extensions/WikiTrust/analysis/online_db.mli (modified) (history)
  • /trunk/extensions/WikiTrust/analysis/online_types.ml (modified) (history)
  • /trunk/extensions/WikiTrust/analysis/server.ml (added) (history)
  • /trunk/extensions/WikiTrust/analysis/server_coloring_dispatcher.ml (added) (history)
  • /trunk/extensions/WikiTrust/analysis/tmpfile.ml (added) (history)
  • /trunk/extensions/WikiTrust/analysis/tmpfile.mli (added) (history)
  • /trunk/extensions/WikiTrust/analysis/trust-server.cgf (added) (history)
  • /trunk/extensions/WikiTrust/analysis/vote_revision.ml (modified) (history)
  • /trunk/extensions/WikiTrust/analysis/wikipedia_api.ml (added) (history)
  • /trunk/extensions/WikiTrust/mediawiki/extensions/Trust/Trust.php (modified) (history)
  • /trunk/extensions/WikiTrust/mediawiki/extensions/Trust/TrustUpdateScripts.inc (modified) (history)

Diff [purge]

Index: trunk/extensions/WikiTrust/analysis/server_coloring_dispatcher.ml
@@ -0,0 +1,257 @@
 2+(*
 3+
 4+Copyright (c) 2007-2008 The Regents of the University of California
 5+All rights reserved.
 6+
 7+Authors: Luca de Alfaro, Ian Pye
 8+
 9+Redistribution and use in source and binary forms, with or without
 10+modification, are permitted provided that the following conditions are met:
 11+
 12+1. Redistributions of source code must retain the above copyright notice,
 13+this list of conditions and the following disclaimer.
 14+
 15+2. Redistributions in binary form must reproduce the above copyright notice,
 16+this list of conditions and the following disclaimer in the documentation
 17+and/or other materials provided with the distribution.
 18+
 19+3. The names of the contributors may not be used to endorse or promote
 20+products derived from this software without specific prior written
 21+permission.
 22+
 23+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 24+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 25+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 26+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 27+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 28+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 29+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 30+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 31+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 32+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 33+POSSIBILITY OF SUCH DAMAGE.
 34+
 35+ *)
 36+
 37+(* Figures out which pages to update, and starts them going. *)
 38+
 39+open Printf
 40+open Mysql
 41+open Unix
 42+open Online_command_line
 43+open Wikipedia_api
 44+open Online_db
 45+open Online_types
 46+
 47+let max_concurrent_procs = 10 (* Cap on simultaneous child processes; also the initial size of working_children. *)
 48+let sleep_time_sec = 1 (* Seconds slept between polling rounds. *)
 49+let custom_line_format = [] @ command_line_format (* Prepending [] adds no options of its own. *)
 50+
 51+let _ = Arg.parse custom_line_format noop "Usage: dispatcher";; (* Anonymous arguments are passed to noop. *)
 52+
 53+let working_children = Hashtbl.create max_concurrent_procs (* Maps a page id to the pid of the child working on it. *)
 54+
 55+(* Connection settings read from the mw_db_* refs after Arg.parse has run; every field is present (Some). *)
 56+let mediawiki_db = {
 57+ dbhost = Some !mw_db_host;
 58+ dbname = Some !mw_db_name;
 59+ dbport = Some !mw_db_port;
 60+ dbpwd = Some !mw_db_pass;
 61+ dbuser = Some !mw_db_user;
 62+}
 63+
 64+(* Here begins the sequential code: the bindings below are chained with [in] and shared by the functions that follow. *)
 65+let db = new Online_db.db !db_prefix mediawiki_db None !dump_db_calls in
 66+let logger = new Online_log.logger !log_name !synch_log in
 67+let n_processed_events = ref 0 in (* Count of events processed so far. *)
 68+let trust_coeff = Online_types.get_default_coeff in
 69+
 70+(* There are two kinds of throttle delay, both derived from color_delay: its integer part is
 71+ slept after every event, and its fractional part becomes one extra second every 1/frac events. *)
 72+let each_event_delay = int_of_float !color_delay in (* Whole seconds per event. *)
 73+let every_n_events_delay =
 74+ let frac = !color_delay -. (floor !color_delay) in
 75+ if frac > 0.001 (* Smaller fractions are treated as zero. *)
 76+ then Some (max 1 (int_of_float (1. /. frac))) (* Period in events, at least 1. *)
 77+ else None
 78+in
 79+
 80+(* Wait for the processes to stop before accepting more *)
 81+let clean_kids k v = (
 82+ let stat = Unix.waitpid [WNOHANG] v in
 83+ match (stat) with
 84+ | (0,_) -> () (* Process not yet done. *)
 85+ | (_, WEXITED s) -> Hashtbl.remove working_children k (* Otherwise, remove the process. *)
 86+ | (_, WSIGNALED s) -> Hashtbl.remove working_children k
 87+ | (_, WSTOPPED s) -> Hashtbl.remove working_children k
 88+) in
 89+
 90+(* This is the function that evaluates a revision.
 91+ The function is recursive, because if some past revision of the same page
 92+ that falls within the analysis horizon is not yet evaluated and colored
 93+ for trust, it evaluates and colors it first.
 94+ *)
 95+let rec evaluate_revision (page_id: int) (rev_id: int): unit =
 96+ if !n_processed_events < !max_events_to_process then (* Does nothing once the event budget is used up. *)
 97+ begin
 98+ begin (* try ... with ... *)
 99+ try
 100+ Printf.printf "Evaluating revision %d of page %d\n" rev_id page_id;
 101+ let page = new Online_page.page db logger page_id rev_id trust_coeff !times_to_retry_trans in
 102+ n_processed_events := !n_processed_events + 1; (* Incremented before page#eval is called. *)
 103+ if page#eval then begin
 104+ Printf.printf "Done revision %d of page %d\n" rev_id page_id;
 105+ end else begin
 106+ Printf.printf "Revision %d of page %d was already done\n" rev_id page_id;
 107+ end;
 108+ (* Waits, if so requested to throttle the computation. *)
 109+ if each_event_delay > 0 then Unix.sleep (each_event_delay);
 110+ begin
 111+ match every_n_events_delay with
 112+ Some d -> begin
 113+ if (!n_processed_events mod d) = 0 then Unix.sleep (1); (* One extra second every d events. *)
 114+ end
 115+ | None -> ()
 116+ end;
 117+
 118+ with Online_page.Missing_trust (page_id', rev_id') ->
 119+ begin
 120+ (* We need to evaluate page_id', rev_id' first *)
 121+ (* This if is a basic sanity check only. It should always be true *)
 122+ if rev_id' <> rev_id then (* When the two ids are equal the exception is dropped without any message. *)
 123+ begin
 124+ Printf.printf "Missing trust info: we need first to evaluate revision %d of page %d\n" rev_id' page_id';
 125+ evaluate_revision page_id' rev_id'; (* Evaluate the missing earlier revision... *)
 126+ evaluate_revision page_id rev_id (* ...then retry the one originally requested. *)
 127+ end (* rev_id' <> rev_id *)
 128+ end (* with: Was missing trust of a previous revision *)
 129+ end (* End of try ... with ... *)
 130+ end
 131+in
 132+
 133+(* This is the code that evaluates a vote *)
 134+let evaluate_vote (page_id: int) (revision_id: int) (voter_id: int) =
 135+ if !n_processed_events < !max_events_to_process then
 136+ begin
 137+ Printf.printf "Evaluating vote by %d on revision %d of page %d\n" voter_id revision_id page_id;
 138+ let page = new Online_page.page db logger page_id revision_id trust_coeff !times_to_retry_trans in
 139+ if page#vote voter_id then begin
 140+ n_processed_events := !n_processed_events + 1;
 141+ Printf.printf "Done revision %d of page %d\n" revision_id page_id;
 142+ end;
 143+ (* Waits, if so requested to throttle the computation. *)
 144+ if each_event_delay > 0 then Unix.sleep (each_event_delay);
 145+ begin
 146+ match every_n_events_delay with
 147+ Some d -> begin
 148+ if (!n_processed_events mod d) = 0 then Unix.sleep (1);
 149+ end
 150+ | None -> ()
 151+ end;
 152+ end
 153+in
 154+
 155+(* [get_user_id u_name] returns the id the database holds for [u_name]; on
 156+ DB_Not_Found it falls back to the outer [get_user_id]. This binding is not
 157+ [rec], so that call is not recursive -- presumably it reaches the web-service
 158+ lookup from Wikipedia_api (TODO confirm which opened module supplies it). *)
 159+let get_user_id u_name =
 160+ try db # get_user_id u_name with DB_Not_Found -> get_user_id u_name
 161+in
 162+
 163+(* Color the asked for revision. *)
 164+let process_revs (page_id : int) (rev_ids : int list) (page_title : string)
 165+ (rev_timestamp : string) (user_id : int) =
 166+ let rec do_processing (rev_id : int) =
 167+ (* I assume that a user cannot vote on an unprocessed revision here. *)
 168+ if (db # revision_needs_coloring rev_id) then (
 169+ (* Grab the text and color it. *)
 170+ let last_colored_timestamp = try db # get_latest_colored_rev_timestamp
 171+ page_id with DB_Not_Found -> "19700201000000" in
 172+ let (wpage, wrevs) = fetch_page_and_revs_after page_title last_colored_timestamp in
 173+ match wpage with
 174+ | None -> Printf.printf "Failed for page %s\n" page_title
 175+ | Some pp -> (
 176+ Printf.printf "Got page titled %s\n" pp.page_title;
 177+ db # write_page pp
 178+ );
 179+ let update_and_write_rev rev =
 180+ rev.revision_page <- page_id;
 181+ rev.revision_user <- (get_user_id rev.revision_user_text);
 182+ db # write_revision rev
 183+ in
 184+ List.iter update_and_write_rev wrevs;
 185+ let f rev =
 186+ evaluate_revision page_id rev.revision_id
 187+ in
 188+ List.iter f wrevs;
 189+ Unix.sleep sleep_time_sec;
 190+ if !synch_log then flush Pervasives.stdout;
 191+ if (db # revision_needs_coloring rev_id) then (
 192+ do_processing rev_id
 193+ )
 194+ else ()
 195+ ) else ( (* Vote! *)
 196+ let process_vote v = (
 197+ if v.vote_page_id == page_id then
 198+ evaluate_vote page_id rev_id v.vote_voter_id
 199+ ) in
 200+ let votes = db # fetch_unprocessed_votes !max_events_to_process in
 201+ List.iter process_vote votes
 202+ )
 203+ in
 204+ List.iter do_processing rev_ids;
 205+ Printf.printf "Finished processing page %s\n" page_title;
 206+ exit 0 (* No more work to do, stop this process. *)
 207+in
 208+
 209+(* Start a new process going which actually processes the missing page. *)
 210+let dispatch_page rev_pages =
 211+ let new_pages = Hashtbl.create (List.length rev_pages) in
 212+ let is_new_page p =
 213+ try ignore (Hashtbl.find working_children p); false with Not_found -> true
 214+ in
 215+ let set_revs_to_get (r,p,title,time,uid) =
 216+ Printf.printf "page %d\n" p;
 217+ if (is_new_page p) then (
 218+ (
 219+ let current_revs = try Hashtbl.find new_pages p with
 220+ Not_found -> ([],title,time,uid) in
 221+ (Hashtbl.replace new_pages p ((r::(let x,_,_,_ =
 222+ current_revs in x)),
 223+ title,time,uid))
 224+ )
 225+ ) else ()
 226+ in
 227+ let launch_processing p (r,t,rt,uid) = (
 228+ let new_pid = Unix.fork () in
 229+ match new_pid with
 230+ | 0 -> (
 231+ Printf.printf "I'm the child\n Running on page %d rev %d\n" p
 232+ (List.hd r);
 233+ process_revs p r t rt uid
 234+ )
 235+ | _ -> (Printf.printf "Parent of pid %d\n" new_pid;
 236+ Hashtbl.add working_children p (new_pid)
 237+ )
 238+ ) in
 239+ Hashtbl.iter clean_kids working_children;
 240+ List.iter set_revs_to_get rev_pages;
 241+ Hashtbl.iter launch_processing new_pages
 242+in
 243+
 244+(* Poll to see if there is any more work to be done. *)
 245+let rec main_loop () =
 246+ if (Hashtbl.length working_children) >= max_concurrent_procs then (
 247+ Hashtbl.iter clean_kids working_children
 248+ ) else (
 249+ let revs_to_process = db # fetch_next_to_color
 250+ (max (max_concurrent_procs - Hashtbl.length working_children) 0) in
 251+ dispatch_page revs_to_process
 252+ );
 253+ Unix.sleep sleep_time_sec;
 254+ if !synch_log then flush Pervasives.stdout;
 255+ main_loop ()
 256+in
 257+
 258+main_loop ()
Index: trunk/extensions/WikiTrust/analysis/online_types.ml
@@ -213,3 +213,33 @@
214214
215215 (* Timestamp in the DB *)
216216 type timestamp_t = int * int * int * int * int * int;;
 217+
 218+(* Types for talking with Wikipedia *)
 219+type wiki_page = {
 220+ page_id : int;
 221+ page_namespace : int;
 222+ page_title : string;
 223+ page_restrictions : string;
 224+ page_counter : int;
 225+ page_is_redirect : bool;
 226+ page_is_new : bool;
 227+ page_random : float;
 228+ page_touched : string;
 229+ page_latest : int;
 230+ page_len : int
 231+}
 232+
 233+type wiki_revision = {
 234+ revision_id : int;
 235+ mutable revision_page : int;
 236+ revision_text_id : int;
 237+ revision_comment : string;
 238+ mutable revision_user : int;
 239+ revision_user_text : string;
 240+ revision_timestamp : string;
 241+ revision_minor_edit : bool;
 242+ revision_deleted : bool;
 243+ revision_len : int;
 244+ revision_parent_id : int;
 245+ revision_content : string;
 246+}
Index: trunk/extensions/WikiTrust/analysis/tmpfile.mli
@@ -0,0 +1,25 @@
 2+(***********************************************************************)
 3+(* *)
 4+(* Objective Caml *)
 5+(* *)
 6+(* François Pessaux, projet Cristal, INRIA Rocquencourt *)
 7+(* Pierre Weis, projet Cristal, INRIA Rocquencourt *)
 8+(* Jun Furuse, projet Cristal, INRIA Rocquencourt *)
 9+(* *)
 10+(* Copyright 1999 - 2003 *)
 11+(* Institut National de Recherche en Informatique et en Automatique. *)
 12+(* Distributed only by permission. *)
 13+(* *)
 14+(***********************************************************************)
 15+
 16+val tmp_dir : string ref
 17+(* swap file directory: the default is /tmp, but note that it is often
 18+ the case that /tmp is not large enough for some huge images!! *)
 19+
 20+val new_tmp_file_name : string -> string
 21+(* [new_tmp_file_name prefix] returns a new temporary file name with
 22+ prefix [prefix]. *)
 23+
 24+val remove_tmp_file : string -> unit
 25+(* [remove_tmp_file fname] removes [fname] if it can; nothing
 26+ happens if [fname] cannot be removed. *)
Index: trunk/extensions/WikiTrust/analysis/trust-server.cgf
@@ -0,0 +1,51 @@
 2+(* Configuration file for the "netplex" program. *)
 3+
 4+netplex {
 5+ controller {
 6+ max_level = "debug"; (* Log level *)
 7+ logging {
 8+ type = "stderr"; (* Log to stderr *)
 9+ }
 10+ };
 11+ service {
 12+ name = "nethttpd";
 13+ protocol {
 14+ (* This section creates the socket *)
 15+ name = "http";
 16+ address {
 17+ type = "internet";
 18+ bind = "0.0.0.0:4444";
 19+ };
 20+(*
 21+ address {
 22+ type = "internet";
 23+ bind = "[::1]:4445"; (* IPv6 example *)
 24+ }
 25+ *)
 26+ };
 27+ processor {
 28+ (* This section specifies how to process data of the socket *)
 29+ type = "nethttpd";
 30+ host {
 31+ (* Think of Apache's "virtual hosts" *)
 32+ pref_name = "localhost";
 33+ pref_port = 4444;
 34+ names = "*:0"; (* Which requests are matched here: all *)
 35+ uri {
 36+ path = "/"; (* This path is bound to the trust_store *)
 37+ service {
 38+ type = "dynamic";
 39+ handler = "trust";
 40+ }
 41+ }
 42+ };
 43+ };
 44+ workload_manager {
 45+ type = "dynamic";
 46+ max_jobs_per_thread = 1; (* Everything else is senseless *)
 47+ min_free_jobs_capacity = 1;
 48+ max_free_jobs_capacity = 1;
 49+ max_threads = 20;
 50+ };
 51+ }
 52+}
Index: trunk/extensions/WikiTrust/analysis/eval_online_wiki.ml
@@ -35,106 +35,18 @@
3636
3737 open Printf
3838 open Mysql
 39+open Online_command_line
3940
40 -(** This is a timeout for how long we wait for database locks.
41 - If we wait longer than this, then the db is too busy, and we quit all work.
42 - Notice that this provides an auto-throttling mechanism: if there are too many
43 - instances of coloring active at once, we won't get the lock quickly, and the
44 - process will terminate. *)
45 -let lock_timeout = 20
46 -(** This is the max number of revisions to color in a single db connection. *)
47 -let n_revs_color_in_one_connection = 200
48 -
49 -(** Type on analysis to perform *)
50 -type eval_type_t = EVENT | VOTE
51 -
5241 (** MissingInformation is raised if any of
5342 page_id, revision_id, or voter_uid is not specified. *)
5443 exception MissingInformation
5544
5645 (** This is the top-level code of the wiki online xml evaluation. *)
5746
58 -(* Mediawiki DB *)
59 -let mw_db_user = ref "wikiuser"
60 -let set_mw_db_user u = mw_db_user := u
61 -let mw_db_pass = ref ""
62 -let set_mw_db_pass p = mw_db_pass := p
63 -let mw_db_name = ref "wikidb"
64 -let set_mw_db_name d = mw_db_name := d
65 -let mw_db_host = ref "localhost"
66 -let set_mw_db_host d = mw_db_host := d
67 -let mw_db_port = ref 3306
68 -let set_mw_db_port d = mw_db_port := d
69 -
70 -(* Wikitrust DB *)
71 -let use_separate_dbs = ref false
72 -let wt_db_user = ref "wikiuser"
73 -let set_wt_db_user u = wt_db_user := u; use_separate_dbs := true
74 -let wt_db_pass = ref ""
75 -let set_wt_db_pass p = wt_db_pass := p; use_separate_dbs := true
76 -let wt_db_name = ref "wikidb"
77 -let set_wt_db_name d = wt_db_name := d; use_separate_dbs := true
78 -let wt_db_host = ref "localhost"
79 -let set_wt_db_host d = wt_db_host := d; use_separate_dbs := true
80 -let wt_db_port = ref 3306
81 -let set_wt_db_port d = wt_db_port := d; use_separate_dbs := true
82 -
83 -(* Other paramiters *)
84 -let db_prefix = ref ""
85 -let set_db_prefix d = db_prefix := d
86 -let log_name = ref "/dev/null"
87 -let set_log_name d = log_name := d
88 -let synch_log = ref false
89 -let noop s = ()
90 -let delete_all = ref false
91 -let reputation_speed = ref 1.
92 -let set_reputation_speed f = reputation_speed := f
93 -let requested_rev_id = ref None
94 -let set_requested_rev_id d = requested_rev_id := Some d
95 -let color_delay = ref 0.
96 -let set_color_delay f = color_delay := f
97 -let max_events_to_process = ref 100
98 -let set_max_events_to_process n = max_events_to_process := n
99 -let times_to_retry_trans = ref 3
100 -let set_times_to_retry_trans n = times_to_retry_trans := n
101 -let dump_db_calls = ref false
102 -let eval_type = ref EVENT
103 -let set_vote () = eval_type := VOTE
104 -let requested_voter_id = ref None.
105 -let set_requested_voter_id f = requested_voter_id := Some f
106 -let requested_page_id = ref None.
107 -let set_requested_page_id f = requested_page_id := Some f
108 -
10947 (* Figure out what to do and how we are going to do it. *)
110 -let command_line_format =
111 - [
112 - ("-db_prefix", Arg.String set_db_prefix, "<string>: Database table prefix (default: none)");
113 - ("-db_user", Arg.String set_mw_db_user, "<string>: Mediawiki DB username (default: wikiuser)");
114 - ("-db_name", Arg.String set_mw_db_name, "<string>: Mediawiki DB name (default: wikidb)");
115 - ("-db_pass", Arg.String set_mw_db_pass, "<string>: Mediawiki DB password");
116 - ("-db_host", Arg.String set_mw_db_host, "<string>: Mediawiki DB host (default: localhost)");
117 - ("-db_port", Arg.Int set_mw_db_port, "<int>: Mediawiki DB port (default: 3306)");
 48+let custom_line_format = [] @ command_line_format
11849
119 - ("-wt_db_user", Arg.String set_wt_db_user, "<string>: Wikitrust DB username (specify only if the wikitrust db is different from the mediawiki db) (default: wikiuser)");
120 - ("-wt_db_name", Arg.String set_wt_db_name, "<string>: Wikitrust DB name (specify only if the wikitrust db is different from the mediawiki db) (default: wikidb)");
121 - ("-wt_db_pass", Arg.String set_wt_db_pass, "<string>: Wikitrust DB password (specify only if the wikitrust db is different from the mediawiki db)");
122 - ("-wt_db_host", Arg.String set_wt_db_host, "<string>: Wikitrust DB host (specify only if the wikitrust db is different from the mediawiki db) (default: localhost)");
123 - ("-wt_db_port", Arg.Int set_wt_db_port, "<int>: Wikitrust DB port (specify only if the wikitrust db is different from the mediawiki db) (default: 3306)");
124 -
125 - ("-rev_id", Arg.Int set_requested_rev_id, "<int>: (optional) revision ID that we want to ensure it is colored");
126 - ("-log_file", Arg.String set_log_name, "<filename>: Logger output file (default: /dev/null)");
127 - ("-eval_vote", Arg.Unit set_vote, ": Just evaluate the given vote");
128 - ("-voter_id", Arg.Int set_requested_voter_id, "<int>: (optional) voter ID that we want to evaluate the vote of");
129 - ("-page_id", Arg.Int set_requested_page_id, "<int>: (optional) page ID that we want to evaluate the vote on");
130 - ("-rep_speed", Arg.Float set_reputation_speed, "<float>: Speed at which users gain reputation; 1.0 for large wikis");
131 - ("-throttle_delay", Arg.Float set_color_delay, "<float>: Amount of time (on average) to wait between analysis of events. This can be used to throttle the computation, not to use too many resources.");
132 - ("-n_events", Arg.Int set_max_events_to_process, "<int>: Max number of events to process (default: 100) ");
133 - ("-times_to_retry_trans", Arg.Int set_times_to_retry_trans, "<int>: Max number of times to retry a transation if it fails (default: 3).");
134 - ("-dump_db_calls", Arg.Set dump_db_calls, ": Writes to the db log all database calls. This is very verbose; use only for debugging.");
135 - ("-delete_all", Arg.Set delete_all, ": Recomputes all reputations and trust from scratch. BE CAREFUL!! This may take a LONG time for large wikis.");
136 - ]
137 -
138 -let _ = Arg.parse command_line_format noop "
 50+let _ = Arg.parse custom_line_format noop "
13951 This command computes user reputations and text trust for a wiki.
14052 The command assumes that the wiki database already contains some special
14153 tables for reputation and trust, and computes the missing reputation and
@@ -352,6 +264,9 @@
353265 end done; (* Loop as long as we need to do events *)
354266
355267 );
 268+
356269 (* Closes the db connection *)
357 -db#close
 270+db#close;
358271
 272+(* Close the logger *)
 273+logger#close
Index: trunk/extensions/WikiTrust/analysis/online_command_line.ml
@@ -0,0 +1,140 @@
 2+(*
 3+
 4+Copyright (c) 2007-2008 The Regents of the University of California
 5+All rights reserved.
 6+
 7+Authors: Luca de Alfaro, Ian Pye
 8+
 9+Redistribution and use in source and binary forms, with or without
 10+modification, are permitted provided that the following conditions are met:
 11+
 12+1. Redistributions of source code must retain the above copyright notice,
 13+this list of conditions and the following disclaimer.
 14+
 15+2. Redistributions in binary form must reproduce the above copyright notice,
 16+this list of conditions and the following disclaimer in the documentation
 17+and/or other materials provided with the distribution.
 18+
 19+3. The names of the contributors may not be used to endorse or promote
 20+products derived from this software without specific prior written
 21+permission.
 22+
 23+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 24+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 25+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 26+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 27+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 28+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 29+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 30+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 31+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 32+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 33+POSSIBILITY OF SUCH DAMAGE.
 34+
 35+ *)
 36+
 37+(* Basic command line options and functions for the online analysis *)
 38+
 39+(** This is a timeout for how long we wait for database locks.
 40+ If we wait longer than this, then the db is too busy, and we quit all work.
 41+ Notice that this provides an auto-throttling mechanism: if there are too many
 42+ instances of coloring active at once, we won't get the lock quickly, and the
 43+ process will terminate. *)
 44+let lock_timeout = 20
 45+(** This is the max number of revisions to color in a single db connection. *)
 46+let n_revs_color_in_one_connection = 200
 47+
 48+(** Type of analysis to perform *)
 49+type eval_type_t = EVENT | VOTE
 50+
 51+(* Mediawiki DB *)
 52+let mw_db_user = ref "wikiuser"
 53+let set_mw_db_user u = mw_db_user := u
 54+let mw_db_pass = ref ""
 55+let set_mw_db_pass p = mw_db_pass := p
 56+let mw_db_name = ref "wikidb"
 57+let set_mw_db_name d = mw_db_name := d
 58+let mw_db_host = ref "localhost"
 59+let set_mw_db_host d = mw_db_host := d
 60+let mw_db_port = ref 3306
 61+let set_mw_db_port d = mw_db_port := d
 62+let db_prefix = ref ""
 63+let set_db_prefix d = db_prefix := d
 64+let dump_db_calls = ref false
 65+
 66+(* Wikitrust DB *)
 67+let use_separate_dbs = ref false
 68+let wt_db_user = ref "wikiuser"
 69+let set_wt_db_user u = wt_db_user := u; use_separate_dbs := true
 70+let wt_db_pass = ref ""
 71+let set_wt_db_pass p = wt_db_pass := p; use_separate_dbs := true
 72+let wt_db_name = ref "wikidb"
 73+let set_wt_db_name d = wt_db_name := d; use_separate_dbs := true
 74+let wt_db_host = ref "localhost"
 75+let set_wt_db_host d = wt_db_host := d; use_separate_dbs := true
 76+let wt_db_port = ref 3306
 77+let set_wt_db_port d = wt_db_port := d; use_separate_dbs := true
 78+
 79+(* Other paramiters *)
 80+let noop s = ()
 81+let db_prefix = ref ""
 82+let set_db_prefix d = db_prefix := d
 83+let log_name = ref "/dev/null"
 84+let set_log_name d = log_name := d
 85+let synch_log = ref false
 86+let delete_all = ref false
 87+let reputation_speed = ref 1.
 88+let set_reputation_speed f = reputation_speed := f
 89+let requested_rev_id = ref None
 90+let set_requested_rev_id d = requested_rev_id := Some d
 91+let color_delay = ref 0.
 92+let set_color_delay f = color_delay := f
 93+let max_events_to_process = ref 100
 94+let set_max_events_to_process n = max_events_to_process := n
 95+let times_to_retry_trans = ref 3
 96+let set_times_to_retry_trans n = times_to_retry_trans := n
 97+let dump_db_calls = ref false
 98+let eval_type = ref EVENT
 99+let set_vote () = eval_type := VOTE
 100+let requested_voter_id = ref None.
 101+let set_requested_voter_id f = requested_voter_id := Some f
 102+let requested_page_id = ref None.
 103+let set_requested_page_id f = requested_page_id := Some f
 104+
 105+(* API params *)
 106+let target_wikimedia = ref "http://en.wikipedia.org/w/api.php"
 107+let set_target_wikimedia t = target_wikimedia := t
 108+let user_id_server = ref "http://toolserver.org/~Ipye/UserName2UserId.php"
 109+let set_user_id_server t = user_id_server := t
 110+
 111+(* Figure out what to do and how we are going to do it. *)
 112+let command_line_format =
 113+ [
 114+ ("-db_prefix", Arg.String set_db_prefix, "<string>: Database table prefix (default: none)");
 115+ ("-db_user", Arg.String set_mw_db_user, "<string>: Mediawiki DB username (default: wikiuser)");
 116+ ("-db_name", Arg.String set_mw_db_name, "<string>: Mediawiki DB name (default: wikidb)");
 117+ ("-db_pass", Arg.String set_mw_db_pass, "<string>: Mediawiki DB password");
 118+ ("-db_host", Arg.String set_mw_db_host, "<string>: Mediawiki DB host (default: localhost)");
 119+ ("-db_port", Arg.Int set_mw_db_port, "<int>: Mediawiki DB port
 120+ (default: 3306)");
 121+ ("-wiki_api", Arg.String set_target_wikimedia, "<string>: Mediawiki api to target for missing revs");
 122+ ("-user_id_api", Arg.String set_user_id_server, "<string>: location of a tool which turns user_names into user_ids");
 123+ ("-dump_db_calls", Arg.Set dump_db_calls, ": Writes to the db log all
 124+ database calls. This is very verbose; use only for debugging.");
 125+ ("-wt_db_user", Arg.String set_wt_db_user, "<string>: Wikitrust DB username (specify only if the wikitrust db is different from the mediawiki db) (default: wikiuser)");
 126+ ("-wt_db_name", Arg.String set_wt_db_name, "<string>: Wikitrust DB name (specify only if the wikitrust db is different from the mediawiki db) (default: wikidb)");
 127+ ("-wt_db_pass", Arg.String set_wt_db_pass, "<string>: Wikitrust DB password (specify only if the wikitrust db is different from the mediawiki db)");
 128+ ("-wt_db_host", Arg.String set_wt_db_host, "<string>: Wikitrust DB host (specify only if the wikitrust db is different from the mediawiki db) (default: localhost)");
 129+ ("-wt_db_port", Arg.Int set_wt_db_port, "<int>: Wikitrust DB port (specify only if the wikitrust db is different from the mediawiki db) (default: 3306)");
 130+ ("-rev_id", Arg.Int set_requested_rev_id, "<int>: (optional) revision ID that we want to ensure it is colored");
 131+ ("-log_file", Arg.String set_log_name, "<filename>: Logger output file (default: /dev/null)");
 132+ ("-sync_log", Arg.Set synch_log, ": Flush writes to the log immidiatly. This is very slow; use only for debugging.");
 133+ ("-eval_vote", Arg.Unit set_vote, ": Just evaluate the given vote");
 134+ ("-voter_id", Arg.Int set_requested_voter_id, "<int>: (optional) voter ID that we want to evaluate the vote of");
 135+ ("-page_id", Arg.Int set_requested_page_id, "<int>: (optional) page ID that we want to evaluate the vote on");
 136+ ("-rep_speed", Arg.Float set_reputation_speed, "<float>: Speed at which users gain reputation; 1.0 for large wikis");
 137+ ("-throttle_delay", Arg.Float set_color_delay, "<float>: Amount of time (on average) to wait between analysis of events. This can be used to throttle the computation, not to use too many resources.");
 138+ ("-n_events", Arg.Int set_max_events_to_process, "<int>: Max number of events to process (default: 100) ");
 139+ ("-times_to_retry_trans", Arg.Int set_times_to_retry_trans, "<int>: Max number of times to retry a transation if it fails (default: 3).");
 140+ ("-delete_all", Arg.Set delete_all, ": Recomputes all reputations and trust from scratch. BE CAREFUL!! This may take a LONG time for large wikis.");
 141+ ]
Property changes on: trunk/extensions/WikiTrust/analysis/online_command_line.ml
___________________________________________________________________
Added: svn:eol-style
1142 + native
Index: trunk/extensions/WikiTrust/analysis/online_db.ml
@@ -111,6 +111,13 @@
112112 vote_revision_id = (not_null int2ml row.(2));
113113 vote_voter_id = (not_null int2ml row.(3));
114114 }
 115+
 116+let next2color_row row =
 117+ ((not_null int2ml row.(0)),
 118+ (not_null int2ml row.(1)),
 119+ (not_null str2ml row.(2)),
 120+ (not_null str2ml row.(3)),
 121+ (not_null int2ml row.(4)))
115122
116123 (** This class provides a handle for accessing the database in the on-line
117124 implementation. *)
@@ -152,7 +159,9 @@
153160 end;
154161 try
155162 Mysql.exec dbh s
156 - with _ -> raise DB_TXN_Bad
 163+ with Mysql.Error e -> if debug_mode then
 164+ begin print_endline e; flush stdout end;
 165+ raise DB_TXN_Bad
157166
158167 (* ================================================================ *)
159168 (* Disconnect *)
@@ -360,7 +369,22 @@
361370 None -> raise DB_Not_Found
362371 | Some x -> not_null int2ml x.(0)
363372
 373+ (** [get_latest_colored_rev_timestamp page_id] returns the timestamp of the
 374+ most recent revision of page [page_id] in the wikitrust_revision table. *)
 375+ method get_latest_colored_rev_timestamp (page_id : int) : string =
 376+ let s = Printf.sprintf "SELECT time_string FROM %swikitrust_revision WHERE page_id = %s ORDER BY time_string DESC, revision_id DESC LIMIT 1" db_prefix (ml2int page_id) in
 377+ match fetch (self#db_exec mediawiki_dbh s) with
 378+ None -> raise DB_Not_Found
 379+ | Some x -> not_null str2ml x.(0)
364380
 381+ (** [get_latest_colored_rev_timestamp_by_title page_title] returns the
 382+ timestamp of the most recent revision of the page titled [page_title]. *)
 383+ method get_latest_colored_rev_timestamp_by_title (page_title : string) : string =
 384+ let s = Printf.sprintf "SELECT time_string FROM %swikitrust_revision AS A join page AS B on (A.page_id = B.page_id) WHERE B.page_title = %s ORDER BY time_string DESC, revision_id DESC LIMIT 1" db_prefix (ml2str page_title) in
 385+ match fetch (self#db_exec mediawiki_dbh s) with
 386+ None -> raise DB_Not_Found
 387+ | Some x -> not_null str2ml x.(0)
 388+
365389 (* ================================================================ *)
366390 (* Revision methods. *)
367391
@@ -473,6 +497,17 @@
474498 None -> raise DB_Not_Found
475499 | Some x -> not_null str2ml x.(0)
476500
 501+ (** [read_colored_markup_with_median rev_id] reads the text markup of a revision with id
 502+ [rev_id]. The markup is the text of the revision, annotated with trust
 503+ and origin information. This method also returns the median value. *)
 504+ method read_colored_markup_with_median (rev_id : int) : string * float =
 505+ let s = Printf.sprintf "SELECT revision_text,median FROM %swikitrust_colored_markup JOIN %swikitrust_global WHERE revision_id = %s" db_prefix db_prefix
 506+ (ml2int rev_id) in
 507+ let result = self#db_exec wikitrust_dbh s in
 508+ match Mysql.fetch result with
 509+ None -> raise DB_Not_Found
 510+ | Some x -> (not_null str2ml x.(0), not_null float2ml x.(1))
 511+
477512 (** [write_trust_origin_sigs rev_id words trust origin sigs] writes that the
478513 revision [rev_id] is associated with [words], [trust], [origin], and [sigs]. *)
479514 method write_words_trust_origin_sigs (rev_id: int)
@@ -532,14 +567,57 @@
533568 None -> raise DB_Not_Found
534569 | Some x -> not_null float2ml x.(0)
535570
536 -
 571+
 572+ (** [get_user_id name] gets the user id for the user with the given user name *)
 573+ method get_user_id (user_name : string) : int =
 574+ let s = Printf.sprintf "SELECT user_id FROM %swikitrust_revision WHERE username = %s" db_prefix (ml2str user_name) in
 575+ let result = self#db_exec wikitrust_dbh s in
 576+ match Mysql.fetch result with
 577+ None -> raise DB_Not_Found
 578+ | Some x -> not_null int2ml x.(0)
 579+
537580 (* ================================================================ *)
538581
539582 (** Add the vote to the db *)
540583 method vote (vote : vote_t) =
541 - let s = Printf.sprintf "INSERT INTO %swikitrust_vote (rev_id, page_id, voter_id, voted_on) VALUES (%s, %s, %s, %s)" db_prefix (ml2int vote.vote_revision_id) (ml2int vote.vote_page_id) (ml2int vote.vote_voter_id) (ml2str vote.vote_time) in
 584+ let s = Printf.sprintf "INSERT INTO %swikitrust_vote (revision_id, page_id, voter_id, voted_on) VALUES (%s, %s, %s, %s)" db_prefix (ml2int vote.vote_revision_id) (ml2int vote.vote_page_id) (ml2int vote.vote_voter_id) (ml2str vote.vote_time) in
542585 ignore (self#db_exec wikitrust_dbh s)
543586
 587+ (* ================================================================ *)
 588+
 589+ (** Note that the requested rev needs to be colored *)
 590+ method mark_to_color (rev_id : int) (page_id : int) (page_title : string)
 591+ (rev_time : string) (user_id : int) =
 592+ let s = Printf.sprintf "INSERT INTO %swikitrust_missing_revs (revision_id, page_id, page_title, rev_time, user_id) VALUES (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE requested_on = now(), processed = false" db_prefix (ml2int rev_id) (ml2int page_id) (ml2str page_title) (ml2str rev_time) (ml2int user_id) in
 593+ ignore (self#db_exec wikitrust_dbh s)
 594+
 595+ (** Get the next revs to color *)
 596+ method fetch_next_to_color (max_to_get : int) :
 597+ (int * int * string * string * int) list =
 598+ let s = Printf.sprintf "SELECT revision_id, page_id, page_title, rev_time, user_id FROM %swikitrust_missing_revs WHERE NOT processed ORDER BY requested_on ASC LIMIT %s" db_prefix (ml2int max_to_get) in
 599+ let results = Mysql.map (self#db_exec wikitrust_dbh s) next2color_row in
 600+ let mark_as_done (rev,_,_,_,_) =
 601+ let s = Printf.sprintf "UPDATE %swikitrust_missing_revs SET processed = true WHERE revision_id = %s" db_prefix (ml2int rev) in
 602+ ignore (self#db_exec wikitrust_dbh s)
 603+ in
 604+ List.iter mark_as_done results;
 605+ results
 606+
 607+ (** Add the page to the db *)
 608+ method write_page (page : wiki_page) =
 609+ let s = Printf.sprintf "INSERT INTO %spage (page_id, page_namespace, page_title, page_restrictions, page_counter, page_is_redirect, page_is_new, page_random, page_touched, page_latest, page_len) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE page_latest = %s" db_prefix (ml2int page.page_id) (ml2int page.page_namespace) (ml2str page.page_title) (ml2str page.page_restrictions) (ml2int page.page_counter) (if page.page_is_redirect then "true" else "false") (if page.page_is_new then "true" else "false") (ml2float page.page_random) (ml2str page.page_touched) (ml2int page.page_latest) (ml2int page.page_len) (ml2int page.page_latest)
 610+ in
 611+ ignore (self#db_exec wikitrust_dbh s)
 612+
 613+ (** Add the rev to the db *)
 614+ method write_revision (rev : wiki_revision) =
 615+ (* Add the content. *)
 616+ let s = Printf.sprintf "INSERT INTO %stext (old_id, old_text, old_flags) VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE old_flags = %s" db_prefix (ml2int rev.revision_id) (ml2str rev.revision_content) (ml2str "utf8") (ml2str "utf8") in
 617+ ignore (self#db_exec wikitrust_dbh s);
 618+ (* And then the revision metadata. *)
 619+ let s = Printf.sprintf "INSERT INTO %srevision (rev_id, rev_page, rev_text_id, rev_comment, rev_user, rev_user_text, rev_timestamp, rev_minor_edit, rev_deleted, rev_len, rev_parent_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE rev_len = %s" db_prefix (ml2int rev.revision_id) (ml2int rev.revision_page) (ml2int rev.revision_id) (ml2str rev.revision_comment) (ml2int rev.revision_user) (ml2str rev.revision_user_text) (ml2str rev.revision_timestamp) (if rev.revision_minor_edit then "true" else "false") (if rev.revision_deleted then "true" else "false") (ml2int rev.revision_len) (ml2int rev.revision_parent_id) (ml2int rev.revision_len) in
 620+ ignore (self#db_exec wikitrust_dbh s)
 621+
544622 (** Clear everything out (except for the votes) *)
545623 method delete_all (really : bool) =
546624 match really with
Index: trunk/extensions/WikiTrust/analysis/vote_revision.ml
@@ -35,45 +35,13 @@
3636
3737 open Printf
3838 open Mysql
 39+open Online_command_line
3940
4041 (** MissingInformation is raised if any of
4142 page_id, revision_id, or voter_uid is not specified. *)
4243 exception MissingInformation
4344
4445 (** This is the top-level code for voting on revisions. *)
45 -
46 -(* Mediawiki DB *)
47 -let mw_db_user = ref "wikiuser"
48 -let set_mw_db_user u = mw_db_user := u
49 -let mw_db_pass = ref ""
50 -let set_mw_db_pass p = mw_db_pass := p
51 -let mw_db_name = ref "wikidb"
52 -let set_mw_db_name d = mw_db_name := d
53 -let mw_db_host = ref "localhost"
54 -let set_mw_db_host d = mw_db_host := d
55 -let mw_db_port = ref 3306
56 -let set_mw_db_port d = mw_db_port := d
57 -
58 -(* Wikitrust DB *)
59 -let use_separate_dbs = ref false
60 -let wt_db_user = ref "wikiuser"
61 -let set_wt_db_user u = wt_db_user := u; use_separate_dbs := true
62 -let wt_db_pass = ref ""
63 -let set_wt_db_pass p = wt_db_pass := p; use_separate_dbs := true
64 -let wt_db_name = ref "wikidb"
65 -let set_wt_db_name d = wt_db_name := d; use_separate_dbs := true
66 -let wt_db_host = ref "localhost"
67 -let set_wt_db_host d = wt_db_host := d; use_separate_dbs := true
68 -let wt_db_port = ref 3306
69 -let set_wt_db_port d = wt_db_port := d; use_separate_dbs := true
70 -
71 -(* Other paramiters *)
72 -let db_prefix = ref ""
73 -let set_db_prefix d = db_prefix := d
74 -let log_name = ref "/dev/null"
75 -let set_log_name d = log_name := d
76 -let synch_log = ref false
77 -let noop s = ()
7846 let rev_id_opt = ref None
7947 let set_rev_id d = rev_id_opt := Some d
8048 let page_id_opt = ref None
@@ -82,37 +50,18 @@
8351 let set_voter_id d = voter_id_opt := Some d
8452 let vote_time_opt = ref None
8553 let set_vote_time d = vote_time_opt := Some d
86 -let times_to_retry_trans = ref 3
87 -let set_times_to_retry_trans n = times_to_retry_trans := n
88 -let dump_db_calls = ref false
8954
 55+
9056 (* Figure out what to do and how we are going to do it. *)
91 -let command_line_format =
 57+let custom_line_format =
9258 [
93 - ("-db_prefix", Arg.String set_db_prefix, "<string>: Database table prefix (default: none)");
94 - ("-db_user", Arg.String set_mw_db_user, "<string>: Mediawiki DB username (default: wikiuser)");
95 - ("-db_name", Arg.String set_mw_db_name, "<string>: Mediawiki DB name (default: wikidb)");
96 - ("-db_pass", Arg.String set_mw_db_pass, "<string>: Mediawiki DB password");
97 - ("-db_host", Arg.String set_mw_db_host, "<string>: Mediawiki DB host (default: localhost)");
98 - ("-db_port", Arg.Int set_mw_db_port, "<int>: Mediawiki DB port (default: 3306)");
99 -
100 - ("-wt_db_user", Arg.String set_wt_db_user, "<string>: Wikitrust DB username (specify only if the wikitrust db is different from the mediawiki db) (default: wikiuser)");
101 - ("-wt_db_name", Arg.String set_wt_db_name, "<string>: Wikitrust DB name (specify only if the wikitrust db is different from the mediawiki db) (default: wikidb)");
102 - ("-wt_db_pass", Arg.String set_wt_db_pass, "<string>: Wikitrust DB password (specify only if the wikitrust db is different from the mediawiki db)");
103 - ("-wt_db_host", Arg.String set_wt_db_host, "<string>: Wikitrust DB host (specify only if the wikitrust db is different from the mediawiki db) (default: localhost)");
104 - ("-wt_db_port", Arg.Int set_wt_db_port, "<int>: Wikitrust DB port (specify only if the wikitrust db is different from the mediawiki db) (default: 3306)");
105 -
10659 ("-rev_id", Arg.Int set_rev_id, "<int>: revision ID that is voted");
10760 ("-page_id", Arg.Int set_page_id, "<int>: page ID that is voted");
10861 ("-voter_id", Arg.Int set_voter_id, "<int>: user ID that votes");
10962 ("-vote_time", Arg.String set_vote_time, "<string>: timestamp for the vote in form YYYYMMDDHHMMSS. Ex: 20080927231134");
 63+ ] @ command_line_format
11064
111 - ("-log_file", Arg.String set_log_name, "<filename>: Logger output file (default: /dev/null)");
112 - ("-times_to_retry_trans", Arg.Int set_times_to_retry_trans, "<int>: Max number of times to retry a transation if it fails (default: 3).");
113 - ("-dump_db_calls", Arg.Set dump_db_calls, ": Writes to the db log all database calls. This is very verbose; use only for debugging.");
114 - ]
115 -
116 -let _ = Arg.parse command_line_format noop "
 65+let _ = Arg.parse custom_line_format noop "
11766 This command lets users vote for the quality of revisions.
11867 In the call
11968 vote_revision ... -rev_id 4 -page_id 5 -voter_id 6 ...
Index: trunk/extensions/WikiTrust/analysis/wikipedia_api.ml
@@ -0,0 +1,193 @@
 2+(*
 3+
 4+Copyright (c) 2007-2008 The Regents of the University of California
 5+All rights reserved.
 6+
 7+Authors: Luca de Alfaro, Ian Pye
 8+
 9+Redistribution and use in source and binary forms, with or without
 10+modification, are permitted provided that the following conditions are met:
 11+
 12+1. Redistributions of source code must retain the above copyright notice,
 13+this list of conditions and the following disclaimer.
 14+
 15+2. Redistributions in binary form must reproduce the above copyright notice,
 16+this list of conditions and the following disclaimer in the documentation
 17+and/or other materials provided with the distribution.
 18+
 19+3. The names of the contributors may not be used to endorse or promote
 20+products derived from this software without specific prior written
 21+permission.
 22+
 23+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 24+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 25+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 26+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 27+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 28+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 29+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 30+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 31+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 32+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 33+POSSIBILITY OF SUCH DAMAGE.
 34+
 35+ *)
 36+
 37+(* Using the wikipedia API, retrieves information about pages and revisions *)
 38+
 39+open Http_client;;
 40+open ExtLib;;
 41+open Gzip;;
 42+open Xml;;
 43+open Online_types;;
 44+open Str;;
 45+
 46+exception Http_client_error
 47+
 48+Random.self_init ()
 49+
 50+let pipeline = new pipeline
 51+let buf_len = 8192
 52+let requested_encoding_type = "gzip"
 53+let tmp_prefix = "wiki"
 54+let rev_lim = "50"
 55+let api_tz_re = Str.regexp "\\([0-9][0-9][0-9][0-9]\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)T\\([0-9][0-9]\\):\\([0-9][0-9]\\):\\([0-9][0-9]\\)Z"
 56+
 57+(* Maps the Wikipedia API's timestamp to our internal one. *)
 58+let api_ts2mw_ts s =
 59+ let ts = if string_match api_tz_re s 0 then
 60+ (matched_group 1 s) ^ (matched_group 2 s) ^ (matched_group 3 s)
 61+ ^ (matched_group 4 s) ^ (matched_group 5 s) ^ (matched_group 6 s)
 62+ else "19700201000000" in
 63+ ts
 64+
 65+(* Given an input channel, return a string representing all there is
 66+ to be read of this channel. *)
 67+let input_all ic =
 68+ let rec loop acc total buf ofs =
 69+ let n = input ic buf ofs (buf_len - ofs) in
 70+ if n = 0 then
 71+ let res = String.create total in
 72+ let pos = total - ofs in
 73+ let _ = String.blit buf 0 res pos ofs in
 74+ let coll pos buf =
 75+ let new_pos = pos - buf_len in
 76+ String.blit buf 0 res new_pos buf_len;
 77+ new_pos in
 78+ let _ = List.fold_left coll pos acc in
 79+ res
 80+ else
 81+ let new_ofs = ofs + n in
 82+ let new_total = total + n in
 83+ if new_ofs = buf_len then
 84+ loop (buf :: acc) new_total (String.create buf_len) 0
 85+ else loop acc new_total buf new_ofs in
 86+ loop [] 0 (String.create buf_len) 0
 87+
 88+(*
 89+ Given a string url, make a get call and return the response as a string.
 90+*)
 91+let run_call url =
 92+ let call = new get url in
 93+ let request_header = call # request_header `Base in
 94+ (* Accept gzipped format *)
 95+ request_header # update_field "Accept-encoding" requested_encoding_type;
 96+ call # set_request_header request_header;
 97+ pipeline # add call;
 98+ pipeline # run();
 99+ match call # status with
 100+ | `Successful -> (
 101+ let body = call # response_body # value in
 102+ let repsponse_header = call # response_header in
 103+ Printf.printf "content_type: %s\n"
 104+ (let cnt,_ = (repsponse_header # content_type ()) in cnt);
 105+ match (repsponse_header # content_type ()) with
 106+ | ("text/xml",_) -> (
 107+ let tmp_file = Tmpfile.new_tmp_file_name tmp_prefix in
 108+ Std.output_file ~filename:tmp_file ~text:body;
 109+ let in_chan = Gzip.open_in tmp_file in
 110+ let decoded_body = input_all in_chan in
 111+ Gzip.close_in in_chan;
 112+ Tmpfile.remove_tmp_file tmp_file;
 113+ decoded_body
 114+ )
 115+ | _ -> body
 116+ )
 117+ | _ -> raise Http_client_error
 118+;;
 119+
 120+(*
 121+ Internal xml processing for the api
 122+*)
 123+let process_rev (rev : xml) : wiki_revision =
 124+ let w_rev = {
 125+ revision_id = int_of_string (Xml.attrib rev "revid");
 126+ revision_page = 0;
 127+ revision_text_id = int_of_string (Xml.attrib rev "revid");
 128+ revision_comment = (try (Xml.attrib rev "comment")
 129+ with Xml.No_attribute e -> "");
 130+ revision_user = -1;
 131+ revision_user_text = (Xml.attrib rev "user");
 132+ revision_timestamp = api_ts2mw_ts (Xml.attrib rev "timestamp");
 133+ revision_minor_edit = (try ignore(Xml.attrib rev "minor"); true
 134+ with Xml.No_attribute e -> false);
 135+ revision_deleted = false;
 136+ revision_len = (try int_of_string (Xml.attrib rev "size") with Xml.No_attribute e -> 0);
 137+ revision_parent_id = 0;
 138+ revision_content = (Netencoding.Html.decode ~in_enc:`Enc_utf8
 139+ ~out_enc:`Enc_utf8 ()
 140+ (Xml.to_string (List.hd (Xml.children rev))));
 141+ } in
 142+ w_rev
 143+
 144+(*
 145+ Internal xml processing for the api
 146+*)
 147+let process_page (page : xml) : (wiki_page option * wiki_revision list) =
 148+ let w_page = {
 149+ page_id = int_of_string (Xml.attrib page "pageid");
 150+ page_namespace = (int_of_string (Xml.attrib page "ns"));
 151+ page_title = (Xml.attrib page "title");
 152+ page_restrictions = "";
 153+ page_counter = int_of_string (Xml.attrib page "counter");
 154+ page_is_redirect = (try ignore(Xml.attrib page "redirect"); true
 155+ with Xml.No_attribute e -> false);
 156+ page_is_new = false;
 157+ page_random = (Random.float 1.0);
 158+ page_touched = api_ts2mw_ts (Xml.attrib page "touched");
 159+ page_latest = int_of_string (Xml.attrib page "lastrevid");
 160+ page_len = int_of_string (Xml.attrib page "length")
 161+ } in
 162+ let revs = Xml.children page in
 163+ (Some w_page, (Xml.map process_rev (List.hd revs)))
 164+
 165+(*
 166+ Given a page and date to start with, returns the next n revs for this page.
 167+*)
 168+let fetch_page_and_revs_after (page_title : string) (rev_date : string) : (wiki_page option * wiki_revision list) =
 169+ let url = !Online_command_line.target_wikimedia
 170+ ^ "?action=query&prop=revisions|"
 171+ ^ "info&format=xml&inprop=&rvprop=ids|flags|timestamp|user|size|comment|"
 172+ ^ "content&rvstart=" ^ rev_date ^ "&rvlimit=" ^ rev_lim
 173+ ^ "&rvdir=newer&titles=" ^ page_title in
 174+ if !Online_command_line.dump_db_calls then Printf.printf "%s\n" url;
 175+ let res = run_call url in
 176+ let api = Xml.parse_string res in
 177+ let query = Xml.children (api) in
 178+ let poss_pages = Xml.children (List.hd query) in
 179+ let pick_page acc page =
 180+ if (Xml.tag page = "pages") then
 181+ process_page (List.hd (Xml.children page))
 182+ else acc
 183+ in
 184+ List.fold_left pick_page (None,[]) poss_pages
 185+;;
 186+
 187+(* Given a user_name, returns the corresponding user_id *)
 188+let get_user_id (user_name : string) : int =
 189+ let url = !Online_command_line.user_id_server ^ "?n=" ^ user_name in
 190+ if !Online_command_line.dump_db_calls then Printf.printf "%s\n" url;
 191+ let uids = ExtString.String.nsplit (run_call url) "`" in
 192+ let uid = List.nth uids 1 in
 193+ try int_of_string uid with int_of_string -> 0 in
 194+;;
Index: trunk/extensions/WikiTrust/analysis/tmpfile.ml
@@ -0,0 +1,37 @@
 2+(***********************************************************************)
 3+(* *)
 4+(* Objective Caml *)
 5+(* *)
 6+(* François Pessaux, projet Cristal, INRIA Rocquencourt *)
 7+(* Pierre Weis, projet Cristal, INRIA Rocquencourt *)
 8+(* Jun Furuse, projet Cristal, INRIA Rocquencourt *)
 9+(* *)
 10+(* Copyright 1999 - 2003 *)
 11+(* Institut National de Recherche en Informatique et en Automatique. *)
 12+(* Distributed only by permission. *)
 13+(* *)
 14+(***********************************************************************)
 15+
 16+(* temporary directory *)
 17+let tmp_dir = ref (try Sys.getenv "CAMLTMPDIR" with Not_found -> "/tmp");;
 18+
 19+let cnter = ref 0;;
 20+
 21+let rec new_tmp_name prefx =
 22+ incr cnter;
 23+ let name =
 24+ Filename.concat !tmp_dir
 25+ (Printf.sprintf "camltmp-%s-%d" prefx !cnter) in
 26+ if not (Sys.file_exists name) then name else begin
 27+ prerr_endline ("Warning: tmp file " ^ name ^ " already exists");
 28+ new_tmp_name prefx
 29+ end;;
 30+
 31+let remove_tmp_file tmpfile = try Sys.remove tmpfile with _ -> ();;
 32+
 33+let new_tmp_file_name prefx =
 34+ if not (Sys.file_exists !tmp_dir) then
 35+ failwith ("Temporary directory " ^ !tmp_dir ^ " does not exist") else
 36+ let f = new_tmp_name prefx in
 37+ at_exit (fun () -> remove_tmp_file f);
 38+ f;;
Index: trunk/extensions/WikiTrust/analysis/online_db.mli
@@ -180,7 +180,14 @@
181181 recent revision of page [page_id]. *)
182182 method get_latest_rev_id : int -> int
183183
 184+ (** [get_latest_colored_rev_timestamp page_id] returns the timestamp of the most
 185+ recent revision of page [page_id]. *)
 186+ method get_latest_colored_rev_timestamp : int -> string
184187
 188+ (** [get_latest_colored_rev_timestamp_by_title page_title] returns the timestamp of
 189+ the most recent revision of page [page_title]. *)
 190+ method get_latest_colored_rev_timestamp_by_title : string -> string
 191+
185192 (* ================================================================ *)
186193 (* Revision methods. We assume we have a lock on the page to which
187194 the revision belongs when calling these methods. *)
@@ -229,6 +236,9 @@
230237 and origin information. *)
231238 method read_colored_markup : int -> string
232239
 240+ (** Same as above but returns the median info as well. *)
 241+ method read_colored_markup_with_median : int -> string * float
 242+
233243 (** [write_trust_origin_sigs rev_id words trust origin sigs] writes that the
234244 revision [rev_id] is associated with [words], [trust], [origin], and [sigs]. *)
235245 method write_words_trust_origin_sigs :
@@ -259,6 +269,9 @@
260270 relating user ids to their reputation *)
261271 method get_rep : int -> float
262272
 273+ (** [get_user_id name] gets the user id for the user with the given user name *)
 274+ method get_user_id : string -> int
 275+
263276 (* ================================================================ *)
264277 (* Debugging. *)
265278
@@ -269,4 +282,19 @@
270283 (** Add the vote to the db *)
271284 method vote : vote_t -> unit
272285
 286+ (* ================================================================ *)
 287+ (* Server System. *)
 288+
 289+ (** Note that the requested rev needs to be colored *)
 290+ method mark_to_color : int -> int -> string -> string -> int -> unit
 291+
 292+ (** Get the next revs to color *)
 293+ method fetch_next_to_color : int -> (int * int * string * string * int) list
 294+
 295+ (** Add the page to the db *)
 296+ method write_page : wiki_page -> unit
 297+
 298+ (** Add the rev to the db *)
 299+ method write_revision : wiki_revision -> unit
 300+
273301 end
Index: trunk/extensions/WikiTrust/analysis/Makefile
@@ -30,7 +30,7 @@
3131 # POSSIBILITY OF SUCH DAMAGE.
3232
3333 # Vars we use in our rules to build ocaml programs
34 -PACKAGES = unix,str,vec,mapmin,hashtbl_bounded,fileinfo,intvmap,extlib,mysql,sexplib.syntax
 34+PACKAGES = unix,str,vec,mapmin,hashtbl_bounded,fileinfo,intvmap,extlib,mysql,netsys,netclient,camlzip,xml-light,sexplib.syntax
3535 SYNTAX = camlp4o
3636 OUR_LIBS = evalwiki.cma
3737 OUR_LIBS_OPT = evalwiki.cmxa
@@ -72,9 +72,11 @@
7373 # one for the optimizing compilation.
7474 OUR_ONLINE_OBJS = online_types.cmo online_db.cmo online_revision.cmo \
7575 db_page.cmo online_page.cmo online_log.cmo event_feed.cmo \
 76+ online_command_line.cmo tmpfile.cmo wikipedia_api.cmo \
7677
7778 OUR_OPT_ONLINE_OBJS = online_types.cmx online_db.cmx online_revision.cmx \
7879 db_page.cmx online_page.cmx online_log.cmx event_feed.cmx \
 80+ online_command_line.cmx tmpfile.cmx wikipedia_api.cmx \
7981
8082
8183 online_eval: $(OUR_ONLINE_OBJS)
@@ -89,22 +91,37 @@
9092 vote_revisionopt: $(OUR_OPT_ONLINE_OBJS)
9193 $(OCAMLOPT) -linkpkg -o vote_revision $(OCAMLOPT_FLAGS) $(OUR_LIBS_OPT) $(OUR_OPT_ONLINE_OBJS) vote_revision.ml
9294
 95+server: $(OUR_ONLINE_OBJS)
 96+ $(OCAMLC) -package "netstring,netcgi2,unix,nethttpd-for-netcgi2,netplex" -linkpkg -o server $(OCAML_CFLAGS) $(OUR_LIBS) $(OUR_ONLINE_OBJS) -thread server.ml
9397
 98+serveropt: $(OUR_OPT_ONLINE_OBJS)
 99+ $(OCAMLOPT) -package "netstring,netcgi2,unix,nethttpd-for-netcgi2,netplex" -linkpkg -o server $(OCAMLOPT_FLAGS) $(OUR_LIBS_OPT) $(OUR_OPT_ONLINE_OBJS) -thread server.ml
 100+
 101+dispatcher: $(OUR_ONLINE_OBJS)
 102+ $(OCAMLC) -linkpkg -o dispatcher $(OCAML_CFLAGS) $(OUR_LIBS) $(OUR_ONLINE_OBJS) server_coloring_dispatcher.ml
 103+
 104+dispatcheropt: $(OUR_OPT_ONLINE_OBJS)
 105+ $(OCAMLOPT) -linkpkg -o dispatcher $(OCAMLOPT_FLAGS) $(OUR_LIBS_OPT) $(OUR_OPT_ONLINE_OBJS) server_coloring_dispatcher.ml
 106+
94107 all:
95108 cd ../batch/analysis; make all
96109 make online_eval
97110 make vote_revision
 111+ make server
 112+ make dispatcher
98113
99114 allopt:
100115 cd ../batch/analysis; make allopt
101116 make online_evalopt
102117 make vote_revisionopt
 118+ make serveropt
 119+ make dispatcheropt
103120
104121 universe: all allopt
105122
106123 clean:
107124 cd ../batch/analysis; make clean
108 - rm -f *.o *.cmo *.cmx *.cmi .depends run_harness eval_online_wiki vote_revision
 125+ rm -f *.o *.cmo *.cmx *.cmi .depends run_harness eval_online_wiki vote_revision server dispatcher
109126
110127 # Boilerplate code for building ocaml dependencies.
111128
Index: trunk/extensions/WikiTrust/analysis/server.ml
@@ -0,0 +1,235 @@
 2+(* This is a webserver built from the Netplex and Nethttpd components.
 3+ * It is configured in the netplex.cfg file.
 4+ * Note: start program with option "-conf netplex.cfg"
 5+ * The basic code is copied from the nethttpd example.
 6+ *)
 7+
 8+open Netcgi1_compat.Netcgi_types;;
 9+open Printf;;
 10+open Mysql;;
 11+open Online_db;;
 12+open Online_command_line;;
 13+open Gzip;;
 14+
 15+let tmp_prefix = "wiki-com"
 16+let not_found_text_token = "TEXT_NOT_FOUND"
 17+let sleep_time_sec = 3
 18+
 19+let dbh = ref None
 20+
 21+let text = Netencoding.Html.encode_from_latin1;;
 22+(* This function encodes "<", ">", "&", double quotes, and Latin 1 characters
 23+ * as character entities. E.g. text "<" = "&lt;", and text "ä" = "&auml;"
 24+ *)
 25+
 26+let compress_str raw =
 27+ let tmp_file = Tmpfile.new_tmp_file_name tmp_prefix in
 28+ let out = Gzip.open_out tmp_file in
 29+ Gzip.output out raw 0 (String.length raw);
 30+ Gzip.close_out out;
 31+ let compressed = Std.input_file ?bin:(Some true) tmp_file in
 32+ Tmpfile.remove_tmp_file tmp_file;
 33+ compressed
 34+;;
 35+
 36+let handle_missing_rev (rev_id : int) (page_id : int) (page_title : string)
 37+ (rev_time : string) (user_id : int) =
 38+ match !dbh with
 39+ | Some db -> (
 40+ db # mark_to_color rev_id page_id page_title rev_time user_id;
 41+ Unix.sleep sleep_time_sec;
 42+ try (db # read_colored_markup_with_median rev_id) with
 43+ | Online_db.DB_Not_Found -> (not_found_text_token,1.0)
 44+ )
 45+ | None -> ("DB not initialized",1.0)
 46+
 47+(* Return colored markup *)
 48+let generate_text_page (cgi : Netcgi.cgi_activation) (rev_id : int)
 49+ (page_id : int) (page_title : string) (rev_time : string) (user_id : int)
 50+ =
 51+ let out = cgi # out_channel # output_string in
 52+ let safe_page_title = Mysql.escape page_title in
 53+ let safe_rev_time = Mysql.escape rev_time in
 54+ match !dbh with
 55+ | Some db -> (
 56+ let (colored_text,median) =
 57+ try (db # read_colored_markup_with_median rev_id)
 58+ with Online_db.DB_Not_Found -> (handle_missing_rev rev_id page_id
 59+ safe_page_title safe_rev_time
 60+ user_id)
 61+ in
 62+ if colored_text != not_found_text_token then
 63+ let compressed = compress_str ((string_of_float median) ^
 64+ "," ^ colored_text) in
 65+ cgi # set_header
 66+ ~content_type:"application/x-gzip"
 67+ ~content_length:(String.length compressed)
 68+ ();
 69+ out compressed
 70+ else
 71+ out colored_text
 72+ )
 73+ | None -> out "DB not initialized"
 74+;;
 75+
 76+(* Return information about an incorrect request. *)
 77+let generate_help_page (cgi : Netcgi.cgi_activation) =
 78+ let out = cgi # out_channel # output_string in
 79+ out not_found_text_token
 80+;;
 81+
 82+(* Record that a vote happened. *)
 83+let generate_vote_page (cgi : Netcgi.cgi_activation) (rev_id : int)
 84+ (page_id : int) (user_id : int) (v_time : string) (page_title : string) =
 85+ let out = cgi # out_channel # output_string in
 86+ let safe_page_title = Mysql.escape page_title in
 87+ match !dbh with
 88+ | Some db -> (
 89+ let vote = {
 90+ vote_time=(Mysql.escape v_time);
 91+ vote_page_id=page_id;
 92+ vote_revision_id=rev_id;
 93+ vote_voter_id=user_id;
 94+ } in
 95+ let res = try (db # vote vote;
 96+ db # mark_to_color rev_id page_id safe_page_title
 97+ (Mysql.escape v_time) user_id;
 98+ "good") with
 99+ Online_db.DB_TXN_Bad -> "bad" in
 100+ out res
 101+ )
 102+ | None -> out "DB not initialized"
 103+;;
 104+
 105+let generate_page (cgi : Netcgi.cgi_activation) =
 106+ (* Check which page is to be displayed. This is contained in the CGI
 107+ * argument "page".
 108+ *)
 109+
 110+ let page_id = try (int_of_string (cgi # argument_value "page"))
 111+ with int_of_string -> -1 in
 112+ let rev_id = try (int_of_string (cgi # argument_value "rev"))
 113+ with int_of_string -> -1 in
 114+ let page_title = (cgi # argument_value "page_title") in
 115+ let time_str = (cgi # argument_value "time") in
 116+ let user_id = try (int_of_string (cgi # argument_value "user"))
 117+ with int_of_string -> 0 in
 118+ match cgi # argument_value "vote" with
 119+ | "" -> (
 120+ if rev_id < 0 || page_id < 0 then generate_help_page cgi else
 121+ generate_text_page cgi rev_id page_id page_title time_str
 122+ user_id
 123+ )
 124+ | _ -> (
 125+ generate_vote_page cgi rev_id page_id user_id time_str page_title
 126+ )
 127+;;
 128+
 129+let process2 (cgi : Netcgi.cgi_activation) =
 130+ (* The [try] block catches errors during the page generation. *)
 131+ try
 132+ (* Set the header. The header specifies that the page must not be
 133+ * cached. This is important for dynamic pages called by the GET
 134+ * method, otherwise the browser might display an old version of
 135+ * the page.
 136+ * Furthermore, we set the content type and the character set.
 137+ * Note that the header is not sent immediately to the browser because
 138+ * we have enabled HTML buffering.
 139+ *)
 140+ cgi # set_header
 141+ ~cache:`No_cache
 142+ ~content_type:"text/plain; charset=\"iso-8859-1\""
 143+ ();
 144+
 145+ generate_page cgi;
 146+
 147+ (* After the page has been fully generated, we can send it to the
 148+ * browser.
 149+ *)
 150+ cgi # out_channel # commit_work();
 151+ with
 152+ error ->
 153+ (* An error has happened. Generate now an error page instead of
 154+ * the current page. By rolling back the output buffer, any
 155+ * uncommitted material is deleted.
 156+ *)
 157+ cgi # out_channel # rollback_work();
 158+
 159+ (* We change the header here only to demonstrate that this is
 160+ * possible.
 161+ *)
 162+ cgi # set_header
 163+ ~status:`Forbidden (* Indicate the error *)
 164+ ~cache:`No_cache
 165+ ~content_type:"text/plain; charset=\"iso-8859-1\""
 166+ ();
 167+
 168+ cgi # out_channel # output_string "While processing the request an O'Caml exception has been raised:\n";
 169+ cgi # out_channel # output_string ("" ^ text(Printexc.to_string error) ^ "\n");
 170+
 171+ (* Now commit the error page: *)
 172+ cgi # out_channel # commit_work()
 173+;;
 174+
 175+
 176+let process1 (cgi : Netcgi1_compat.Netcgi_types.cgi_activation) =
 177+ let cgi' = Netcgi1_compat.Netcgi_types.of_compat_activation cgi in
 178+ process2 cgi'
 179+
 180+
 181+(**********************************************************************)
 182+(* Create the webserver *)
 183+(**********************************************************************)
 184+
 185+
 186+let start() =
 187+ let (opt_list, cmdline_cfg) = Netplex_main.args() in
 188+
 189+ let use_mt = ref false in
 190+
 191+ let opt_list' =
 192+ [ ("-mt", Arg.Set use_mt,
 193+ " Use multi-threading instead of multi-processing");
 194+ ] @ (command_line_format @ opt_list) in
 195+
 196+ Arg.parse
 197+ opt_list'
 198+ (fun s -> raise (Arg.Bad ("Don't know what to do with: " ^ s)))
 199+ "usage: netplex [options]";
 200+
 201+ (* Prepares the database connection information *)
 202+ let mediawiki_db = {
 203+ dbhost = Some !mw_db_host;
 204+ dbname = Some !mw_db_name;
 205+ dbport = Some !mw_db_port;
 206+ dbpwd = Some !mw_db_pass;
 207+ dbuser = Some !mw_db_user;
 208+ } in
 209+ dbh := Some (new Online_db.db !db_prefix mediawiki_db None !dump_db_calls);
 210+
 211+ let parallelizer =
 212+ if !use_mt then
 213+ Netplex_mt.mt() (* multi-threading *)
 214+ else
 215+ Netplex_mp.mp() in (* multi-processing *)
 216+ let trust_store =
 217+ { Nethttpd_services.dyn_handler = (fun _ -> process1);
 218+ dyn_activation = Nethttpd_services.std_activation `Std_activation_buffered;
 219+ dyn_uri = None; (* not needed *)
 220+ dyn_translator = (fun _ -> ""); (* not needed *)
 221+ dyn_accept_all_conditionals = false;
 222+ } in
 223+ let nethttpd_factory =
 224+ Nethttpd_plex.nethttpd_factory
 225+ ~handlers:[ "trust", trust_store ]
 226+ () in
 227+ Netplex_main.startup
 228+ parallelizer
 229+ Netplex_log.logger_factories (* allow all built-in logging styles *)
 230+ Netplex_workload.workload_manager_factories (* ... all ways of workload management *)
 231+ [ nethttpd_factory ] (* make this nethttpd available *)
 232+ cmdline_cfg
 233+;;
 234+
 235+Sys.set_signal Sys.sigpipe Sys.Signal_ignore;
 236+start();;
Index: trunk/extensions/WikiTrust/mediawiki/extensions/Trust/TrustUpdateScripts.inc
@@ -86,6 +86,19 @@
8787 GRANT ALL ON ".$wgDBprefix."wikitrust_user TO $wgDBuser;
8888 ");
8989
 // Creation scripts for the (prefixed) wikitrust_missing_revs table.
 // The array holds two statements: the CREATE TABLE and a GRANT to $wgDBuser.
 // One row per revision (revision_id is the primary key) with page id and
 // title, revision time, user id, a requested_on timestamp defaulting to
 // now(), and a processed flag defaulting to false.
 // Presumably a queue of revisions still to be handled by the trust
 // server -- confirm against the code that reads this table.
 90+$create_scripts[$wgDBprefix.'wikitrust_missing_revs'] = array("
 91+CREATE TABLE ".$wgDBprefix."wikitrust_missing_revs (
 92+ revision_id int PRIMARY KEY,
 93+ page_id int NOT NULL,
 94+ page_title varchar(255) NOT NULL,
 95+ rev_time varchar(255) NOT NULL,
 96+ user_id int NOT NULL,
 97+ requested_on timestamp DEFAULT now(),
 98+ processed bool DEFAULT false
 99+) ENGINE=InnoDB","
 100+GRANT ALL ON ".$wgDBprefix."wikitrust_missing_revs TO $wgDBuser;
 101+");
 102+
90103 $create_index_scripts[$wgDBprefix . "revision"]['wikitrust_revision_id_timestamp_idx'] = "CREATE INDEX wikitrust_revision_id_timestamp_idx ON " . $wgDBprefix . "revision (rev_id, rev_timestamp)";
91104
92105 $remove_scripts[$wgDBprefix.'wikitrust_vote'] = array("DROP TABLE ".$wgDBprefix."wikitrust_vote");
@@ -96,6 +109,8 @@
97110 $remove_scripts[$wgDBprefix.'wikitrust_sigs'] = array("DROP TABLE ".$wgDBprefix."wikitrust_sigs");
98111 $remove_scripts[$wgDBprefix.'wikitrust_user'] = array("DROP TABLE
99112 ".$wgDBprefix."wikitrust_user");
 // Removal script for the (prefixed) wikitrust_missing_revs table; the
 // counterpart of the matching $create_scripts entry in this file.
 113+$remove_scripts[$wgDBprefix.'wikitrust_missing_revs'] = array("DROP TABLE
 114+".$wgDBprefix."wikitrust_missing_revs");
100115
101116 $remove_index_scripts[$wgDBprefix . "revision"]['wikitrust_revision_id_timestamp_idx'] = "DROP INDEX wikitrust_revision_id_timestamp_idx ON revision";
102117
Index: trunk/extensions/WikiTrust/mediawiki/extensions/Trust/Trust.php
@@ -25,8 +25,15 @@
2626 # Uses Tool Tip JS library under the LGPL.
2727 # http://www.walterzorn.com/tooltip/tooltip_e.htm
2828
29 -class TextTrust extends TrustBase
30 -{
 29+ // Turn old style errors into exceptions.
 // Error-handler callback: rethrows the reported error as an ErrorException
 // carrying the message ($errstr), code 0, the error level ($errno) as
 // severity, and the originating file and line.
 30+function exception_error_handler($errno, $errstr, $errfile, $errline ) {
 31+ throw new ErrorException($errstr, 0, $errno, $errfile, $errline);
 32+}
 33+
 34+ // But only for warnings.
 // The E_WARNING mask limits the handler to warnings. This is what lets the
 // try/catch around file_get_contents() in ucscSeeIfColored() see a failed
 // request to the trust server as an exception.
 // NOTE(review): this call runs at file load time and installs the handler
 // globally, so warnings raised outside this extension are affected as well.
 35+set_error_handler("exception_error_handler", E_WARNING);
 36+
 37+class TextTrust extends TrustBase {
3138
3239 ## Types of analysis to perform.
3340 const TRUST_EVAL_VOTE = 0;
@@ -54,6 +61,14 @@
5562 ## Token to be replaed with >
5663 const TRUST_CLOSE_TOKEN = ":ampc:";
5764
 65+ ## Server forms
 66+ const NOT_FOUND_TEXT_TOKEN = "TEXT_NOT_FOUND";
 67+ const TRUST_COLOR_TOKEN = "<!--trust-->";
 68+ const CONTENT_URL = "http://localhost:4444/?";
 69+
 70+ ## Context for communicating with the trust server
 71+ const TRUST_TIMEOUT = 10;
 72+
5873 ## default values for variables found from LocalSettings.php
5974 var $DEFAULTS = array(
6075 'wgShowVoteButton' => false,
@@ -66,13 +81,14 @@
6782 'wgTrustLog' => "/dev/null",
6883 'wgTrustDebugLog' => "/dev/null",
6984 'wgRepSpeed' => 1.0,
 85+ 'wgNotPartExplanation' => "This page is not part of the trust coloring experement",
7086 'wgTrustTabText' => "Show Trust",
7187 'wgTrustExplanation' =>
7288 "<p><center><b>This is a product of the text trust algoruthm.</b></center></p>",
7389 );
7490
7591 ## Median Value of Trust
76 - var $median = 0.0;
 92+ var $median = 1.0;
7793
7894 ## Number of times a revision is looked at.
7995 var $times_rev_loaded = 0;
@@ -127,7 +143,7 @@
128144 if ((http_request.readyState == 4) && (http_request.status == 200)) {
129145 document.getElementById("vote-button-done").style.visibility = "visible";
130146 document.getElementById("vote-button").style.visibility = "hidden";
131 - //alert(http_request.responseText);
 147+ // alert(http_request.responseText);
132148 return true;
133149 } else {
134150 alert(http_request.responseText);
@@ -157,7 +173,7 @@
158174 }
159175 }
160176
161 - return sajax_do_call( "TextTrust::handleVote", [wgUserName, wgArticleId, revID] , voteCallback );
 177+ return sajax_do_call( "TextTrust::handleVote", [wgUserName, wgArticleId, revID, wgPageName] , voteCallback );
162178 }
163179
164180 /*]]>*/</script>';
@@ -226,97 +242,95 @@
227243 public static function &singleton( )
228244 { return parent::singleton( ); }
229245
230 - public function TextTrust()
231 - {
232 - parent::__construct( );
233 - global $wgExtensionCredits, $wgShowVoteButton, $wgVoteText, $wgThankYouForVoting;
234 - global $wgNoTrustExplanation, $wgTrustCmd, $wgVoteRev, $wgTrustLog, $wgTrustDebugLog, $wgRepSpeed;
235 - global $wgTrustTabText, $wgTrustExplanation;
236 -
237 - //Add default values if globals not set.
238 - if(!$wgShowVoteButton)
239 - $wgShowVoteButton = $this->DEFAULTS['wgShowVoteButton'];
240 - if(!$wgVoteText)
241 - $wgVoteText = $this->DEFAULTS['wgVoteText' ];
242 - if(!$wgThankYouForVoting)
243 - $wgThankYouForVoting = $this->DEFAULTS['wgThankYouForVoting'];
244 - if(!$wgNoTrustExplanation)
245 - $wgNoTrustExplanation = $this->DEFAULTS['wgNoTrustExplanation'];
246 - if(!$wgTrustCmd)
247 - $wgTrustCmd = $this->DEFAULTS['wgTrustCmd' ];
248 - if(!$wgVoteRev)
249 - $wgVoteRev = $this->DEFAULTS['wgVoteRev'];
250 - if(!$wgTrustLog)
251 - $wgTrustLog = $this->DEFAULTS['wgTrustLog'];
252 - if(!$wgTrustDebugLog)
253 - $wgTrustDebugLog = $this->DEFAULTS['wgTrustDebugLog'];
254 - if(!$wgRepSpeed)
255 - $wgRepSpeed = $this->DEFAULTS['wgRepSpeed'];
256 - if(!$wgTrustTabText)
257 - $wgTrustTabText = $this->DEFAULTS['wgTrustTabText'];
258 - if(!$wgTrustExplanation)
259 - $wgTrustExplanation = $this->DEFAULTS['wgTrustExplanation'];
260 -
 246+ public function TextTrust(){
 247+ parent::__construct( );
 248+ global $wgExtensionCredits, $wgShowVoteButton, $wgVoteText, $wgThankYouForVoting;
 249+ global $wgNoTrustExplanation, $wgTrustCmd, $wgVoteRev, $wgTrustLog, $wgTrustDebugLog, $wgRepSpeed;
 250+ global $wgTrustTabText, $wgTrustExplanation, $wgNotPartExplanation;
 251+
 252+ //Add default values if globals not set.
 253+ if(!$wgShowVoteButton)
 254+ $wgShowVoteButton = $this->DEFAULTS['wgShowVoteButton'];
 255+ if(!$wgVoteText)
 256+ $wgVoteText = $this->DEFAULTS['wgVoteText' ];
 257+ if(!$wgThankYouForVoting)
 258+ $wgThankYouForVoting = $this->DEFAULTS['wgThankYouForVoting'];
 259+ if(!$wgNoTrustExplanation)
 260+ $wgNoTrustExplanation = $this->DEFAULTS['wgNoTrustExplanation'];
 261+ if(!$wgNotPartExplanation)
 262+ $wgNotPartExplanation = $this->DEFAULTS['wgNotPartExplanation'];
 263+ if(!$wgTrustCmd)
 264+ $wgTrustCmd = $this->DEFAULTS['wgTrustCmd' ];
 265+ if(!$wgVoteRev)
 266+ $wgVoteRev = $this->DEFAULTS['wgVoteRev'];
 267+ if(!$wgTrustLog)
 268+ $wgTrustLog = $this->DEFAULTS['wgTrustLog'];
 269+ if(!$wgTrustDebugLog)
 270+ $wgTrustDebugLog = $this->DEFAULTS['wgTrustDebugLog'];
 271+ if(!$wgRepSpeed)
 272+ $wgRepSpeed = $this->DEFAULTS['wgRepSpeed'];
 273+ if(!$wgTrustTabText)
 274+ $wgTrustTabText = $this->DEFAULTS['wgTrustTabText'];
 275+ if(!$wgTrustExplanation)
 276+ $wgTrustExplanation = $this->DEFAULTS['wgTrustExplanation'];
 277+
261278 # Define a setup function
262 - $wgExtensionFunctions[] = 'ucscColorTrust_Setup';
263 -
 279+ $wgExtensionFunctions[] = 'ucscColorTrust_Setup';
 280+
264281 # Credits
265 - $wgExtensionCredits['parserhook'][] = array(
266 - 'name' => 'Trust Coloring',
267 - 'author' =>'Ian Pye',
268 - 'url' =>
269 - 'http://trust.cse.ucsc.edu',
270 - 'description' => 'This Extension
 282+ $wgExtensionCredits['parserhook'][] = array(
 283+ 'name' => 'Trust Coloring',
 284+ 'author' =>'Ian Pye',
 285+ 'url' =>
 286+ 'http://trust.cse.ucsc.edu',
 287+ 'description' => 'This Extension
271288 colors text according to trust.'
272 - );
 289+ );
273290 }
274291
275 - public function setup()
276 - {
 292+ // Sets the extension hooks.
 293+ public function setup() {
277294 parent::setup();
278 - global $wgHooks, $wgParser, $wgRequest, $wgUseAjax, $wgShowVoteButton, $wgAjaxExportList;
279 -
 295+ global $wgHooks, $wgParser, $wgRequest, $wgUseAjax, $wgShowVoteButton, $wgAjaxExportList, $wgUser;
 296+
280297 # Code which takes the "I vote" action.
281298 # This has to be statically called.
282299 if($wgUseAjax && $wgShowVoteButton){
283300 $wgAjaxExportList[] = "TextTrust::handleVote";
284301 }
285302
 303+ // Is the user opting to use wikitrust?
 304+ $tname = "gadget-WikiTrust";
 305+ if (!$wgUser->getOption( $tname ) ) {
 306+ return;
 307+ }
 308+
286309 # Updater fiered when updating to a new version of MW.
287 - $wgHooks['LoadExtensionSchemaUpdates'][] = array( &$this, 'updateDB');
288 -
 310+ $wgHooks['LoadExtensionSchemaUpdates'][] = array(&$this, 'updateDB');
 311+
289312 # And add and extra tab.
290 - $wgHooks['SkinTemplateTabs'][] = array( &$this, 'ucscTrustTemplate');
291 -
292 -# And add a hook so the colored text is found.
293 - $wgHooks['ParserBeforeStrip'][] = array( &$this, 'ucscSeeIfColored');
294 -
295 -# Color saved text
296 - $wgHooks['ArticleSaveComplete'][] = array( &$this, 'ucscRunColoring');
297 -
 313+ $wgHooks['SkinTemplateTabs'][] = array(&$this, 'ucscTrustTemplate');
 314+
298315 # If the trust tab is not selected, or some other tabs are don't worry about things any more.
299316 if(!$wgRequest->getVal('trust') || $wgRequest->getVal('action')){
300317 $this->trust_engaged = false;
301318 return;
302319 }
303320 $this->trust_engaged = true;
304 -
 321+
305322 # Add trust CSS and JS
306323 $wgHooks['OutputPageBeforeHTML'][] = array( &$this, 'ucscColorTrust_OP');
307 -
 324+
308325 # Add a hook to initialise the magic words
309326 $wgHooks['LanguageGetMagic'][] = array( &$this, 'ucscColorTrust_Magic');
310 -
 327+
311328 # Set a function hook associating the blame and trust words with a callback function
312329 $wgParser->setFunctionHook( 't', array( &$this, 'ucscColorTrust_Render'));
313 -
 330+
314331 # After everything, make the blame info work
315332 $wgHooks['ParserAfterTidy'][] = array( &$this, 'ucscOrigin_Finalize');
316 -
317 -# Pull the median value
318 - $this->update_median();
319333 }
320 -
 334+
321335 /**
322336 * Update the DB when MW is updated.
323337 * This assums that the db has permissions to create tables.
@@ -326,7 +340,7 @@
327341 // Create the needed tables, if neccesary.
328342 // Pull in the create scripts.
329343 require_once("TrustUpdateScripts.inc");
330 -
 344+
331345 $db =& wfGetDB( DB_MASTER );
332346
333347 // First check to see what tables have already been created.
@@ -334,7 +348,7 @@
335349 while ($row = $db->fetchRow($res)){
336350 $db_tables[$row[0]] = True;
337351 }
338 -
 352+
339353 foreach ($create_scripts as $table => $scripts) {
340354 if (!$db_tables[$table]){
341355 foreach ($scripts as $script){
@@ -344,29 +358,16 @@
345359 }
346360 }
347361
348 - /**
349 - * Turns an ASCII string into an octal encoded one.
350 - * Call like this: TextTrust::prepareOutput("This is a test");
351 - */
352 - static function prepareOutput($command){
353 - $escaped = "";
354 - foreach (str_split($command) as $c ){
355 - $escaped .= sprintf("\\0o%03o", ord($c));
356 - }
357 - return $escaped;
358 - }
359 -
360362 /**
361 - Run the vote executable.
362 -
 363+ Records the vote.
363364 Called via ajax, so this must be static.
364365 */
365 - static function handleVote($user_name_raw, $page_id_raw = 0, $rev_id_raw = 0){
 366+ static function handleVote($user_name_raw, $page_id_raw = 0, $rev_id_raw = 0, $page_title = ""){
366367
367368 $response = new AjaxResponse("0");
368 -
 369+
369370 $dbr =& wfGetDB( DB_SLAVE );
370 -
 371+
371372 $userName = $dbr->strencode($user_name_raw, $dbr);
372373 $page_id = $dbr->strencode($page_id_raw, $dbr);
373374 $rev_id = $dbr->strencode($rev_id_raw, $dbr);
@@ -381,35 +382,24 @@
382383 $user_id = 0;
383384 }
384385 }
385 - $dbr->freeResult( $res );
386 -
387 - // Now see if this user has not already voted, and count the vote if its the first time though.
388 - $res = $dbr->select('wikitrust_vote', array('revision_id'), array('revision_id' => $rev_id, 'voter_id' => $user_id), array());
389 - if ($res){
390 - $row = $dbr->fetchRow($res);
391 - if(!$row['revision_id']){
392 -
393 - $insert_vals = array("revision_id" => $rev_id,
394 - "page_id" => $page_id ,
395 - "voter_id" => $user_id,
396 - "voted_on" => wfTimestampNow()
397 - );
398 - $dbw =& wfGetDB( DB_MASTER );
399 - if ($dbw->insert( 'wikitrust_vote', $insert_vals)){
400 - $dbw->commit();
401 - $response = new AjaxResponse(implode ( ",", $insert_vals));
402 - self::runEvalEdit(self::TRUST_EVAL_VOTE, $rev_id, $page_id, $user_id); // Launch the evaluation of the vote.
403 - }
404 - } else {
405 - $response = new AjaxResponse("Already Voted");
406 - }
407 - $dbr->freeResult( $res );
408 - }
 386+ $dbr->freeResult( $res );
 387+
 388+ $ctx = stream_context_create(
 389+ array('http' => array(
 390+ 'timeout' =>
 391+ self::TRUST_TIMEOUT
 392+ )
 393+ )
 394+ );
 395+
 396+ $vote_str = ("Voting at " . self::CONTENT_URL . "vote=1&rev=$rev_id&page=$page_id&user=$user_id&page_title=$page_title&time=" . wfTimestampNow());
 397+ $colored_text = file_get_contents(self::CONTENT_URL . "vote=1&rev=$rev_id&page=$page_id&user=$user_id&page_title=$page_title&time=" .
 398+ wfTimestampNow(), 0, $ctx);
 399+ $response = new AjaxResponse($vote_str);
409400 }
410 -
411401 return $response;
412402 }
413 -
 403+
414404 /**
415405 Called just before rendering HTML.
416406 We add the coloring scripts here.
@@ -422,269 +412,259 @@
423413 }
424414 return true;
425415 }
426 -
427 - /**
428 - Updated the cached median reputation value.
429 - */
430 - function update_median(){
431 - $dbr =& wfGetDB( DB_SLAVE );
432 - $res = $dbr->select('wikitrust_global', 'median', array(), array());
433 - if ($res){
434 - $row = $dbr->fetchRow($res);
435 - $this->median = $row['median'];
436 - }
437 - $dbr->freeResult( $res );
438 -
439 - // check for divide by 0 errors.
440 - if ($this->median == 0)
441 - $this->median = 1;
442 -
443 - return $this->median;
444 - }
445 -
446 - /**
447 - * Actually run the eval edit program.
448 - * Returns -1 on error, the process id of the launched eval process otherwise.
449 - */
450 - private static function runEvalEdit($eval_type = self::TRUST_EVAL_EDIT, $rev_id = -1, $page_id = -1, $voter_id = -1){
451 -
452 - global $wgDBname, $wgDBuser, $wgDBpassword, $wgDBserver, $wgDBtype, $wgTrustCmd, $wgTrustLog, $wgTrustDebugLog, $wgRepSpeed, $wgDBprefix;
453 -
454 - $process = -1;
455 - $command = "";
456 - // Get the db.
457 - $dbr =& wfGetDB( DB_SLAVE );
458 -
459 - // Do we use a DB prefix?
460 - $prefix = ($wgDBprefix)? "-db_prefix " . $dbr->strencode($wgDBprefix): "";
461 -
462 - switch ($eval_type) {
463 - case self::TRUST_EVAL_EDIT:
464 - $command = escapeshellcmd("$wgTrustCmd -rep_speed $wgRepSpeed -log_file $wgTrustLog -db_host $wgDBserver -db_user $wgDBuser -db_pass $wgDBpassword -db_name $wgDBname $prefix") . " &";
465 - break;
466 - case self::TRUST_EVAL_VOTE:
467 - if ($rev_id == -1 || $page_id == -1 || $voter_id == -1)
468 - return -1;
469 - $command = escapeshellcmd("$wgTrustCmd -eval_vote -rev_id " . $dbr->strencode($rev_id) . " -voter_id " . $dbr->strencode($voter_id) . " -page_id " . $dbr->strencode($page_id) . " -rep_speed $wgRepSpeed -log_file $wgTrustLog -db_host $wgDBserver -db_user $wgDBuser -db_pass $wgDBpassword -db_name $wgDBname $prefix") . " &";
470 - break;
471 - case self::TRUST_EVAL_MISSING:
472 - $command = escapeshellcmd("$wgTrustCmd -rev_id " . $dbr->strencode($rev_id) . " -rep_speed $wgRepSpeed -log_file $wgTrustLog -db_host $wgDBserver -db_user $wgDBuser -db_pass $wgDBpassword -db_name $wgDBname $prefix") . " &";
473 - break;
474 - }
475 -
476 - $descriptorspec = array(
477 - 0 => array("pipe", "r"), // stdin is a pipe that the child will read from
478 - 1 => array("file", escapeshellcmd($wgTrustDebugLog), "a"), // stdout is a pipe that the child will write to
479 - 2 => array("file", escapeshellcmd($wgTrustDebugLog), "a") // stderr is a file to write to
480 - );
481 - $cwd = '/tmp';
482 - $env = array();
483 - $process = proc_open($command, $descriptorspec, $pipes, $cwd, $env);
484 -
485 - return $process;
486 - }
487 -
488 -/*
489 - Code to fork and exec a new process to color any new revisions.
490 - Called after any edits are made.
491 -*/
492 - function ucscRunColoring(&$article, &$user, &$text, &$summary, $minor, $watch, $sectionanchor, &$flags, $revision) {
493 - if (self::runEvalEdit(self::TRUST_EVAL_EDIT) >= 0)
494 - return true;
495 - return false;
496 - }
497 -
 416+
498417 # Actually add the tab.
499 - function ucscTrustTemplate($skin, &$content_actions) {
 // SkinTemplateTabs hook (registered in setup()): adds a 'trust' entry to
 // $content_actions. Always returns true.
 // No tab is added when the request has an 'action' or a 'diff' value.
 // The tab links to PHP_SELF with the current query string plus trust=t,
 // and is marked 'selected' when the request already carries 'trust'.
 418+ function ucscTrustTemplate($skin, &$content_actions) {
 419+
 420+ global $wgTrustTabText, $wgRequest;
 421+ if (!isset($wgTrustTabText)){
 422+ $wgTrustTabText = "trust";
 423+ }
 424+
 425+ if ($wgRequest->getVal('action')){
 426+ // we don't want trust for actions.
 427+ return true;
 428+ }
 429+
 430+ if ($wgRequest->getVal('diff')){
 431+ // or for diffs
 432+ return true;
 433+ }
 434+
 // Build the tab's query string from the raw request query string.
 // NOTE(review): trust=t is appended even if the query string already
 // contains it, and the raw string goes into the href as-is -- confirm
 // that the skin escapes it on output.
 435+ $trust_qs = $_SERVER['QUERY_STRING'];
 436+ if($trust_qs){
 437+ $trust_qs = "?" . $trust_qs . "&trust=t";
 438+ } else {
 439+ $trust_qs .= "?trust=t";
 440+ }
 441+
 442+ $content_actions['trust'] = array ( 'class' => '',
 443+ 'text' => $wgTrustTabText,
 444+ 'href' =>
 445+ $_SERVER['PHP_SELF'] . $trust_qs );
 446+
 // When the trust view is active, highlight the trust tab and clear the
 // class of the main namespace tab. The `.= ''` statements append an
 // empty string and leave the hrefs unchanged.
 447+ if($wgRequest->getVal('trust')){
 448+ $content_actions['trust']['class'] = 'selected';
 449+ $content_actions['nstab-main']['class'] = '';
 450+ $content_actions['nstab-main']['href'] .= '';
 451+ } else {
 452+ $content_actions['trust']['href'] .= '';
 453+ }
 454+ return true;
 455+ }
500456
501 - global $wgTrustTabText, $wgRequest;
502 - if (!isset($wgTrustTabText)){
503 - $wgTrustTabText = "trust";
504 - }
505 -
506 - if ($wgRequest->getVal('action')){
507 - // we don't want trust for actions.
508 - return true;
509 - }
510 -
511 - $trust_qs = $_SERVER['QUERY_STRING'];
512 - if($trust_qs){
513 - $trust_qs = "?" . $trust_qs . "&trust=t";
514 - } else {
515 - $trust_qs .= "?trust=t";
516 - }
517 -
518 - $content_actions['trust'] = array ( 'class' => '',
519 - 'text' => $wgTrustTabText,
520 - 'href' =>
521 - $_SERVER['PHP_SELF'] . $trust_qs );
522 -
523 - if($wgRequest->getVal('trust')){
524 - $content_actions['trust']['class'] = 'selected';
525 - $content_actions['nstab-main']['class'] = '';
526 - $content_actions['nstab-main']['href'] .= '';
527 - } else {
528 - $content_actions['trust']['href'] .= '';
529 - }
530 - return true;
531 - }
532 -
533 - /**
534 - If colored text exists, use it instead of the normal text,
535 - but only if the trust tab is selected.
536 -
537 - TODO: Make this function work with caching turned on.
538 - */
539 - function ucscSeeIfColored(&$parser, &$text, &$strip_state) {
540 - global $wgRequest, $wgTrustExplanation, $wgUseAjax, $wgShowVoteButton, $wgDBprefix, $wgNoTrustExplanation, $wgVoteText, $wgThankYouForVoting;
541 -
542 - // Turn off caching for this instanching for this instance.
543 - $parser->disableCache();
544 -
545 - // Get the db.
546 - $dbr =& wfGetDB( DB_SLAVE );
547 -
548 - // Do we use a DB prefix?
549 - $prefix = ($wgDBprefix)? "-db_prefix " . $dbr->strencode($wgDBprefix): "";
550 -
551 - // Text for showing the "I like it" button
552 - $voteitText = "";
553 - if ($wgUseAjax && $wgShowVoteButton){
554 - $voteitText = "
 457+ /**
 458+ If colored text exists, use it instead of the normal text,
 459+ but only if the trust tab is selected.
 460+ */
 461+ function ucscSeeIfColored(&$parser, &$text, &$strip_state = Null) {
 462+ global $wgRequest, $wgTrustExplanation, $wgUseAjax, $wgShowVoteButton, $wgDBprefix, $wgNoTrustExplanation, $wgVoteText, $wgThankYouForVoting, $wgNotPartExplanation;
 463+
 464+ // Get the db.
 465+ $dbr =& wfGetDB( DB_SLAVE );
 466+
 467+ // Do we use a DB prefix?
 468+ $prefix = ($wgDBprefix)? "-db_prefix " . $dbr->strencode($wgDBprefix): "";
 469+
 470+ // Text for showing the "I like it" button
 471+ $voteitText = "";
 472+ if ($wgUseAjax && $wgShowVoteButton){
 473+ $voteitText = "
555474 ".self::TRUST_OPEN_TOKEN."div id='vote-button'".self::TRUST_CLOSE_TOKEN."".self::TRUST_OPEN_TOKEN."input type='button' name='vote' value='" . $wgVoteText . "' onclick='startVote()' /".self::TRUST_CLOSE_TOKEN."".self::TRUST_OPEN_TOKEN."/div".self::TRUST_CLOSE_TOKEN."
556475 ".self::TRUST_OPEN_TOKEN."div id='vote-button-done'".self::TRUST_CLOSE_TOKEN.$wgThankYouForVoting.self::TRUST_OPEN_TOKEN."/div".self::TRUST_CLOSE_TOKEN."
557476 ";
558 - }
 477+ }
559478
560 - // Return if trust is not selected.
561 - if (!$this->trust_engaged)
562 - return true;
 479+ // Return if trust is not selected.
 480+ if (!$this->trust_engaged)
 481+ return true;
563482
564 - // Save the title object, if it is not already present
565 - if (!$this->title){
566 - $this->title = $parser->getTitle();
567 - }
 483+ // Save the title object, if it is not already present
 484+ if (!$this->title){
 485+ $this->title = $parser->getTitle();
 486+ }
568487
569 - // count the number of times we load this text
570 - $this->times_rev_loaded++;
 488+ // count the number of times we load this text
 489+ $this->times_rev_loaded++;
571490
572 - // Load the current revision id.
573 - if (!$this->current_rev){
574 - if ($parser->mRevisionId){
575 - $this->current_rev = $parser->mRevisionId;
576 - } else {
577 - // Sometimes the revisionId field is not filled in.
578 - $this->current_rev = $this->title->getPreviousRevisionID( PHP_INT_MAX );
579 - }
580 - }
581 -
582 - /**
583 - This method is being called multiple times for each page.
584 - We only pull the colored text for the first time through.
585 - */
586 - if ($this->colored){
 491+ // Load the current revision id.
 492+ if (!$this->current_rev){
 493+ if ($parser->mRevisionId){
 494+ $this->current_rev = $parser->mRevisionId;
 495+ } else {
 496+ // Sometimes the revisionId field is not filled in.
 497+ $this->current_rev = $this->title->getPreviousRevisionID( PHP_INT_MAX );
 498+ }
 499+ }
 500+
 501+ /**
 502+ This method is being called multiple times for each page.
 503+ We only pull the colored text for the first time through.
 504+ */
 505+ if ($this->colored){
 506+ return true;
 507+ }
 508+
 509+ if ($wgRequest->getVal('diff')){
 510+ // For diffs, look for the absence of the diff token instead of counting
 511+ if(substr($text,0,3) == self::DIFF_TOKEN_TO_COLOR){
 512+ return true;
 513+ }
 514+ }
 515+
 516+ // if we made it here, we are going to color some text
 517+ $this->colored = true;
 518+
 519+ // Check to see if this page is part of the coloring project.
 520+ // Disabled for now.
 521+ //if (!strstr($text, self::TRUST_COLOR_TOKEN)){
 522+ // $text = $wgNotPartExplanation . "\n" . $text;
 523+ // return true;
 524+ //}
 525+
 526+ // Get the page id and other data
 527+ $colored_text="";
 528+ $page_id=0;
 529+ $rev_timestamp="";
 530+ $rev_user=0;
 531+ $res = $dbr->select('revision', array('rev_page', 'rev_timestamp', 'rev_user'), array('rev_id' => $this->current_rev), array());
 532+ if ($res){
 533+ $row = $dbr->fetchRow($res);
 534+ $page_id = $row['rev_page'];
 535+ $rev_user = $row['rev_user'];
 536+ $rev_timestamp = $row['rev_timestamp'];
 537+ if (!$page_id) {
 538+ $page_id = 0;
 539+ }
 540+ }
 541+ $dbr->freeResult( $res );
 542+
 543+ $page_title = $_GET['title'];
 544+ $ctx = stream_context_create(
 545+ array('http' => array(
 546+ 'timeout' =>
 547+ self::TRUST_TIMEOUT
 548+ )
 549+ )
 550+ );
 551+ try {
 552+ // Should we do doing this via HTTPS?
 553+ $colored_raw = (file_get_contents(self::CONTENT_URL . "rev=" . $this->current_rev . "&page=$page_id&page_title=$page_title&time=$rev_timestamp&user=$rev_user", 0, $ctx));
 554+ } catch (Exception $e) {
 555+ $colored_raw = "";
 556+ }
 557+
 558+ if ($colored_raw && $colored_raw != self::NOT_FOUND_TEXT_TOKEN){
 559+ // Work around because of issues with php's built in
 560+ // gzip function.
 561+ $f = tempnam('/tmp', 'gz_fix');
 562+ file_put_contents($f, $colored_raw);
 563+ $colored_raw = file_get_contents('compress.zlib://' . $f);
 564+ unlink($f);
 565+
 566+ // Pick off the median value first.
 567+ $colored_data = explode(",", $colored_raw, 2);
 568+ $colored_text = $colored_data[1];
 569+ if (preg_match("/^[+-]?(([0-9]+)|([0-9]*\.[0-9]+|[0-9]+\.[0-9]*)|
 570+ (([0-9]+|([0-9]*\.[0-9]+|[0-9]+\.[0-9]*))[eE][+-]?[0-9]+))$/", $colored_data[0])){
 571+ $this->median = $colored_data[0];
 572+ }
 573+
 574+ // First, make sure that there are not any instances of our tokens in the colored_text
 575+ $colored_text = str_replace(self::TRUST_OPEN_TOKEN, "", $colored_text);
 576+ $colored_text = str_replace(self::TRUST_CLOSE_TOKEN, "", $colored_text);
 577+
 578+ $colored_text = preg_replace("/&apos;/", "'", $colored_text, -1);
 579+
 580+ $colored_text = preg_replace("/&amp;/", "&", $colored_text, -1);
 581+
 582+ $colored_text = preg_replace("/&lt;/", self::TRUST_OPEN_TOKEN, $colored_text, -1);
 583+ $colored_text = preg_replace("/&gt;/", self::TRUST_CLOSE_TOKEN, $colored_text, -1);
 584+
 585+ // Now update the text.
 586+ $text = $voteitText . $colored_text . "\n" . $wgTrustExplanation;
 587+ } else {
 588+ // Return a message about the missing text.
 589+ $text = $wgNoTrustExplanation . "\n" . $text;
 590+ }
 591+
587592 return true;
588593 }
589 -
590 - if (strstr($text, "{{ns:project}}")) {
 594+
 595+ /* Register the tags we are interested in expanding. */
 // LanguageGetMagic hook (registered in setup()): adds the magic word 't',
 // the same id that setup() binds to ucscColorTrust_Render via
 // setFunctionHook. Always returns true.
 // The leading 0 in the array is presumably MediaWiki's case-sensitivity
 // flag -- confirm against the magic word documentation.
 596+ function ucscColorTrust_Magic( &$magicWords, $langCode ) {
 597+ $magicWords[ 't' ] = array( 0, 't' );
591598 return true;
592599 }
593 -
594 - if ($wgRequest->getVal('diff')){
595 - // For diffs, look for the absence of the diff token instead of counting
596 - if(substr($text,0,3) == self::DIFF_TOKEN_TO_COLOR){
597 - return true;
 600+
 601+ /* Pull in any colored text. Also handle closing tags. */
 // ParserAfterTidy hook (registered in setup()). Modifies $text in place
 // and always returns true. Steps:
 //  1. If $this->colored is still false, run ucscSeeIfColored() on a copy
 //     of $text and parse the result with $wgOut->parse().
 //  2. Prepend the script tag that loads wz_tooltip.js.
 //  3. Turn the TRUST_OPEN_TOKEN / TRUST_CLOSE_TOKEN placeholders into
 //     real '<' and '>'.
 //  4. Repair span nesting: add '</span>' before every '</p>' and drop a
 //     '</span>' that directly follows '<p>' or '<li>'.
 // $IP is declared global but not used here, and $count is overwritten by
 // each preg_replace and never read.
 602+ function ucscOrigin_Finalize(&$parser, &$text) {
 603+ global $wgScriptPath, $IP, $wgOut;
 604+
 605+ if(!$this->colored){
 606+ // This is to handle caching problems.
 // Both branches fetch and parse the colored text the same way; they
 // differ only in where the result goes: into $text, or straight into
 // $wgOut->mBodytext when $text contains "This page has been accessed"
 // (presumably the page footer -- confirm).
 607+ if (!strstr($text, "This page has been accessed")){
 608+ $colored_text = $text;
 609+ $this->ucscSeeIfColored($parser, $colored_text);
 610+ $text = $wgOut->parse( $colored_text );
 611+ } else {
 612+ $colored_text = $text;
 613+ $this->ucscSeeIfColored($parser, $colored_text);
 614+ $wgOut->mBodytext = $wgOut->parse( $colored_text );
 615+ }
 616+ }
 617+
 618+ $count = 0;
 619+ $text = '<script type="text/javascript" src="'.$wgScriptPath.'/extensions/Trust/js/wz_tooltip.js"></script>' . $text;
 620+ $text = preg_replace('/' . self::TRUST_OPEN_TOKEN . '/', "<", $text, -1, $count);
 621+ $text = preg_replace('/' . self::TRUST_CLOSE_TOKEN .'/', ">", $text, -1, $count);
 622+ $text = preg_replace('/<\/p>/', "</span></p>", $text, -1, $count);
 623+ $text = preg_replace('/<p><\/span>/', "<p>", $text, -1, $count);
 624+ $text = preg_replace('/<li><\/span>/', "<li>", $text, -1, $count);
 625+
 626+ return true;
 627+ }
 628+
 629+ /* Text Trust */
 // Parser function callback for the 't' hook (bound in setup()).
 // $combinedValue holds three fields separated by TRUST_SPLIT_TOKEN:
 // trust, origin and contributing author. Returns the parser-function
 // result array with the tokenised opening span tag as its first element;
 // the placeholder tokens are turned into real angle brackets later by
 // ucscOrigin_Finalize().
 630+ function ucscColorTrust_Render( &$parser, $combinedValue = "0,0,0" ) {
 631+
 632+ // Split the value into trust and origin information.
 633+ // 0 = trust
 634+ // 1 = origin
 635+ // 2 = contributing author
 636+ $splitVals = explode(self::TRUST_SPLIT_TOKEN, $combinedValue);
 637+
 // The value returned by computeColorFromFloat() is used as a CSS class.
 // NOTE(review): the concatenated attribute strings have no space between
 // class="..." and onmouseover, nor between onmouseout and onclick.
 // NOTE(review): $splitVals[1] and $splitVals[2] are interpolated into the
 // attribute values without escaping -- confirm their source is trusted.
 638+ $class = $this->computeColorFromFloat($splitVals[0]);
 639+ $output = self::TRUST_OPEN_TOKEN . "span class=\"$class\""
 640+ . "onmouseover=\"Tip('".$splitVals[2]."')\" onmouseout=\"UnTip()\""
 641+ . "onclick=\"showOrigin("
 642+ . $splitVals[1] . ")\"" . self::TRUST_CLOSE_TOKEN;
 643+
 // Every span except the first is preceded by a closing tag for the
 // previous one; the last span is left open here (ucscOrigin_Finalize()
 // inserts a closing span before each closing paragraph tag).
 644+ $this->current_trust = $class;
 645+ if ($this->first_span){
 646+ $this->first_span = false;
 647+ } else {
 648+ $output = self::TRUST_OPEN_TOKEN . "/span" . self::TRUST_CLOSE_TOKEN . $output;
598649 }
 650+
 651+ return array ( $output, "noparse" => false, "isHTML" => false );
599652 }
600 -
601 - // if we made it here, we are going to color some text
602 - $this->colored = true;
603 -
604 - $res = $dbr->select('wikitrust_colored_markup', 'revision_text',
605 - array( 'revision_id' => $this->current_rev ), array());
606 - if ($res){
607 - $row = $dbr->fetchRow($res);
608 - $colored_text = $row[0];
609 - if ($colored_text){
610 - // First, make sure that there are not any instances of our tokens in the colored_text
611 - $colored_text = str_replace(self::TRUST_OPEN_TOKEN, "", $colored_text);
612 - $colored_text = str_replace(self::TRUST_CLOSE_TOKEN, "", $colored_text);
613 -
614 - // Now update the text.
615 - $text = $voteitText . $colored_text . "\n" . $wgTrustExplanation;
616 - } else {
617 - // If the colored text is missing, generate it in the background.
618 - // For now, return a message about the missing text.
619 - self::runEvalEdit(self::TRUST_EVAL_MISSING);
620 - $text = $wgNoTrustExplanation . "\n" . $text;
621 - }
622 - } else {
623 - return false;
624 - }
625 - $dbr->freeResult( $res );
626 - return true;
627 - }
628653
629 - /* Register the tags we are intersted in expanding. */
630 - function ucscColorTrust_Magic( &$magicWords, $langCode ) {
631 - $magicWords[ 't' ] = array( 0, 't' );
632 - return true;
633 - }
634 -
635 - /* Turn the finished trust info into a span tag. Also handle closing tags. */
636 - function ucscOrigin_Finalize(&$parser, &$text) {
637 - global $wgScriptPath;
638 - $count = 0;
639 - $text = '<script type="text/javascript" src="'.$wgScriptPath.'/extensions/Trust/js/wz_tooltip.js"></script>' . $text;
640 - $text = preg_replace('/' . self::TRUST_OPEN_TOKEN . '/', "<", $text, -1, $count);
641 - $text = preg_replace('/' . self::TRUST_CLOSE_TOKEN .'/', ">", $text, -1, $count);
642 - $text = preg_replace('/<\/p>/', "</span></p>", $text, -1, $count);
643 - $text = preg_replace('/<p><\/span>/', "<p>", $text, -1, $count);
644 - $text = preg_replace('/<li><\/span>/', "<li>", $text, -1, $count);
 654+ /**
 655+ Maps from the online trust values to the css trust values.
 656+ Normalize the value for growing wikis.

 Computes (($trust + .5) * TRUST_MULTIPLIER) / $this->median, clamps the
 result to the range MIN_TRUST_VALUE..MAX_TRUST_VALUE, and returns
 whatever computeColor3() yields for the clamped value.

 NOTE(review): $this->median defaults to 1.0 and is replaced by the
 number sent by the trust server (see ucscSeeIfColored). The zero guard
 of the removed update_median() has no counterpart here, so a median of
 0 from the server would divide by zero -- confirm the server never
 sends 0.
 657+ */
 658+ function computeColorFromFloat($trust){
 659+ $normalized_value = min(self::MAX_TRUST_VALUE, max(self::MIN_TRUST_VALUE,
 660+ (($trust + .5) * self::TRUST_MULTIPLIER)
 661+ / $this->median));
 662+ return $this->computeColor3($normalized_value);
 663+ }
645664
646 - return true;
647 - }
648 -
649 - /* Text Trust */
650 - function ucscColorTrust_Render( &$parser, $combinedValue = "0,0,0" ) {
651 -
652 - // Split the value into trust and origin information.
653 - // 0 = trust
654 - // 1 = origin
655 - // 2 = contributing author
656 - $splitVals = explode(self::TRUST_SPLIT_TOKEN, $combinedValue);
657 -
658 - $class = $this->computeColorFromFloat($splitVals[0]);
659 - $output = self::TRUST_OPEN_TOKEN . "span class=\"$class\""
660 - . "onmouseover=\"Tip('".$splitVals[2]."')\" onmouseout=\"UnTip()\""
661 - . "onclick=\"showOrigin("
662 - . $splitVals[1] . ")\"" . self::TRUST_CLOSE_TOKEN;
663 -
664 - $this->current_trust = $class;
665 - if ($this->first_span){
666 - $this->first_span = false;
667 - } else {
668 - $output = self::TRUST_OPEN_TOKEN . "/span" . self::TRUST_CLOSE_TOKEN . $output;
669 - }
670 -
671 - return array ( $output, "noparse" => false, "isHTML" => false );
672 - }
673 -
674 - /**
675 - Maps from the online trust values to the css trust values.
676 - Normalize the value for growing wikis.
677 - */
678 - function computeColorFromFloat($trust){
679 - $normalized_value = min(self::MAX_TRUST_VALUE, max(self::MIN_TRUST_VALUE,
680 - (($trust + .5) * self::TRUST_MULTIPLIER)
681 - / $this->median));
682 - return $this->computeColor3($normalized_value);
683 - }
684 -
685 - /* Maps a trust value to a HTML color representing the trust value. */
686 - function computeColor3($fTrustValue){
687 - return $this->COLORS[$fTrustValue];
688 - }
 665+ /* Maps a trust value to a HTML color representing the trust value. */
 // Plain lookup of $fTrustValue in $this->COLORS (defined outside this
 // view). The caller ucscColorTrust_Render() uses the returned value as
 // the class attribute of a span.
 // NOTE(review): computeColorFromFloat() passes a float here; PHP
 // truncates float array keys to integers -- confirm that is intended.
 666+ function computeColor3($fTrustValue){
 667+ return $this->COLORS[$fTrustValue];
 668+ }
689669 }
690670
691671 TextTrust::singleton();
Index: trunk/extensions/WikiTrust/README
@@ -220,6 +220,8 @@
221221 libmysql-ocaml-dev
222222 libextlib-ocaml-dev
223223 python-mysqldb
 224+ libxml-light-ocaml-dev
 225+ libzip-ocaml-dev
224226
225227 You also need:
226228

Status & tagging log