Index: trunk/tools/daily-image-l/dailyimagel.py |
— | — | @@ -0,0 +1,180 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# -*- coding: utf-8 -*- |
| 4 | + |
| 5 | + |
# Shell command prefix used for every HTTP fetch. The trailing space is
# deliberate: callers append further options and the quoted URL directly.
wget = '/usr/sfw/bin/wget -S -erobots=off -q -O - '

# Today's Picture of the Day page, requested with action=purge.
todaypotd = (
    'http://commons.wikimedia.org/w/index.php'
    '?title=Commons:Picture_of_the_day/Today&action=purge'
)

# Prefix that turns an image page name into its full URL.
urlbase = 'http://commons.wikimedia.org/wiki/'

# api.php query listing the categories of a page; the page title is
# appended. Earlier query.php form, kept for reference:
#   http://commons.wikimedia.org/w/query.php?what=categories&format=txt&titles=
querycat = (
    'http://commons.wikimedia.org/w/api.php'
    '?action=query&format=txt&prop=categories&titles='
)

# api.php query listing namespace-4 pages that use an image; the image
# title is appended. Earlier query.php form, kept for reference:
#   http://commons.wikimedia.org/w/query.php?what=imagelinks&ilnamespace=4&format=txt&illimit=300&titles=
querylinks = (
    'http://commons.wikimedia.org/w/api.php'
    '?action=query&format=txt&iunamespace=4&iulimit=500'
    '&list=imageusage&iutitle='
)
| 14 | + |
| 15 | + |
| 16 | +import os,sys,re |
| 17 | +from commands import getoutput |
| 18 | +from datetime import date |
| 19 | + |
# Patterns used by createmail() to scrape the wget output. All of them are
# raw strings so that regex escapes such as \) reach the re module intact
# instead of being treated as (invalid) Python string escapes.

# Main content area of the fetched page, between the two marker comments.
repotdcontent = re.compile(
    r'<!-- start content -->(.*?)<!-- end content -->', re.DOTALL)

# Page name of the featured image, taken from the "magnify" link.
# NOTE(review): the host in this pattern is www.mediawiki.org while the page
# being scraped is on commons.wikimedia.org - confirm this is intended.
reimagename = re.compile(
    r'<div class="magnify"><a href="https://www.mediawiki.org/wiki/([^"]*)" class="internal"')

# Category names in the category query output (rest of the line after
# "Category:").
recats = re.compile(r'Category:(.*)')

# Featured Picture / Quality Image gallery pages that use the image. The
# first character after the slash must not be a lowercase "c" (presumably to
# skip "candidates" subpages - confirm).
refplinks = re.compile(r'Commons:Featured pictures/([^c].*)')
reqilinks = re.compile(r'Commons:Quality [Ii]mages/([^c].*)')

# The <ul> list holding the captions.
recaptions = re.compile(r'<ul>(.*?)</ul>', re.DOTALL)

# Markup removed from the captions before they are mailed.
reli = re.compile(r'</?li[^>]*>')
rea = re.compile(r'</?a[^>]*>')
rei = re.compile(r'</?i>')

# A caption line that only names a "Template:Potd..." page, up to and
# including its closing parenthesis.
renocaption = re.compile(r'\n[^:]*: Template:Potd[^)]*\)')
| 30 | + |
# Mail program the finished message is piped to.
SENDMAIL = '/usr/sbin/sendmail'

# File the daily message is written to by createmail().
mailfilename = '/projects/potd/dailyimagel.txt'
# File read instead of mailfilename when createmail() fails.
mailerror = '/projects/potd/mailerror.txt'

# Recipient of the daily mail. Alternative recipients kept from the
# original source (commented out there as well):
#   brianna.laugher@gmail.com
#   bryan.tongminh@gmail.com
mailto = 'daily-image-l@lists.wikimedia.org'
| 39 | + |
def _shellquote(text):
    '''
    Return text wrapped in single quotes so the shell passes it on verbatim
    as a single argument.
    '''
    # Nothing is special to the shell inside single quotes. An embedded
    # single quote is produced by closing the quotes, adding an escaped
    # quote and reopening them.
    return "'" + text.replace("'", "'\\''") + "'"


def _write_text(path, text):
    '''
    Write text to the file at path, closing the file even if the write fails.
    '''
    # try/finally rather than a with-statement so the script keeps running on
    # interpreters that predate it.
    handle = open(path, 'w')
    try:
        handle.write(text)
    finally:
        handle.close()


def _describe_license(categories):
    '''
    Return the "Copyright status" text derived from a list of category names.
    The result is an empty string when no known category is present.
    '''
    # Checked in this fixed order so the sentence order in the mail is the
    # same on every run (a dict would iterate in arbitrary order).
    licenses = (
        ("GFDL", "GNU Free Documentation License"),
        ("CC-BY-SA-2.5,2.0,1.0", "Creative Commons Attribution ShareAlike license, all versions"),
        ("CC-BY-SA-1.0", "Creative Commons Attribution ShareAlike license, version 1.0"),
        ("CC-BY-SA-2.0", "Creative Commons Attribution ShareAlike license, version 2.0"),
        ("CC-BY-SA-2.5", "Creative Commons Attribution ShareAlike license, version 2.5"),
        ("CC-BY-1.0", "Creative Commons Attribution license, version 1.0"),
        ("CC-BY-2.0", "Creative Commons Attribution license, version 2.0"),
        ("CC-BY-2.5", "Creative Commons Attribution license, version 2.5"),
    )

    lic = ""
    if "Self-published work" in categories:
        lic = "Created by a Wikimedian (see image page for details); "
    for name, description in licenses:
        if name in categories:
            lic += "Licensed under the " + description + '. '

    # Public-domain categories replace anything collected above.
    if "Public domain" in categories:
        lic = "Public domain"

    # When several PD categories are present the last one listed wins.
    for cat in categories:
        if not cat.startswith("PD"):
            continue
        if cat == "PD-self":
            lic = "Created by a Wikimedian (see image page for details); released into the public domain."
        elif cat == "PD Art":
            lic = "Reproduction of a two-dimensional work of art whose copyright has expired (public domain)."
        elif cat == "PD Old":
            lic = "Public domain (copyright expired due to the age of the work)."
        else:
            # Drop the three-character "PD" prefix plus separator.
            lic = "Public domain as a work of the " + cat[3:] + " organisation."
    return lic


def _format_topic(topics):
    '''
    Turn "Main/Sub[/...]" into "Main (Sub)"; a value without a slash is
    returned unchanged.
    '''
    parts = topics.split('/')
    if len(parts) > 1:
        return parts[0] + ' (' + parts[1] + ')'
    return topics


def _clean_captions(captions):
    '''
    Strip list, link and italic markup from the caption HTML and remove the
    placeholder lines matched by renocaption.
    '''
    for pattern in (reli, rea, rei, renocaption):
        captions = pattern.sub('', captions)
    return captions


def createmail():
    '''
    Attempts to create an email at mailfilename.

    Fetches today's Picture of the Day page with wget, saves the raw output
    to /projects/potd/wgetoutput.txt, and extracts the image name, a licence
    description (from the image's categories), the Featured Picture or
    Quality Image topic (from the pages using the image) and the captions.
    The finished message, headers included, is written to mailfilename.

    Raises:
        IndexError: if the page content, the image name, a Featured Picture
            or Quality Image backlink, or the caption list cannot be found.
    '''
    page = getoutput(wget + '--post-data submit ' + _shellquote(todaypotd))
    _write_text('/projects/potd/wgetoutput.txt', page)

    content = repotdcontent.findall(page)
    if not content:
        raise IndexError('could not find page content in wget output')

    # extract image name/url
    imagenames = reimagename.findall(content[0])
    if not imagenames:
        raise IndexError('could not find image name in page content')
    imagename = imagenames[0]
    imageurl = urlbase + imagename

    # attempt to determine license status from categories
    # The image name comes from a fetched web page, so the URL is quoted as a
    # whole before it reaches the shell; in the previous double-quoted form
    # the shell would still expand $(...) and backticks.
    catstext = getoutput(wget + _shellquote(querycat + imagename))
    lic = _describe_license(recats.findall(catstext))

    # determine FP category (or 'topic')
    linkstext = getoutput(wget + _shellquote(querylinks + imagename))
    isFP = True
    fplinks = refplinks.findall(linkstext)
    if fplinks:
        topics = fplinks[0]
    else:
        isFP = False
        qilinks = reqilinks.findall(linkstext)
        if not qilinks:
            print("Could not find FP or QI backlink, aborting")
            raise IndexError('Could not find FP or QI backlink')
        topics = qilinks[0]
    topic = _format_topic(topics)

    # extract multilingual captions
    captionlists = recaptions.findall(content[0])
    if not captionlists:
        raise IndexError('no captions??')
    captions = _clean_captions(captionlists[0])

    # write info to file
    # Every line ends in a plain LF: the message is piped to the local
    # sendmail program, and the headers previously mixed LF and CRLF.
    lines = [
        'To: ' + mailto,
        'Content-Type: text/plain; charset=utf-8',
        'Subject: ' + str(date.today()),
        '',
        'Body of email:',
        imageurl,
        'Copyright status: ' + lic,
    ]
    if isFP:
        lines.append('Featured Picture category: ' + topic)
    elif 'Subject' in topic:
        lines.append('Recognised as a Quality Image due to subject matter')
    else:
        lines.append('Recognised as a Quality Image due to technical merit')
    lines.append('')
    lines.append('Descriptions:')

    _write_text(mailfilename, '\n'.join(lines) + '\n' + captions)
    return
| 157 | + |
###############################
# Script driver: build today's mail, or fall back to the error mail, then
# pipe the result to sendmail.
error = None
try:
    createmail()
except Exception:
    # Some Python error: record its name and message and send the error mail
    # instead. sys.exc_info() is used rather than "except ... as" so the
    # script also runs on interpreters that lack that syntax.
    exc_type, exc_value = sys.exc_info()[:2]
    error = '%s: %s' % (getattr(exc_type, '__name__', exc_type), exc_value)
    # presumably mailerror holds the headers of the error notification -
    # verify the file's contents
    mailfilename = mailerror

# get the email message from a file
try:
    f = open(mailfilename, 'r')
    try:
        mail = f.read()
    finally:
        f.close()
except IOError:
    # Without a readable message file there are no headers to send, so
    # report the problem on stderr and stop instead of dying with a traceback.
    sys.stderr.write("Could not read mail file %s: %s\n"
                     % (mailfilename, sys.exc_info()[1]))
    if error is not None:
        sys.stderr.write("Error information: " + str(error) + "\n")
    sys.exit(1)

if error is not None:
    mail += "Error information: " + str(error)

# open a pipe to the mail program and
# write the data to the pipe
p = os.popen("%s -t" % SENDMAIL, 'w')
p.write(mail)
# close() returns None when the command exited with status 0.
exitcode = p.close()
if exitcode:
    print("sendmail error: Exit code: %s" % exitcode)
Property changes on: trunk/tools/daily-image-l/dailyimagel.py |
___________________________________________________________________ |
Name: svn:eol-style |
1 | 182 | + native |