User:BenSbot/Code3

From JoCopedia
< User:BenSbot
Revision as of 12:21, 21 July 2009 by BenS (talk | contribs) (updated code)
Jump to navigation Jump to search

Here is the code for the third program that I run. If you have any questions, ask on the discussion page. Note that this is a compilation of both Code1 and Code2.

import wikipedia
import catlib
import pagegenerators
import re
import datetime


# Wiki handle used for every page/category lookup below.
# NOTE(review): presumably the default site from the bot's configuration -- confirm.
site = wikipedia.getSite()
# One entry per song link found in a show page:
# [link, date, city, venue, encore flag, show title].
songlist = []
# Lower-cased link -> (link as first seen, number of occurrences).
linkslist = {}
# Encore headings: a line starting with ":" followed by three non-word
# characters, the heading text, and three more non-word characters.
# re.S lets the trailing ".*" run across newlines, so each of these matches
# from the heading to the end of the text it is applied to.
# rea: "Encore" or "First Encore" heading (used to cut the encores off the main set).
rea = re.compile('^:\\s*\\W{3,3}(First\\s*)?Encores?\\W{3,3}.*' , re.I | re.S | re.M)
# reb: plain "Encore" heading only.
reb = re.compile('^:\\s*\\W{3,3}Encores?\\W{3,3}.*' , re.I | re.S | re.M)
# rec: "First Encore" heading only.
rec = re.compile('^:\\s*\\W{3,3}First\\s*Encores?\\W{3,3}.*' , re.I | re.S | re.M)
# red: "Second Encore" heading only.
red = re.compile('^:\\s*\\W{3,3}Second\\s*Encores?\\W{3,3}.*' , re.I | re.S | re.M)
# ree: any non-empty line that does not start with "#"; substituting it with
# "" blanks those lines and leaves only the "#" lines.
ree = re.compile('^[^#\\n].*$' , re.M)
# ref: a wiki link, "[[...]]".
ref = re.compile('\\[\\[[^\\]]*\\]\\]')
# ref2: up to two spaces/tabs, a pipe, and the rest of the link; replaced with
# "]]" below so "[[Target|label]]" becomes "[[Target]]".
ref2 = re.compile('[ \t]{0,2}\\|.*')
# reg / reg2: the whole "* Location: ..." line, and just its prefix.
reg = re.compile('^\\*\\s*Location:.*$' , re.M)
reg2 = re.compile('\\*\\s*Location:\\s*')
# reh / reh2: the whole "* Venue: ..." line, and just its prefix.
reh = re.compile('^\\*\\s*Venue:.*$' , re.M)
reh2 = re.compile('\\*\\s*Venue:\\s*')
# rei: a YYYY-MM-DD date with a year from 1900 to 2099.
rei = re.compile('(19|20)\\d\\d[-](0[1-9]|1[012])[-](0[1-9]|[12][0-9]|3[01])')

# Walk every page in Category:Shows.  For each show, record one songlist
# entry per song link (tagged with the section it appeared in) and count how
# often each link appears across all shows in linkslist.
showscat = catlib.Category(site, 'Category:Shows')
showslist = list(pagegenerators.CategorizedPageGenerator(showscat))
for show in showslist:
    page = show.get()
    title = show.title()

    # Main set: cut everything from the (first) encore heading onwards, then
    # blank every line that is not a "#" list item.
    Setlist = ree.sub("", rea.sub("", page))

    # Plain encore: from the "Encore" heading up to a "Second Encore" heading.
    Encore = ""
    a = reb.search(page)
    if a is not None:
        Encore = ree.sub("", red.sub("", a.group()))

    # First encore: from the "First Encore" heading up to "Second Encore".
    FirstEncore = ""
    b = rec.search(page)
    if b is not None:
        FirstEncore = ree.sub("", red.sub("", b.group()))

    # Second encore: from the "Second Encore" heading to the end of the page.
    SecondEncore = ""
    c = red.search(page)
    if c is not None:
        SecondEncore = ree.sub("", c.group())

    City = "Unknown"
    d = reg.search(page)
    if d is not None:
        City = reg2.sub("", d.group())

    Venue = "Unknown"
    e = reh.search(page)
    if e is not None:
        Venue = reh2.sub("", e.group())

    # The date is taken from the show's own link text, not from the page body.
    Date = ""
    g = rei.search(show.aslink())
    if g is not None:
        Date = g.group()

    # Same order as before: main set, encore, first encore, second encore.
    sections = (
        (Setlist, " "),
        (Encore, "Yes"),
        (FirstEncore, "First"),
        (SecondEncore, "Second"),
    )
    for text, encore_flag in sections:
        for x in ref.findall(text):
            # Strip any "|label" part so the link compares equal to the
            # song page's own link.
            x = ref2.sub("]]", x)
            songlist.append([x, Date, City, Venue, encore_flag, title])

    # Count every link on a "#" line, case-insensitively, keeping the
    # spelling of the first occurrence for display.
    entiresetlist = ree.sub("", page)
    for x in ref.findall(entiresetlist):
        x = ref2.sub("]]", x)
        lx = x.lower()
        if lx in linkslist:
            linkslist[lx] = (linkslist[lx][0], linkslist[lx][1] + 1)
        else:
            linkslist[lx] = (x, 1)

# Remove blacklisted links from the statistics.  The blacklist page is read
# as wiki text and every "[[...]]" link on it is treated as an entry.
bls = wikipedia.Page(site, u"User:BenSbot/Code1/Blacklist").get()
# The original referenced an undefined name `q` here (NameError); `ref` is
# the link-matching regex already used for the show pages.
blacklist = ref.findall(bls)
for link in blacklist:
    # linkslist keys are lower-cased links with any "|label" stripped, so
    # normalise the blacklist entry the same way before looking it up.
    key = ref2.sub("]]", link).lower()
    if key in linkslist:
        del linkslist[key]

# Build and save the "SongStats" page: an intro paragraph followed by one
# bullet per link, most-played first.
# The original backslash continuation dropped the space between "a" and
# "pretty" ("this is apretty good indicator"); it is restored here.
output1 = (
    "The following is a list of the songs [[Jonathan Coulton]] has played"
    " in concert.  This list has been compiled from the setlists currently available"
    " here on JoCopedia in the [[:Category:Shows|Shows]] section, by an awesome bot "
    "designed by user [[User:BenS|BenS]].  Keep in mind that not all setlists are "
    "currently available to JoCopedia, and not all setlists are 100%.  But this is a "
    "pretty good indicator.  This list is current as of "
    + str(datetime.date.today()) + "\n\n"
)

# Each value is (link as first seen, count).  Order by descending count,
# then by link text, using a key function instead of the Python-2-only cmp.
items = sorted(linkslist.values(), key=lambda entry: (-entry[1], entry[0]))

output1 = output1 + "".join(
    ["*" + link + ": " + str(count) + "\n" for link, count in items]
)

output1 = output1 + "\n" + "[[Category:Show Statistics]]"
SongStats = wikipedia.Page(site, u"SongStats")
SongStats.put(output1, u"Song statistics")



# For every page in Category:Songs that was played at least once, write a
# "<song title>/Concerts" subpage holding a sortable table of performances.
songscat = catlib.Category(site, 'Category:Songs')
songslist = list(pagegenerators.CategorizedPageGenerator(songscat))

# Index the performances once by lower-cased link instead of rescanning the
# whole songlist for every song.  Entry order within each group is kept.
performances = {}
for entry in songlist:
    performances.setdefault(entry[0].lower(), []).append(entry)

for a in songslist:
    played = performances.get(a.aslink().lower())
    if not played:
        continue

    # Entries are [link, date, city, venue, encore flag, show title];
    # sort by date (stable, so same-date entries keep their order).
    tablelist = sorted(played, key=lambda entry: entry[1])

    # NOTE(review): titles, cities and venues presumably arrive as unicode
    # from pywikipedia; the previous str() calls would raise
    # UnicodeEncodeError on non-ASCII text under Python 2, so the values are
    # concatenated directly instead.
    song_title = a.title()
    today = str(datetime.date.today())

    parts = [
        "{{SongNav\n|cat=no\n}}\n\n\"\'\'\'" + song_title
        + "\'\'\'\" was played at the following concerts: \n\n"
        + "{|class=\"wikitable sortable\" background = \"white\" border = \"1px solid rgb(153, 153, 153)\" cellpadding = \"2%\" rules = \"all\"\n"
        + "!\'\'\'Date\'\'\'!!\'\'\'Location\'\'\'!!\'\'\'Venue\'\'\'!!\'\'\'Encore?\'\'\'\n"
    ]
    for b in tablelist:
        parts.append("|-\n")
        # The hidden "&" span is kept exactly as before.
        # NOTE(review): presumably a sort-key workaround for the table -- confirm.
        parts.append("| <span style=\"display:none\">&</span>[[" + b[5] + "|" + b[1] + "]]")
        parts.append("\n| " + b[2])
        parts.append("\n| " + b[3])
        parts.append("\n| " + b[4])
        parts.append("\n")
    parts.append(
        "|}\n\'\'NB: This page was created by a bot and was last updated on: "
        + today + "\n\n[[Category:Songs by Concert]]"
    )
    output2 = "".join(parts)

    page = wikipedia.Page(site, song_title + "/Concerts")
    # Save when the page is missing or its current text differs.
    if not page.exists() or output2 != page.get():
        page.put(output2, u"Songs by Concert")

# Call form prints the same text on Python 2 and is valid on Python 3.
print("fin")

Explanation

For an explanation, see the explanations of Code1 and Code2. Please note that some variables have been renamed because of name clashes between the two programs.