#!/usr/bin/python
# coding: utf-8
"""
Offer to lowercase the first letter of the Dutch (nl) Wikidata label of
article pages whose English (en) label is written in lowercase.

Syntax: python wikidata_lowercase.py pages [-option]

where -option selects the pages to work on and is one of:

* -cat:CATEGORY : Work on all pages which are in a specific category.
* -ref:PAGE     : Work on all pages that link to a certain page.
* -link:PAGE    : Work on all pages that are linked from a certain page.
* -start:PAGE   : Work on all pages on the home wiki, starting at the named page.
* -page:PAGE    : Work on one page.
"""
# Distributed under the terms of the MIT license.
#
__version__='$Id: commons_link.py 9692 2011-10-30 15:03:29Z xqt $'
import re
import wikipedia as pywikibot
import pagegenerators, catlib
# Two small text tables keyed by language code; each holds a single 'ar' entry.
# NOTE(review): neither table is referenced by the code visible in this file;
# presumably left over from the script this one was derived from -- confirm
# before removing.
comment1 = dict(ar=u'..')
comment2 = dict(ar=u'...')
class WikiBot:
    """Bot that lowercases the first letter of Dutch (nl) Wikidata labels.

    For every main-namespace page produced by ``generator`` the bot loads the
    Wikidata item behind the nl-wiki page of the same title.  When the item
    has both an nl and an en sitelink and label, and the en label starts with
    a lowercase letter while the nl label does not, the bot shows the proposed
    nl label and (after confirmation) saves it.

    Attributes:
        generator: iterable of pywikibot page objects to inspect.
        acceptall: when true, changes are saved without asking; it is switched
            on at runtime when the operator answers "All".
    """

    def __init__(self, generator, acceptall = False):
        """Store the page source and the initial confirmation mode."""
        self.generator = generator
        self.acceptall = acceptall

    @staticmethod
    def _proposed_label(en_label, nl_label):
        """Return ``nl_label`` with a lowercase first letter, or None.

        None means "leave the label alone".  That is the answer when either
        label is empty, when the en label does not start with a lowercase
        letter (capitals, digits, apostrophes, quotes, brackets, ...), or
        when lowercasing the first character would not change the nl label.
        """
        if not en_label or not nl_label:
            return None
        # Only a genuinely lowercase English label justifies the change; the
        # previous check merely excluded capitals, "'" and the digits 1-9.
        if not en_label[:1].islower():
            return None
        candidate = nl_label[:1].lower() + nl_label[1:]
        if candidate == nl_label:
            # Already lowercase (or caseless): saving would be a no-op edit.
            return None
        return candidate

    def pages(self):
        """Inspect every page of the generator and fix nl labels if needed.

        Pages outside namespace 0 are ignored.  NoPage, IsRedirectPage and
        LockedPage errors raised while handling a page are reported and the
        run continues with the next page.
        """
        # The bot works on labels of the Dutch Wikipedia, whatever the home
        # wiki of the generator is.
        site = pywikibot.getSite('nl')
        # Data repository that belongs to that site.
        repo = site.data_repository()
        for page in self.generator:
            if page.namespace() != 0:
                continue
            try:
                self._process(site, repo, page)
            except pywikibot.NoPage:
                pywikibot.output(u'Page %s does not exist?!' % page.title())
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect; skipping.'
                                 % page.title())
            except pywikibot.LockedPage:
                pywikibot.output(u'Page %s is locked?!' % page.title())

    def _process(self, site, repo, page):
        """Check one page and, if wanted, save the lowercased nl label.

        Parenthesised single-argument ``print`` is used throughout: under
        Python 2 it prints exactly what the print statement would.
        """
        pywikibot.output(u'\n>>>> %s <<<<' % page.title())
        # Look the title up on nl-wiki and fetch the item data as a dict.
        page = pywikibot.Page(site, page.title())
        dictionary = pywikibot.DataPage(page).get()

        # Missing entries are detected with None rather than a magic string,
        # so a label whose text really is 'not found' is handled correctly.
        links = dictionary['links']
        if links.get('nlwiki') is None:
            print('This wikidata page has no NL entry, skip')
            return
        if links.get('enwiki') is None:
            print('This wikidata page has no EN entry')
            return
        print('This wikidata page has a EN entry')

        labels = dictionary['label']
        nl_label = labels.get('nl')
        en_label = labels.get('en')
        if nl_label is None:
            print('This wikidata page has no NL label, skip')
            return
        if en_label is None:
            print('This wikidata page has no EN label')
            return
        print('This wikidata page has a EN label')

        new_nl_label = self._proposed_label(en_label, nl_label)
        if new_nl_label is None:
            print(' --skip, no change needed')
            return

        print(' --enwiki label is lowercase')
        pywikibot.showDiff(nl_label, new_nl_label)
        if not self.acceptall:
            choice = pywikibot.inputChoice(
                u'Do you want to accept these changes?',
                ['Yes', 'No', 'All'], ['y', 'N', 'a'],
                'N')
            if choice == 'a':
                self.acceptall = True
            elif choice != 'y':
                return

        try:
            print('maak edit??')
            # Address the item by its entity id through the repository and
            # load it before writing, as the original code did.
            wikidata = pywikibot.DataPage(repo, dictionary['entity'])
            wikidata.get()
            wikidata.setitem(summary=u"lowercase label",
                             items={'type': u'item', 'label': 'nl',
                                    'value': new_nl_label})
        except pywikibot.EditConflict:
            pywikibot.output(
                u'Skipping %s because of edit conflict'
                % (page.title()))
def main():
    """Parse the command line, build the page generator and run the bot.

    The positional argument ``pages`` selects the only supported action; one
    of ``-start:``, ``-cat:``, ``-ref:``, ``-link:`` or ``-page:`` selects the
    pages to work on (the last one given wins).  Help is shown when the
    action or the page selection is missing.  ``pywikibot.stopme()`` is
    always called on the way out.
    """
    action = None
    gen = None
    try:
        for arg in pywikibot.handleArgs():
            if arg == 'pages':
                action = 'pages'
            elif arg.startswith('-start:'):
                start = pywikibot.Page(pywikibot.getSite(),
                                       arg[len('-start:'):])
                gen = pagegenerators.AllpagesPageGenerator(
                    start.title(withNamespace=False),
                    namespace=start.namespace(),
                    includeredirects = False)
            elif arg.startswith('-cat:'):
                cat = catlib.Category(pywikibot.getSite(),
                                      'Category:%s' % arg[len('-cat:'):])
                gen = pagegenerators.CategorizedPageGenerator(cat)
            elif arg.startswith('-ref:'):
                ref = pywikibot.Page(pywikibot.getSite(), arg[len('-ref:'):])
                gen = pagegenerators.ReferringPageGenerator(ref)
            elif arg.startswith('-link:'):
                link = pywikibot.Page(pywikibot.getSite(),
                                      arg[len('-link:'):])
                gen = pagegenerators.LinkedPageGenerator(link)
            elif arg.startswith('-page:'):
                singlepage = pywikibot.Page(pywikibot.getSite(),
                                            arg[len('-page:'):])
                gen = iter([singlepage])

        # Without a page selection there is nothing to iterate over; wrapping
        # None in a PreloadingGenerator would only fail later, inside the bot.
        if action == 'pages' and gen is not None:
            preloadingGen = pagegenerators.PreloadingGenerator(gen)
            bot = WikiBot(preloadingGen, acceptall=False)
            bot.pages()
        else:
            # The original asked for the help of u'commons_link', a different
            # script.  NOTE(review): with no argument pywikibot is expected to
            # show the help of the running script -- confirm for the
            # pywikibot version in use.
            pywikibot.showHelp()
    finally:
        pywikibot.stopme()


if __name__ == "__main__":
    main()