# User:RobotMichiel1972/wikidata lowercase.py

#!/usr/bin/python
# coding: utf-8

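"""
Lowercase the first letter of the Dutch (nl) Wikidata label of an item when
the English (en) label of the same item is not capitalized. Only pages in the
main namespace whose item has both an nlwiki and an enwiki link are considered.

Run:
Syntax: python wikidata_lowercase.py pages [-option]

and option can be one of these (each takes its value after the colon):
 * -cat:NAME    : Work on all pages which are in a specific category.
 * -ref:TITLE   : Work on all pages that link to a certain page.
 * -link:TITLE  : Work on all pages that are linked from a certain page.
 * -start:TITLE : Work on all pages on the home wiki, starting at the named page.
 * -page:TITLE  : Work on one page.

"""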

# Distributed under the terms of the MIT license.
#

# NOTE(review): this keyword string still names commons_link.py, not this
# script -- it looks inherited from the file this bot was adapted from.
__version__='$Id: commons_link.py 9692 2011-10-30 15:03:29Z xqt $'

import re
import wikipedia as pywikibot
import pagegenerators, catlib

# Placeholder message tables keyed by language code.
# NOTE(review): neither table is referenced in the visible part of this
# file; presumably left over from the script this one was derived from --
# confirm before removing.
comment1 = {'ar': u'..'}

comment2 = {'ar': u'...'}


class WikiBot:
    """Align the capitalisation of nl Wikidata labels with the en label.

    For every main-namespace page delivered by the generator, the linked
    Wikidata item is fetched.  When the item has both an nlwiki and an
    enwiki link, and the en label starts with a lowercase letter while the
    nl label starts with an uppercase letter, the bot proposes lowercasing
    the first letter of the nl label and (after confirmation) saves it.
    """

    def __init__(self, generator, acceptall = False):
        """Remember the page source and the initial confirmation mode.

        generator -- iterable yielding the page objects to examine.
        acceptall -- when True, every proposed change is applied without
                     prompting; it also becomes True once the operator
                     answers 'a' (All) at a prompt.
        """
        self.generator = generator
        self.acceptall = acceptall

    @staticmethod
    def _lowercased_label(en_label, nl_label):
        """Return the nl label with a lowercased first letter, or None.

        A new label is proposed only when the en label really starts with a
        lowercase letter and the nl label really starts with an uppercase
        letter.  Digits, quotes, brackets and other uncased first
        characters (and empty labels) therefore never trigger an edit, and
        a proposal always differs from the current nl label.
        """
        if not en_label[:1].islower():
            return None
        if not nl_label[:1].isupper():
            return None
        return nl_label[:1].lower() + nl_label[1:]

    def _confirmed(self):
        """Return True when the pending change may be saved.

        Prompts the operator unless accept-all mode is active; answering
        'a' switches accept-all mode on for the rest of the run.
        """
        if self.acceptall:
            return True
        choice = pywikibot.inputChoice(
            u'Do you want to accept these changes?',
            ['Yes', 'No', 'All'], ['y', 'N', 'a'],
            'N')
        if choice == 'a':
            self.acceptall = True
        return self.acceptall or choice == 'y'

    def _treat(self, site, repo, page):
        """Inspect one page's Wikidata item and fix its nl label if needed.

        site -- the nl site object the page is re-created on.
        repo -- the data repository belonging to that site.
        page -- page object taken from the generator.

        Exceptions raised while loading the page or its item (NoPage,
        IsRedirectPage, LockedPage, ...) propagate to the caller.
        """
        pywikibot.output(u'\n>>>> %s <<<<' % page.title())

        # Re-create the page on the nl site, then load its Wikidata item.
        page = pywikibot.Page(site, page.title())
        datapage = pywikibot.DataPage(page)
        dictionary = datapage.get()

        # Both an nl and an en sitelink are required.  Membership tests are
        # used instead of a 'not found' string sentinel so that no real
        # value can be mistaken for a missing one.
        alllinks = dictionary['links']
        if 'nlwiki' not in alllinks:
            print('This wikidata page has no NL entry, skip')
            return
        if 'enwiki' not in alllinks:
            print('This wikidata page has no EN entry')
            return
        print('This wikidata page has a EN entry')

        alllabel = dictionary['label']
        nl_label = alllabel.get('nl')
        en_label = alllabel.get('en')
        if nl_label is None:
            print('This wikidata page has no NL label, skip')
            return
        if en_label is None:
            print('This wikidata page has no EN label')
            return
        print('This wikidata page has a EN label')

        new_nl_label = self._lowercased_label(en_label, nl_label)
        if new_nl_label is None:
            print(' --skip, no change needed')
            return
        print(' --enwiki label is lowercase')

        pywikibot.showDiff(nl_label, new_nl_label)
        if not self._confirmed():
            return

        try:
            print('maak edit??')
            qnummer = dictionary['entity']   # item ID of the loaded data

            wikidata = pywikibot.DataPage(repo, qnummer)
            # The item is loaded before editing, as the original flow did;
            # presumably setitem needs the current state -- TODO confirm.
            wikidata.get()

            wikidata.setitem(summary=u"lowercase label",
                             items={'type': u'item', 'label': 'nl',
                                    'value': new_nl_label})
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict'
                             % (page.title()))

    def pages(self):
        """Process every main-namespace page supplied by the generator.

        Pages outside namespace 0 are ignored.  Missing, redirect and
        locked pages are reported and skipped so the run continues.
        """
        # create a site object, here for nl-wiki
        site = pywikibot.getSite('nl')

        # get the data repository site for the given site
        repo = site.data_repository()

        for page in self.generator:
            if page.namespace() != 0:
                continue
            try:
                self._treat(site, repo, page)
            except pywikibot.NoPage:
                pywikibot.output(u'Page %s does not exist?!' % page.title())
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect; skipping.'
                                 % page.title())
            except pywikibot.LockedPage:
                pywikibot.output(u'Page %s is locked?!' % page.title())

                                        

if __name__ == "__main__":
    # Command-line entry point: the positional word 'pages' selects the
    # action and exactly one -option:value argument selects the page source.
    gen = None
    action = None
    try:
        for arg in pywikibot.handleArgs():
            if arg == 'pages':
                action = 'pages'
            elif arg.startswith('-start:'):
                # All pages from the given title onwards, redirects excluded.
                start = pywikibot.Page(pywikibot.getSite(), arg[7:])
                gen = pagegenerators.AllpagesPageGenerator(
                    start.title(withNamespace=False),
                    namespace=start.namespace(),
                    includeredirects = False)
            elif arg.startswith('-cat:'):
                cat = catlib.Category(pywikibot.getSite(),
                                      'Category:%s' % arg[5:])
                gen = pagegenerators.CategorizedPageGenerator(cat)
            elif arg.startswith('-ref:'):
                ref = pywikibot.Page(pywikibot.getSite(), arg[5:])
                gen = pagegenerators.ReferringPageGenerator(ref)
            elif arg.startswith('-link:'):
                link = pywikibot.Page(pywikibot.getSite(), arg[6:])
                gen = pagegenerators.LinkedPageGenerator(link)
            elif arg.startswith('-page:'):
                singlepage = pywikibot.Page(pywikibot.getSite(), arg[6:])
                gen = iter([singlepage])
            # Any other argument is ignored, as before.

        # Without a page source there is nothing to preload; show the usage
        # text instead of handing None to the preloading generator.
        if action == 'pages' and gen is not None:
            preloadingGen = pagegenerators.PreloadingGenerator(gen)
            bot = WikiBot(preloadingGen, acceptall=False)
            bot.pages()
        else:
            # No module name is passed, so the help of the script actually
            # being run is shown rather than that of commons_link.
            pywikibot.showHelp()
    finally:
        pywikibot.stopme()
# NODES