ویکی‌پدیا:درخواست‌های ربات/ربات مختصات جغرافیا/ویرایش ۵

مختصات: مختصات: عرض جغرافیایی وارد نشده‌است
از ویکی‌پدیا، دانشنامهٔ آزاد

<syntaxhighlight lang="python">

# -*- coding: utf-8 -*-
# solving "{{coord|LAT|" problem

""" This bot will make direct text replacements. It will retrieve information on which pages might need changes either from an XML dump or a text file, or only change a single page.

You can run the bot with the following commandline parameters:

-file - Work on all pages given in a local text file.

              Will read any wiki link and use these articles.
              Argument can also be given as "-file:filename".

-cat - Work on all pages which are in a specific category.

              Argument can also be given as "-cat:categoryname".

-page - Only edit a specific page.

              Argument can also be given as "-page:pagetitle". You can give this
              parameter multiple times to edit multiple pages.

-ref - Work on all pages that link to a certain page.

              Argument can also be given as "-ref:referredpagetitle".

-filelinks - Works on all pages that link to a certain image.

              Argument can also be given as "-filelinks:ImageName".

-links - Work on all pages that are linked to from a certain page.

              Argument can also be given as "-links:linkingpagetitle".

-start - Work on all pages in the wiki, starting at a given page. Choose

              "-start:!" to start at the beginning.
              NOTE: You are advised to use -xml instead of this option; this is
              meant for cases where there is no recent XML dump.

-except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,

              XYZ will be regarded as a regular expression.

-summary:XYZ - Set the summary message text for the edit to XYZ, bypassing the

              predefined message texts with original and replacements inserted.

-template:XYZ - Parsed and stored, but not otherwise used by this script.

-namespace:n - Number of namespace to process. The parameter can be used

              multiple times. It works in combination with all other
              parameters, except for the -start parameter. If you e.g. want to
              iterate over all user pages starting at User:M, use
              -start:User:M.

-always - Don't prompt you for each replacement other: -

NOTE: Only use either -xml or -file or -page, but don't mix them.

Examples:

"""

# Utente:Wiso 2007
# Distributed under the terms of the GPL licence

from __future__ import generators

import sys
import re
import pprint

import wikipedia
import pagegenerators
import catlib
import config

__version__ = '$Id: coordbot.py,v 0.1 $'

# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.

# Default edit summary (Persian for "Bot: adding coordinates %s").
# main() passes it to wikipedia.setAction().
msg = u'ربات:افزودن مختصات %s'

# Coordinate-template patterns, grouped by how safe the rewrite is considered.
# Each entry is a (pattern, replacement) pair; CoordRobot.compileregex()
# replaces every pattern string with its compiled form in place.
#
# NOTE(review): apart from the first entry, the replacement strings are
# Persian parser-error messages ("invalid arguments passed to
# {{#coordinates:}}", "invalid latitude") rather than template markup. They
# look like rendered wiki output that took the place of the original
# replacement templates when this script was copied from a wiki page --
# confirm against the original script before relying on them.
templates = {
    'safe': [
        # Every Wiki:
        (r'\{\{ ?[Cc]oord(.*?)\}\}', r"الگو:Coord\1\n"),
        (r'{{coor[_ ]title[_ ]d\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}', r"متغیرهای نامعتبر در {{#coordinates:}} واردشده است.\n"),
        (r'{{coor[_ ]title[_ ]dm\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^\}]*?)\}\}', r"متغیرهای نامعتبر در {{#coordinates:}} واردشده است.\n"),
        (r'{{coor[_ ]title[_ ]dms\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}', r"متغیرهای نامعتبر در {{#coordinates:}} واردشده است.\n"),
        (r'\{\{ ?[Cc]oor[ _]d\|([0-9\.+-]+)\|([0-9\.+-])(\|?[^\|]*)\}\}', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'\{\{.*latd *= *([0-9\.]+).*longd ?= ?([0-9\.]+)', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        # English Wiki:
        (r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\|lat_sec *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*\|lon_sec *= *([0-9\.]+).*', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'.*\|latd *= *([0-9\.]+).*\|*latm *= *([0-9\.]+).*\|*lats *= *([0-9\.]+).*\|*latNS *= (.*?[NS])\n.*\|longd *= *([0-9\.]+).*\|*longm *= *([0-9\.]+).*\|*longs *= *([0-9\.]+).*\| longEW = (.*?[EW])*', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'.*\|*source_lat_d *= *([0-9\.]+).*\|*source_lat_m *= *([0-9\.]+).*\|*source_lat_s *= *([0-9\.]+).*\|*source_lat_NS *=*(.*?[NS])\n.*\| source_long_d *= *([0-9\.]+).*\|*source_long_m *= *([0-9\.]+).*\|*source_long_s *= *([0-9\.]+).*\| source_long_EW =*(.*?[EW])*', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        # Italian Wiki:
        (r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|*', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'..*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|latitudineNS *=(.*?[NS])\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*\n.*\|longitudineEW *=(.*?[EW])*', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
    ],
    'notsafe': [
        (r'\{\{ ?[Cc]oord[ _]dm\|([0-9]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'\{\{ ?[Cc]oor[ _]dms\|([0-9]+)\|([0-9\.]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}', r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
    ],
}
# Exception patterns: a page whose text matches any of these is skipped
# (pages that do not need coordinates, or that already have a coord template).
# CoordRobot.compileregex() replaces each string with its compiled pattern.
exceptions = [
    r'\{\{ *?Geobox',
    r'\{\{ ?[Cc]oord',
    r'\{\{ ?Template:[Cc]oord',
    r'\{\{ ?[mM]ontagna',
    r'\{\{ ?(Template:)?[cC]omune',
    # The next two patterns were mis-decoded in the copy this file came from
    # (the final "a with grave accent" of the Italian template names had
    # turned into two unrelated characters), so they could never match.
    # They are written as unicode literals with an explicit escape so the
    # accented letter survives any further re-encoding.
    u'\\{\\{ ?[cC]itt\u00e0',
    u'\\{\\{ ?[mM]unicipalit\u00e0',
    r'\{\{ ?[aA]eroporto\|',
    r'\{\{ ?[Mm]unicipi',
    r'\{\{ ?[iI]nfobox[ _]Azienda\|',
    r'\{\{ ?[Ss]\|aziende',
    r'\{\{ ?[Dd]isambigua\|',
    r'\{\{ ?[Ff]razione',
    r'\{\{ ?[Ss]quadra',
    r'\{\{ ?[Pp]asso ?(\||\n)',
    r'\{\{ ?[Bb]undesland[ _]tedesco',
]


class CoordRobot:
    """Bot that imports coordinates from the English Wikipedia.

    For every page yielded by the generator, ``run`` looks for the page's
    English interwiki partner, reads the ``&params=...`` link parameter from
    that article's rendered HTML and prepends a coord template built from it
    to the local page text.
    """

    def __init__(self, generator, autoTitle=False, autoText=False):
        """Store the page generator and compile the module-level patterns.

        ``autoTitle`` and ``autoText`` are accepted so existing callers keep
        working; this class does not read them.
        """
        self.generator = generator
        self.compileregex()

    def compileregex(self):
        """Compile the patterns in ``templates`` and ``exceptions`` in place.

        Entries that are already compiled are left untouched. Recompiling a
        compiled pattern together with a flags argument raises ValueError,
        which used to make the creation of a second robot fail.
        """
        for replacements in templates.values():
            for index, (old, new) in enumerate(replacements):
                if not hasattr(old, 'search'):
                    replacements[index] = re.compile(old, re.UNICODE), new
        for index, pattern in enumerate(exceptions):
            if not hasattr(pattern, 'search'):
                exceptions[index] = re.compile(pattern)

    def checkExceptions(self, text):
        """Return the text of the first exception pattern hit, else False."""
        for exception in exceptions:
            hit = exception.search(text)
            if hit:
                return hit.group(0)
        return False

    def change(self, page, new_text, comment=None):
        """Save ``new_text`` to ``page`` and report failures instead of raising.

        ``comment`` is passed to ``page.put`` as the edit summary. When it is
        omitted ``page.put`` is called with the text only (presumably the
        summary installed with ``wikipedia.setAction`` then applies -- TODO
        confirm).
        """
        try:
            if comment is None:
                page.put(new_text)
            else:
                page.put(new_text, comment)
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
        except wikipedia.SpamfilterError as url:
            wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))
        except Exception as err:
            # Best effort: one failing page must not stop the whole run,
            # but the reason is reported instead of being dropped.
            wikipedia.output(u'Cannot change %s: %r' % (page.title(), err))

    @staticmethod
    def _coord_template_from_html(html):
        """Build the coord template text from the link parameters in ``html``.

        Returns None when ``html`` has no usable ``&params=...`` value (no
        anchor, no match, more than two matches, or an ambiguous split).
        """
        if u"""<a href=""" not in html:
            return None
        found = re.findall(u"&params=(.+?)\"", html)
        # No match used to raise IndexError on found[-1]; more than two
        # matches was already treated as "something is wrong".
        if not found or len(found) > 2:
            return None
        params = found[-1]
        # Cut at the longitude hemisphere marker; what follows it is kept
        # separately from the coordinate values.
        if u"_W_" in params:
            parts = params.split(u"_W_")
            parts[0] = parts[0] + u"|W"
        else:
            parts = params.split(u"_E_")
            parts[0] = parts[0] + u"|E"
        if len(parts) > 2:
            return None
        coords = parts[0].replace(u"_", u"|")
        # NOTE(review): in the copy this class was rewritten from, the
        # statements building the template had been replaced by rendered wiki
        # output (a parser error message followed by a dangling "except:").
        # The u"{{coord|" prefix and the appended extra parameters are a
        # reconstruction; only the u"|display=title}}" suffix survived
        # verbatim. Confirm against the original script.
        template_new = u"{{coord|" + coords
        if len(parts) == 2 and parts[1]:
            template_new = template_new + u"|" + parts[1]
        return template_new + u"|display=title}}"

    def run(self):
        """Try to add a coordinate template to every page of the generator."""
        # Coordinates are taken from the English Wikipedia.
        sen = wikipedia.Site('en')
        local_site = wikipedia.getSite()
        en_site = wikipedia.getSite('en')
        # Separate names for the two URL prefixes: the original reused one
        # variable, so from the second page on the local URL was built with
        # the English path.
        local_path = local_site.family.nicepath('fa')
        en_path = en_site.family.nicepath('en')

        for page in self.generator:
            try:
                text_it = page.get()
            except Exception:
                wikipedia.output(u'Skipping %s because its text could not be loaded' % page.title())
                continue

            match = self.checkExceptions(text_it)

            # NOTE(review): the marker literal is empty in this copy (it looks
            # as if it was stripped when the script was pasted to the wiki).
            # An empty string is contained in every text, so this test is
            # always true and every page is skipped here. Restore the
            # original marker before running the bot.
            if u"" in text_it:
                continue

            # Skip all pages that contain certain texts.
            if match:
                wikipedia.output(u'Skipping %s because it contains %s' % (page.title(), match))
                continue

            url = '%s%s' % (local_path, page.urlname())
            # NOTE(review): same problem as above -- the marker searched for
            # in the rendered local page is empty, so this is always true.
            if u"""""" in local_site.getUrl(url):
                wikipedia.output(u'Skipping %s because it contains coordinate' % page.title())
                continue

            # Find the English interwiki partner of the page.
            for page_en in page.interwiki():
                if page_en.site() == sen:
                    break
            else:
                wikipedia.output(u"BAD")
                continue

            wikipedia.output(page.title())
            wikipedia.output(u'en: %s' % page_en.title())
            if u"#" in page_en.title():
                wikipedia.output(u"We don't go on page sections. I'm sorry but I have to skip")
                continue

            # The text itself is not needed; loading it only tells us whether
            # the English page exists and whether it is a redirect.
            try:
                page_en.get()
            except wikipedia.NoPage:
                wikipedia.output(u'Page %s not found' % page_en.title())
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Page %s is a redirect, follow redirect' % page_en.title())
                page_en = page_en.getRedirectTarget()

            html = en_site.getUrl('%s%s' % (en_path, page_en.urlname()))
            wikipedia.output(u"good")

            template_new = self._coord_template_from_html(html)
            if template_new is None:
                wikipedia.output(u"something is wrong... skipped")
                continue

            wikipedia.output(template_new)
            # Saving goes through change() so an edit conflict or a spam
            # filter hit skips this page instead of ending the whole run.
            self.change(page, template_new + u"\n" + text_it,
                        u"ربات: افزودن مختصات از ویکی‌پدیای انگلیسی")

def main():
    """Parse the command line, build the page generator and run the bot.

    Options handled here: -autotitle, -autotext, -page, -except:, -template:,
    -namespace: and -summary:. Every other argument is offered to the shared
    generator factory. Errors are reported and end the run; they are not
    re-raised.
    """
    try:
        gen = None
        # Which namespaces should be processed?
        # Default [] means all namespaces will be processed.
        namespaces = []
        PageTitles = []
        autoText = False
        autoTitle = False
        # This factory is responsible for processing command line arguments
        # that are also used by other scripts and that determine on which
        # pages to work on.
        genFactory = pagegenerators.GeneratorFactory()
        # Load default summary message.
        # BUG WARNING: This is probably incompatible with the -lang parameter.
        # NOTE(review): msg still contains its "%s" placeholder when it is
        # installed here -- confirm whether it should be filled in first.
        wikipedia.setAction(msg)

        # Read commandline parameters.
        for arg in wikipedia.handleArgs():
            if arg == '-autotitle':
                autoTitle = True
            elif arg == '-autotext':
                autoText = True
            elif arg.startswith('-page'):
                if len(arg) == 5:
                    PageTitles.append(wikipedia.input(u'Which page do you want to change?'))
                else:
                    PageTitles.append(arg[6:])
            elif arg.startswith('-except:'):
                # These used to be collected in a local list that nothing
                # read, so the option had no effect. They now join the
                # module-level exception patterns, which CoordRobot compiles
                # when it is created. The text is escaped because it is
                # documented as plain text, not as a regular expression.
                exceptions.append(re.escape(arg[8:]))
            elif arg.startswith('-template:'):
                # Consumed here so it is not handed to the generator
                # factory; the value itself is not used.
                pass
            elif arg.startswith('-namespace:'):
                namespaces.append(int(arg[11:]))
            elif arg.startswith('-summary:'):
                wikipedia.setAction(arg[9:])
            else:
                generator = genFactory.handleArg(arg)
                if generator:
                    gen = generator
        print(namespaces)
        if PageTitles:
            pages = [wikipedia.Page(wikipedia.getSite(), PageTitle) for PageTitle in PageTitles]
            gen = iter(pages)
        if not gen:
            # syntax error, show help text from the top of this file
            wikipedia.showHelp('coordbot')
            wikipedia.stopme()
            sys.exit()
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # Disabled in the original script:
        # gen = pagegenerators.RedirectFilterPageGenerator(gen)
        preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=120)
        bot = CoordRobot(preloadingGen, autoTitle, autoText)
        bot.run()
    except Exception as err:
        # The original swallowed every exception without a trace. The run
        # still ends quietly, but the cause is now reported. SystemExit and
        # KeyboardInterrupt are no longer intercepted here.
        wikipedia.output(u'coordbot stopped because of an error: %r' % (err,))


if __name__ == "__main__":
    # The original wrapped main() in a bare "except" that hid every failure,
    # including Ctrl-C. Errors now propagate normally; the finally clause
    # still lets the framework shut down via stopme() on every exit path.
    # NOTE(review): main() also calls stopme() on its help path, so it can
    # run twice -- assumed harmless, confirm against the framework.
    try:
        main()
    finally:
        wikipedia.stopme()