# src/idTagDetector.py
#
# Takes a string as input. Detects SVG tags likely to designate geographical
# divisions such as continents, countries, administrative subdivisions, etc.

#from bs4 import BeautifulSoup
import re


class IdTagDetector:
    """Extract candidate geographic labels from the text of an SVG document.

    The detector scans the raw SVG text for a fixed set of attribute
    prefixes (see ``labelPrefixes``) and collects the quoted values that
    follow them, skipping any value that contains a digit.

    Typical use::

        detector = IdTagDetector(svgText)
        detector.detect()
        detector.listTags()
    """

    def __init__(self, arg):
        """Store the SVG text and initialise the (currently empty) configuration.

        arg -- the full text of the SVG document to scan.
        """
        self.arg = arg
        # Filled in by detect(); empty until then.
        self.detectedTags = []

        # Stop/white lists and their locations. None of the methods in this
        # class reads them yet; the loaders below are still stubs.
        self.stoplist = []
        self.stoplistPath = "../configs/stoplists"
        self.whitelist = []
        self.whitelistPath = "../configs/whitelists"
        # The prefixes that have been observed to hold geographic labels in the test maps
        self.labelPrefixes = ["path class=", "inkscape:label=", "label=", "id="]

    def loadStopList(self):
        """Return the stop list. Stub: always returns an empty list."""
        return []

    def loadWhiteList(self):
        """Return the white list. Stub: always returns an empty list."""
        return []

    def listTags(self):
        """Print the tags found by the last call to detect()."""
        print(self.detectedTags)

    def detect(self):
        """Scan ``self.arg`` and store the candidate labels in ``self.detectedTags``.

        For each prefix in ``labelPrefixes`` (in order), every occurrence of
        ``prefix"value"`` contributes ``value`` to the result, provided the
        value is non-empty and contains no digit. Results are grouped by
        prefix, in document order within each prefix.

        NOTE(review): the prefixes overlap ("label=" also matches inside
        "inkscape:label=", and "id=" inside any attribute name ending in
        "id"), so the same value can be reported more than once. That
        behaviour is left unchanged here.
        """
        candidates = []
        for labelPrefix in self.labelPrefixes:
            # The value must not contain a double quote: otherwise an empty
            # attribute such as id="" lets the match start on the closing
            # quote and swallow the text up to the next attribute's opening
            # quote. The prefix is escaped so it is always matched literally.
            pattern = re.compile(re.escape(labelPrefix) + '"([^0-9"]+)"')
            candidates.extend(pattern.findall(self.arg))

        self.detectedTags = candidates


# Earlier BeautifulSoup-based experiment, kept for reference:
#
#subdivisionTag = "path"
#labelTag = "inkscape:label"

#soup = BeautifulSoup(self.arg)
#candidates = soup.findAll(subdivisionTag)
#for candidate in candidates:
#print(candidate)
##if candidate[labelTag][0] == "#":
##break
##if candidate.has_attr(labelTag):
##self.detectedTags.append(candidate[labelTag])
##self.detectedTags.append(candidate[labelTag])

#####################################################################
# Labels observed in the test maps (file ==> expected label)
#####################################################################
###############################################
#../maps/Blank_Map_Africa_1932.svg
#
# ==> tn
###############################################
# Remaining notes from src/idTagDetector.py: labels observed in the test maps
# (file ==> expected label)
###############################################
#../maps/USA_Counties_with_FIPS_and_names.svg
#
# ==> Teton, WY
###############################################
#../maps/Blank_map_of_Europe_1815.svg
#
# ==> Serbia
###############################################
#../maps/World98.svg
#
# ==> Iran:Semnan Province

#####################################################################
# Attribute forms in which those labels appear:
# path class="land tn"
# inkscape:label="Teton, WY"
# id="Serbia"
#

#####################################################################
# src/testIdTagDetector.py
#####################################################################
#!/usr/bin/python
#
# Command-line driver for IdTagDetector: reads each SVG file named on the
# command line, runs the detector on its contents and prints the tags found.

import argparse

from idTagDetector import IdTagDetector


#-----------------------------------
# Management of arguments
#-----------------------------------
parser = argparse.ArgumentParser(description='test of idTagDetector: takes SVG maps as argument; sould return a list of ID tags used in the file to designate continents, countries, administrative subdivisions, etc. in standard output.')
# positional argument
parser.add_argument("infiles", nargs='+', help="filenames of the SVG files against which to test idTagDetector.")

args = parser.parse_args()


#-----------------------------------
# Main
#-----------------------------------
for infile in args.infiles:
    print("Reading from "+infile+".")
    # Read inside a with-block so the file handle is closed right away
    # instead of being left open until garbage collection.
    with open(infile, 'r') as svgFile:
        svg = svgFile.read()
    detector = IdTagDetector(svg)
    detector.detect()
    detector.listTags()