nina.py
No OneTemporary
Actions

Size

9 KB

Referenced Files

None

Subscribers

None

nina.py
View Options

	#!/usr/bin/python
	import argparse
	import csv
	import sys
	from bs4 import BeautifulSoup


	#-----------------------------------
	# Management of arguments
	parser = argparse.ArgumentParser(description='Colours counties in SVG map depending on data.')
	parser.add_argument('-i', '--infile', dest='infile', action='store',
	default="data.txt",
	help='set input from statistical data file (default: \"data.txt\")')
	parser.add_argument('-m', '--map', dest='mapfile', action='store',
	default="map.svg",
	help='set SVG file to be coloured (default: \"map.svg\")')
	parser.add_argument('-o', '--output', dest='outfile', action='store',
	default="output.svg",
	help='set output file (default: \"output.svg\")')

	args = parser.parse_args()
	#-----------------------------------
	# Loads data from data file
	# (Specific to "FATAL ENCOUNTERS SPREADSHEET (Responses) - Form Responses.csv")
	def loadDataFromFile(infile):
	dataBySubdivision = {}
	dataLine = csv.reader(open(infile), delimiter=",")
	next(dataLine) # skip headers
	for row in dataLine:
	victimName = row[1].strip()
	victimAge = row[2].strip()
	victimGender = row[3].strip()
	victimSoCalledRace = row[4].strip()
	victimPhoto = row[5].strip()
	incidentDate = row[6].strip()
	incidentAddress = row[7].strip()
	incidentCity = row[8].strip()
	incidentState = row[9].strip()
	incidentZipCode = row[10].strip()
	incidentCounty = row[11].strip()
	incidentResponsible = row[12].strip()
	causeOfDeath = row[13].strip()
	circumstances = row[14].strip()
	justifiedOrNot = row[15].strip()
	newsArticle = row[16].strip()
	psychiatricEvaluation = row[17].strip()
	source = row[18].strip()
	emailAddress = row[19].strip()
	date = row[20].strip()
	#nothing at position 21
	IncidentIdentifier = row[22].strip()

	subdivision = incidentCounty+", "+incidentState
	if(subdivision in dataBySubdivision.keys()):
	dataBySubdivision[subdivision] = dataBySubdivision[subdivision]+1
	else:
	dataBySubdivision[subdivision] = 1
	return dataBySubdivision

	#-----------------------------------
	# Levensthein distance between two strings
	def levenshtein(arg1, arg2):
	#trivial cases
	if arg1 == arg2: return 0
	if arg1 == '' : return len(arg2)
	if arg2 == '' : return len(arg1)
	#
	distance = 666 # If "666" is returned, you probably have a problem.
	v1 = [i for i in range (0, len(arg2))]
	v2 = [i for i in range (0, len(arg2))]
	for i in range (0, len(arg1)):
	v2[0] = i + 1
	for j in range (1, len(arg2)):
	cost = 0
	if not arg1[i] == arg2[j]:
	cost = 1
	v2[j] = min(v2[j-1]+1, v1[j], v1[j-1]+cost)
	for j in range (0, len(v1)):
	v1[j] = v2[j]
	distance = v2[len(arg2)-1]-1
	# Uncomment this line if you want to see the values (debug)
	#print "Dist \""+arg1+\"" - \""+arg2+"\": "+str(distance)
	return distance

	# Returns the element of argList that has the smallest distance to argString
	def levenshtein_all(argString, argList):
	#trivial case
	if(argString in argList):
	return argString
	#non-trivial
	bestDistance = 999999
	bestCandidate = ""
	for argListElement in argList:
	thisDistance = levenshtein(argListElement, argString)
	if thisDistance < bestDistance:
	bestDistance = thisDistance
	bestCandidate = argListElement
	return bestCandidate

	#-----------------------------------
	# Creates a dictionary of subdivision identifiers
	# keys: subdivisions per Data file conventions (as found in dataBySubdivision)
	# values: subdivision identifiers as present in the map (SVG) file.
	def fillSubdivisionDict(dataDict, svgMapFile):
	subdivisionLabelCorrespondanceDict = {}
	subdivisionLabelsInSVG = []
	subdivisionLabelsInData = dataDict.keys()

	# Filling subdivisionLabelsInSVG
	svg = open(svgMapFile, 'r').read()
	subdivisionTag = "path"
	labelTag = "inkscape:label"
	soup = BeautifulSoup(svg)
	#soup = BeautifulSoup(svg, selfClosingTags=['defs','sodipodi:namedview'])
	subdivisions = soup.findAll(subdivisionTag)
	for subdivision in subdivisions:
	if subdivision.has_attr(labelTag):
	subdivisionLabelsInSVG.append(subdivision[labelTag])

	# Detect mismatchs and attempt to find correspondances.
	mismatchingLabelsDict = {}
	counter = 0
	for subdivisionLabelInData in subdivisionLabelsInData:
	sys.stdout.write("\rChecking data consistancy: %d / %d" % (counter,len(subdivisionLabelsInData)) )
	sys.stdout.flush()
	#print("\r"+str(counter) + "/" + str(len(subdivisionLabelsInData)))
	counter = counter + 1
	correspondingLabel = levenshtein_all(subdivisionLabelInData, subdivisionLabelsInSVG)
	subdivisionLabelCorrespondanceDict[subdivisionLabelInData] = correspondingLabel
	#subdivisionLabelsInSVG.remove(correspondingLabel)
	if not (subdivisionLabelInData == correspondingLabel):
	#print ( subdivisionLabelInData+" --> "+correspondingLabel )
	mismatchingLabelsDict[subdivisionLabelInData] = correspondingLabel
	print()

	if (len(mismatchingLabelsDict) > 0):
	print(" *** CAUTION: the following names do not match in Data file and Map file and")
	print(" will automatically be made to match. Please check for errors." )
	print(" If necessay, correct the labels in the data file and re-run." )
	print("" )
	print(" Name in Data --> Name in Map." )
	print(" -----------------------------" )
	for i in mismatchingLabelsDict:
	print(" "+i+" --> "+mismatchingLabelsDict[i])
	#if not (i == subdivisionLabelCorrespondanceDict[i]):
	#print ( i+" --> "+subdivisionLabelCorrespondanceDict[i] )

	return subdivisionLabelCorrespondanceDict
	#-----------------------------------
	# Precomputes hexadecimal colour values to put into SVG
	def computeColours(dataBySubdivision, subdivisionIndentifierCorrespondance):
	colourBySubdivision = {}
	maximumValue = max(dataBySubdivision.values())

	import math
	for i in dataBySubdivision:
	greenValue = (maximumValue - dataBySubdivision[i])/maximumValue*255
	#redValue = dataBySubdivision[i]/maximumValue*255
	redValue = math.log(dataBySubdivision[i])/math.log(maximumValue)*255
	blueValue = 0
	#rgbTuple = (greenValue, redValue, blueValue)
	rgbTuple = (greenValue, greenValue, greenValue)
	hexColour = '#%02x%02x%02x' % rgbTuple
	#hexColour = "#FFFF00"
	colourBySubdivision[subdivisionIndentifierCorrespondance[i]]=hexColour

	return colourBySubdivision

	#-----------------------------------
	# puts desired colours into SVG file.
	def editMap(svgMapFile, colourBySubdivision):

	svg = open(svgMapFile, 'r').read()
	subdivisionTag = "path"
	labelTag = "inkscape:label"
	soup = BeautifulSoup(svg)
	subdivisions = soup.findAll(subdivisionTag)

	#pathStyle = "font-size:12px;fill:#d0d0d0;fill-rule:nonzero;stroke:#000000;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel"
	pathStylePrefix = "font-size:12px;fill:"
	pathStylePostfix = ";fill-rule:nonzero;stroke:#000000;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel"

	#print(colourBySubdivision.keys())
	for subdivision in subdivisions:
	#print (subdivision['id'], subdivision['inkscape:label'])
	if (subdivision['id'] not in ["State_Lines", "separator"]) :
	if (subdivision['inkscape:label'] in colourBySubdivision.keys()):
	subdivisionColour = colourBySubdivision[subdivision['inkscape:label']]
	#print(" known subdivision: "+subdivision['inkscape:label']+", colour = "+subdivisionColour)
	else:
	subdivisionColour = "#FFFFFF"
	#print(" *** CAUTION: unknown subdivision: "+subdivision['inkscape:label'])
	subdivision['style'] = pathStylePrefix + subdivisionColour + pathStylePostfix
	#print(sorted(colourBySubdivision.keys()))
	# Output map
	return soup.prettify()

	#-----------------------------------
	def saveMap(svgContent, outfile):
	print("Saving results to "+outfile+"..."),
	f = open(outfile,'w')
	f.write(svgContent) # python will convert \n to os.linesep
	f.close() # you can omit in most cases as the destructor will call if
	print(" DONE.")
	###################################################################
	#-----------------------------------
	print("Reading statistical data from "+args.infile+" and geographic data from "+args.mapfile+". Outputing results to "+args.outfile+".")

	dataBySubdivision = {}
	dataBySubdivision = loadDataFromFile(args.infile)

	subdivisionIndentifierCorrespondance = {} # translates the identifiers of subdivisions from Data file conventions to SVG map conventions.
	subdivisionIndentifierCorrespondance = fillSubdivisionDict(dataBySubdivision, args.mapfile)

	colourBySvgSubdivision = computeColours(dataBySubdivision, subdivisionIndentifierCorrespondance)
	svgContent = editMap(args.mapfile, colourBySvgSubdivision)
	saveMap(svgContent, args.outfile)


	print("I rest my case")

File Metadata

Mime Type: text/x-python
Expires: Mon, Sep 15, 09:56 (4 h, 22 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 2980469
Default Alt Text: nina.py (9 KB)

nina.pyNo OneTemporaryActions

nina.pyView Options

File Metadata

Event Timeline

nina.py
No OneTemporary
Actions

nina.py
View Options