#!/usr/bin/python
"""Chi-squared goodness-of-fit reports on per-area sales counts.

Each report is written both as HTML and as RTF.
"""

import sys

import psycopg2
import rpy2.robjects as R


def dbstr():
    """Return the database connection string stored in ``db.txt``.

    The file content is returned verbatim; it is handed to
    ``psycopg2.connect`` by ``chisqfit``.
    """
    with open("db.txt") as f:
        return f.read()


def chisqfit(indus):
    """Test whether one industry's sales are spread evenly across areas.

    Counts the joined ``sales``/``cust`` rows per area for the given
    industry, runs R's ``chisq.test`` on those counts and writes the outcome
    to ``output-<indus>.html`` and ``output-<indus>.rtf``.

    Args:
        indus: Industry name, compared against the ``indus`` column.

    Raises:
        ValueError: If fewer than two areas have sales for the industry;
            ``chisq.test`` cannot be applied to fewer than two counts.
    """
    query = (
        "select area, count(*) from sales, cust "
        "where sales.cid=cust.cid and indus = %s "
        "group by area order by area;"
    )
    conn = psycopg2.connect(dbstr())
    try:
        cur = conn.cursor()
        # Bind the industry name as a parameter instead of splicing it into
        # the SQL text: quotes in the name can no longer break or alter the
        # statement.
        cur.execute(query, (indus,))
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    ars = [area for area, _ in rows]
    sls = [int(cnt) for _, cnt in rows]
    if len(sls) < 2:
        raise ValueError(
            "need sales in at least two areas to test industry %r, found %d"
            % (indus, len(sls))
        )

    alpha = 0.05
    x = R.FloatVector(sls)
    res = R.r['chisq.test'](x)
    # Read the p-value from the result by name rather than slicing R's
    # printed "[1] 0.123" representation, which is rounded for display.
    pval = float(res.rx2("p.value")[0])

    printHTML("output-" + indus + ".html", indus, ars, sls, pval, alpha)
    printRTF("output-" + indus + ".rtf", indus, ars, sls, pval, alpha)


def printHTML(fn, indus, ars, sls, pval, alpha):
    """Write the test outcome as HTML to the file ``fn``.

    Args:
        fn: Path of the output file; it is created or overwritten.
        indus: Industry name shown in the heading.
        ars: Area names, in the same order as ``sls``.
        sls: Sales counts per area.
        pval: p-value of the chi-squared test.
        alpha: Significance level ``pval`` is compared against.
    """
    if pval <= alpha:
        verdict = "significant"
    else:
        verdict = "not significant"

    # NOTE(review): the markup inside these literals was missing from the
    # reviewed text (the string literals were split across lines). The
    # <h1>/<p> tags below are restored to mirror the layout printRTF
    # produces -- confirm against the intended page layout.
    with open(fn, "w") as f:
        f.write("<h1>Testing " + _html_escape(indus)
                + " data for goodness of fit in areas "
                + _html_escape(str(ars)) + "</h1>\n")
        f.write("<p>Sales data: " + str(sls) + "\n")
        f.write("<p>This result is " + verdict
                + " at alpha = " + str(alpha) + ".\n")
        f.write("<p>The probability of arriving at these sales data")
        f.write(" when the actual frequencies are identical is "
                + str(pval) + ".\n")
    print("File " + fn + " written.")


def _html_escape(text):
    """Escape the characters that are markup in HTML text content."""
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
def printRTF(fn, indus, ars, sls, pval, alpha):
    """Write the test outcome as an RTF document to the file ``fn``.

    Args:
        fn: Path of the output file; it is created or overwritten.
        indus: Industry name shown in the heading.
        ars: Area names, in the same order as ``sls``.
        sls: Sales counts per area.
        pval: p-value of the chi-squared test.
        alpha: Significance level ``pval`` is compared against.
    """
    if pval <= alpha:
        verdict = "significant"
    else:
        verdict = "not significant"

    # The with-block closes the file even if one of the writes raises.
    with open(fn, "w") as f:
        f.write("{\\rtf {\\fonttbl {\\f0 Times New Roman;}}\n")
        # Names come from outside the program; escape them so a stray
        # backslash or brace cannot be read as RTF syntax.
        f.write("\\f0\\fs30 Testing " + _rtf_escape(indus)
                + " data for goodness of fit in areas "
                + _rtf_escape(str(ars)) + "\\par\\par\n")
        f.write("\\f0\\fs20 Sales data: " + str(sls) + "\n")
        f.write("\\par This result is " + verdict
                + " at alpha = " + str(alpha) + ".\n")
        f.write("\\par The probability of arriving at these sales data")
        f.write(" when the actual frequencies are identical is "
                + str(pval) + ".\n")
        f.write("}\n")
    # Single-argument call form prints the same text under Python 2 and 3.
    print("File " + fn + " written.")


def _rtf_escape(text):
    """Backslash-escape the characters RTF treats as syntax."""
    return text.replace("\\", "\\\\").replace("{", "\\{").replace("}", "\\}")
def main():
    """Run ``chisqfit`` for every industry named in ``industries.txt``.

    The file holds one industry name per line. Surrounding whitespace is
    stripped and blank lines are skipped, so a trailing newline at the end
    of the file does not trigger a test for an empty industry name.
    """
    # Read all names first so the file is closed before the reports run.
    with open("industries.txt") as industries:
        names = [line.strip() for line in industries]
    for name in names:
        if name:
            chisqfit(name)
# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()