#!/usr/bin/python
"""Chi-squared goodness-of-fit reports on per-area sales counts.

For every industry listed in ``industries.txt`` the script counts the rows of
the ``sales``/``cust`` join per area, runs R's ``chisq.test`` on those counts
and writes the outcome to ``output-<industry>.html`` and
``output-<industry>.rtf``.
"""
import sys

import psycopg2
import rpy2.robjects as R


def dbstr():
    """Return the contents of ``db.txt``, used as the connection string."""
    with open("db.txt") as f:
        return f.read()


def chisqfit(indus, alpha=0.05):
    """Test the per-area sales counts of one industry and write both reports.

    Args:
        indus: Value matched against the ``indus`` column in the query.
        alpha: Significance level the p-value is compared with.
    """
    conn = psycopg2.connect(dbstr())
    try:
        cur = conn.cursor()
        try:
            # Bind the industry as a query parameter instead of splicing it
            # into the SQL text, so quotes in the value cannot alter the query.
            cur.execute(
                "select area, count(*) from sales, cust "
                "where sales.cid = cust.cid and indus = %s "
                "group by area order by area;",
                (indus,),
            )
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    ars = [area for area, _ in rows]
    sls = [int(cnt) for _, cnt in rows]

    res = R.r['chisq.test'](R.FloatVector(sls))
    # Read the numeric p-value straight from the result list rather than
    # slicing it out of R's printed "[1] ..." representation.
    pval = float(res.rx2("p.value")[0])

    printHTML("output-" + indus + ".html", indus, ars, sls, pval, alpha)
    printRTF("output-" + indus + ".rtf", indus, ars, sls, pval, alpha)


def printHTML(fn, indus, ars, sls, pval, alpha):
    """Write the test outcome for ``indus`` to the file ``fn`` as HTML.

    Args:
        fn: Path of the file to create or overwrite.
        indus: Industry name shown in the heading.
        ars: Area labels, in the same order as ``sls``.
        sls: Sales counts per area.
        pval: p-value of the test.
        alpha: Significance level ``pval`` is compared with.
    """
    # NOTE(review): the markup tags in these literals were missing from the
    # reviewed copy of this file; <h1>/<p>/<br> are reconstructed to mirror
    # the paragraph layout of printRTF -- confirm against the original.
    with open(fn, "w") as f:
        f.write("<h1>Testing " + indus + " data for goodness of fit in areas "
                + str(ars) + "</h1>\n")
        f.write("<p>Sales data: " + str(sls) + "\n")
        if pval <= alpha:
            f.write("<br>This result is significant at alpha = "
                    + str(alpha) + ".\n")
        else:
            f.write("<br>This result is not significant at alpha = "
                    + str(alpha) + ".\n")
        f.write("<br>The probability of arriving at these sales data")
        f.write(" when the actual frequencies are identical is "
                + str(pval) + ".\n")
    # Single-argument form prints the same text under Python 2 and Python 3.
    print("File %s written." % fn)


def printRTF(fn, indus, ars, sls, pval, alpha):
    """Write the test outcome for ``indus`` to the file ``fn`` as RTF.

    Args:
        fn: Path of the file to create or overwrite.
        indus: Industry name shown in the heading.
        ars: Area labels, in the same order as ``sls``.
        sls: Sales counts per area.
        pval: p-value of the test.
        alpha: Significance level ``pval`` is compared with.
    """
    with open(fn, "w") as f:
        f.write("{\\rtf {\\fonttbl {\\f0 Times New Roman;}}\n")
        f.write("\\f0\\fs30 Testing " + indus
                + " data for goodness of fit in areas " + str(ars)
                + "\\par\\par\n")
        f.write("\\f0\\fs20 Sales data: " + str(sls) + "\n")
        if pval <= alpha:
            f.write("\\par This result is significant at alpha = "
                    + str(alpha) + ".\n")
        else:
            f.write("\\par This result is not significant at alpha = "
                    + str(alpha) + ".\n")
        f.write("\\par The probability of arriving at these sales data")
        f.write(" when the actual frequencies are identical is "
                + str(pval) + ".\n")
        f.write("}\n")
    # Single-argument form prints the same text under Python 2 and Python 3.
    print("File %s written." % fn)


def main():
    """Run ``chisqfit`` for every non-blank line of ``industries.txt``."""
    with open("industries.txt") as f:
        names = [line.strip() for line in f]
    for indus in names:
        # A blank line would query for an empty industry name and hand R an
        # empty vector, so skip it.
        if indus:
            chisqfit(indus)


if __name__ == '__main__':
    main()