// File: la_stats_1971-2011_analysis_public.do
// Creator: Diarmuid McDonnell
// Created: 22/08/2018

/* 
   
	Syntax file for producing analysis underpinning Research Note: 
   
	McDonnell, D. P., Mohan, J., & Norman, P. "Charity Density and Social Need: A Longitudinal Perspective". 
		Nonprofit and Voluntary Sector Quarterly.
		
	Questions can be directed to corresponding author Diarmuid McDonnell (https://diarmuidm.github.io/).
   
	The files needed for analysis:
		- la_stats_1971-2011_analysis_public.dta OR
		- la_stats_1971-2011_analysis_public.csv
		
	The codebook describing the variables: la_stats_1971-2011_analysis_codebook.txt
		
	Contents:
		- Define Paths [DP001]
		- Descriptive Statistics [DS001]
		- Statistical Modelling [SM001]
		
*/


******* Preliminaries *******

// These are all handled by profile.do

/*
clear
capture clear matrix
set mem 400m // not necessary in recent versions of Stata
set more off, perm
set scrollbufsize 2048000
exit
*/

** [DP001]
include "C:\Users\mcdonndz-local\Desktop\github\a-tale-of-four-cities\do_files\leverhulme_paths.doi"


/* Empty figures folder */
	
	**shell rmdir $path1 /s /q
	/*
	pwd
	
	local workdir "C:\Users\mcdonndz-local\Desktop\github\a-tale-of-four-cities\figures\"
	cd `workdir'
	
	local datafiles: dir "`workdir'" files "*.gph"
	
	foreach datafile of local datafiles {
		rm `datafile'
	}
	
	local datafiles: dir "`workdir'" files "*.png"
	
	foreach datafile of local datafiles {
		rm `datafile'
	}
	*/


/* 1. Open the clean dataset */

**global tdate = subinstr("$S_DATE", " ", "", .) // Make sure I update this at the beginning of every analysis session.
**di `tdate'
**local tdate "27Mar2018"

capture log close
**log using $path9\rqone_`tdate'.log, text replace

use $path3\la_stats_1971-2011_analysis.dta, clear
capture datasignature report
count
desc, f
notes
/*
	NOTES:
		- Correlation matrix of Townsend to charpop for each year (see John's email and Liverpool slide). [DONE]
		- Decide on the correct correlation statistic: either Spearman's rank correlation coefficient or Pearson's r of the rank variables. [DONE]
		- Table of means of TS quintiles and charpop, by year (try and graph this; see old Stirling notes).
		- Consider standardised version of charpop. [DONE]
		- Consider violin plot. [DONE]
		- Look at weights and what they tell us (see Slack comments). [DONE]
		- Look at distribution of new charities between census years i.e. are they basing themselves in more deprived areas? Unlikely but worth checking.
		- Say something about differences between local authorities in terms of types of charities i.e. deprivation quintiles and % of social services.
		- Add in other explanatory variables for sensitivity analysis: `pph_cat` and ONS stats (see raw data folder)
		- Deciles of town and link to charpop; describe the relationship better i.e. sharp drop in charpop initially, then a leveling out, then an upward tick.
*/
		
	
	/* Export version for archiving */
	/*
		Rationalise number of variables. [DONE]
		Label all important variables. [DONE]
		Produce codebook.
		Export .dta and .csv formats.
	*/
		
	preserve
		// Rationalise notes and variables
		
		notes drop _dta in 8/20
		desc, f
		drop ln_punemp-cunum_19 lregpop-cprcv *merge
		label variable town2 "Squared value of Townsend Score"
		label variable lag_charpop "Number of charities per 5,000 residents in previous census period"
		label variable lag_lcharpop "Number of local charities per 5,000 residents in previous census period"
		label variable lag_ncharpop "Number of national charities per 5,000 residents in previous census period"
		label variable lag_ocharpop "Number of overseas charities per 5,000 residents in previous census period"
		forvalues i = 1/19 {
			label variable lag_charpop_icnpo`i' "Number of icnpo`i' charities per 5,000 residents in previous census period"
		}
		label variable ncharpop "Number of national charities per 5,000 residents"
		label variable ocharpop "Number of overseas charities per 5,000 residents"
		label variable charpop_change "Change in charity density between census periods"
		label variable town_change "Change in deprivation between census periods"
		label variable town2_change "Change in deprivation (squared term) between census periods"
		label variable pph_cat_change "Change in urban/rural density between census periods"
		label variable lag_charpop_change "Change in prior charity density between census periods"
		label variable rural_change "Local authority became more rural between census periods"
		label variable urban_change "Local authority became more urban between census periods"
		desc, f
		
		// Produce codebook
		
		capture log using C:\Users\mcdonndz-local\Desktop\github\a-tale-of-four-cities\logs\la_stats_1971-2011_analysis_codebook.txt, replace text
		desc, f
		capture log close
		
		// Export .dta and .csv versions
		
		sav $path3\la_stats_1971-2011_analysis_public.dta, replace
		export delimited using $path3\la_stats_1971-2011_analysis_public.csv, replace
		
	restore
		
	egen std_charpop = std(charpop)
	sum std* charpop, detail
	return list
	
	dotplot std_charpop , over(year) yline(-2 0 2, lpatt(longdash)) 
	vioplot std_charpop , over(year) yline(-2 0 2, lpatt(longdash)) obs

****************************************************************************************************************************

	
****************************************************************************************************************************

	
	/* Map of charity density */
	
	preserve
		use $path3\la_stats_1971-2011_maps.dta, clear
		desc, f
		l la_code la_name charpop_mean
		
		sum charpop_mean, detail
		*hist charpop_mean , norm freq scheme(s1mono)
		replace charpop_mean = round(charpop_mean, 1)
		
		capture grmap, activate
		grmap charpop_mean , clm(q) cln(5) ndo(red) legt("Density Quintiles") legs(2) legend(pos(10) size(vsmall)) ///
			title("Mean Charity Density (1971-2011)") subtitle("By local authority") ///
			note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) ///
			caption("Local authorities with a level of charity density in the 99th percentile are excluded.", size(vsmall))
		graph export $path6\la_stats_charpop_map.png, replace width(4096)
			
		forvalues y = 1971(10)2011 {
			grmap charpop`y', clm(q) cln(5) ndo(red) legt("Density Quintiles") legs(2) legend(pos(10) size(vsmall)) ///
				title("Mean Charity Density (`y')") subtitle("By local authority") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) ///
				caption("Local authorities with a level of charity density in the 99th percentile are excluded.", size(vsmall))
			graph export $path6\la_stats_charpop_map_`y'.png, replace width(4096)
		}
			
	restore
	
	
	/* Map of material deprivation */

	preserve
		use $path3\la_stats_1971-2011_maps.dta, clear
		desc, f
		l la_code la_name town_mean
		
		sum town_mean, detail
		*hist town_mean , norm freq scheme(s1mono)
		replace town_mean = round(town_mean, 1)
		
		capture grmap, activate
		grmap town_mean , clm(q) cln(5) ndo(red) legt("Density Quintiles") legs(2) legend(pos(10) size(vsmall)) ///
			title("Mean Townsend Score (1971-2011)") subtitle("By local authority") ///
			note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) ///
			caption("Local authorities with a level of charity density in the 99th percentile are excluded.", size(vsmall))
		graph export $path6\la_stats_town_map.png, replace width(4096)
			
		forvalues y = 1971(10)2011 {
			grmap town`y', clm(q) cln(5) ndo(red) legt("Density Quintiles") legs(2) legend(pos(10) size(vsmall)) ///
				title("Mean Townsend Score (`y')") subtitle("By local authority") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) ///
				caption("Local authorities with a level of charity density in the 99th percentile are excluded.", size(vsmall))
			graph export $path6\la_stats_town_map_`y'.png, replace width(4096)
		}
			
	restore
	
	
	/* Sample description */
	/*
		Overall statistics for the dependent and independent variables.
	*/
		
	xtdes
	count
		
	// Table A1. Percentages of people or households for each component measure of the Townsend Score, by census year
	
	tabstat punemp pnonown pnocar povercr, s(p50) by(year) // Lower than figures in Lloyd et al (2017) due to absence of Scottish LAs
	
	// Table 2. 
	
	tabstat popest town *cunum *charpop, s(p5 p25 p50 p75 p95 mean sd n)
	tabstat popest town *cunum *charpop, s(p5 p25 p50 p75 p95 mean sd n) by(year)
	
	sum charpop, detail
	hist charpop , norm freq // Poisson distribution
		
	// Figure 1.
	
	bys year: sum charpop, detail
	/*
		Look at percentage changes in charpop over time, by different areas and percentiles/quintiles of charpop.
	*/
	
	spearman charpop year 
	quietly sum charpop, detail
	local cmean = `r(mean)'
	local numobs = `r(N)'
	
	local fdate "20190129"

	
	dotplot charpop , over(year) median title("Distribution of Charity Density") subtitle("By census year") ///
		ytitle("No. of charities per 5,000 residents") yline(`cmean', lpatt(solid) lcolor(gs12)) ///
		note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
			"Local authorities with a level of charity density in the 99th percentile are excluded." ///
			"Solid line represents the mean value for the entire period." ///
			"Spearman's rho = .41 (p < .001)", span size(small)) ///
		scheme(s1mono)
	graph export $path6\la_stats_charpop_`fdate'.png, replace width(4096)
	
	sum lcharpop, detail
	dotplot lcharpop, over(year) median // Exclude extreme outliers; I like this graph
	graph export $path6\la_stats_lcharpop_`fdate'.png, replace width(4096)
	/*
		Not much difference between the two plots, unsurprising given that Local charities are the most common type.
	*/
	
	// Figure #.
	
	spearman town year 
	quietly sum town, detail
	local numobs = `r(N)'
	
	dotplot town, over(year) median title("Distribution of Deprivation") subtitle("By census year") ///
		ytitle("Townsend Score") yline(0.00, lpatt(solid) lcolor(gs12)) ///
		yline(-2 2, lpatt(longdash) lcolor(gs12)) ///
		text(-5.00 1971 "Less deprived", place(e) size(vsmall) color(gs1)) ///
		text(13.00 1971 "More deprived", place(e) size(vsmall) color(gs1)) ///
		note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'. Produced: $S_DATE" ///
			"Local authorities above the solid line are more deprived than average in a given year, while those below are less deprived." ///
			"Local authorities above and below the dashed lines are two standard deviations or more away from the average." ///
			"Spearman's rho = -.61 (p < .001)", span size(vsmall)) ///
		scheme(s1mono)
	graph export $path6\la_stats_town_`fdate'.png, replace width(4096)


	
	// Change over time in ratio of charities to population
	
	preserve
		collapse (p50) medchar=cunumpchange (p5) fchar=cunumpchange (p25) tchar=cunumpchange (p75) schar=cunumpchange (p95) nchar=cunumpchange (mean) mchar=cunumpchange, by(year)
		desc, f
		list
		foreach var of varlist medchar-mchar {
			replace `var'=0 if year==1971
		}
		lgraph fchar tchar medchar mchar schar nchar year, ///
			wide title("Percentage Change in Registered Charities") subtitle("By census year") ytitle("% change") ///
			ylabel(, labsize(small)) xlabel(, labsize(small)) ///
			legend(label(1 "p5") label(2 "p25") label(3 "median") label(4 "mean") label(5 "p75") label(6 "p95") size(vsmall) rows(2)) ///
			note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,650. Produced: $S_DATE" ///
				"1971 = base year.", span size(vsmall)) ///
			scheme(s1mono)			
		graph export $path6\la_stats_cunumchange_`fdate'.png, replace width(4096)
	restore
	
	preserve
		collapse (p50) medchar=charpoppchange (p5) fchar=charpoppchange (p25) tchar=charpoppchange (p75) schar=charpoppchange (p95) nchar=charpoppchange (mean) mchar=charpoppchange, by(year)
		desc, f
		list
		foreach var of varlist medchar-mchar {
			replace `var'=0 if year==1971
		}
		lgraph fchar tchar medchar mchar schar nchar year, ///
			wide title("Percentage Change in Charity/Population Ratio") subtitle("By census year") ytitle("% change") ///
			ylabel(, labsize(small)) xlabel(, labsize(small)) ///
			legend(label(1 "p5") label(2 "p25") label(3 "median") label(4 "mean") label(5 "p75") label(6 "p95") size(vsmall) rows(2)) ///
			note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,650. Produced: $S_DATE" ///
				"1971 = base year.", span size(vsmall)) ///
			scheme(s1mono)			
		graph export $path6\la_stats_charpopchange_`fdate'.png, replace width(4096)
	restore

	
		sum charpoppchange, detail
		dotplot charpoppchange if charpoppchange <= r(p99), over(year) median // Exclude extreme outliers; I like this graph
	
	// Figure A1. 
	preserve
		collapse (p50) medchar=cunumpchange medpop=popestpchange medratio=charpoppchange, by(year)
		desc, f
		list
		replace medchar=0 if year==1971
		replace medpop=0 if year==1971
		replace medratio=0 if year==1971
		twoway (scatter medchar year, c(l) m(Th) xlab(1971(10)2011)) (scatter medpop year, c(l) m(Oh)) (scatter medratio year, c(l) m(Dh)), ///
			title("Percentage Change in LA Characteristics") subtitle("By census year") ytitle("Median change (%)") ///
			legend(label(1 "Number of charities") label(2 "Size of population") label(3 "Charity/Population ratio") size(vsmall) rows(1)) ///
			note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,650. Produced: $S_DATE" ///
				"1971 = base year.", span size(vsmall)) ///
			scheme(s1mono)
		graph export $path6\la_stats_typchange_`fdate'.png, replace width(4096)
	restore
	
	// Changes in the level of registered charities and population over time
	
	tabstat popest cunum, s(p50) by(year) // Lower than figures in Lloyd et al (2017) due to absence of Scottish LAs

	// Variation in charpop over time - UPDATE!!
	
	tabstat charpop if la_name!="City of London", s(min max range p50) by(year) 
	di 37.9327/1.364685
	di 66.64366/2.138951
	di 90.14657/3.498927
	di 68.04243/4.581362
	di 66.46719/4.750844
	
	tabstat lcharpop if la_name!="City of London", s(min max range p50) by(year) 
	di 29.13164/.972625
	di 33.27556/1.426185
	di 37.03299/1.875568
	di 40.59825/4.252478
	di 35.61769/4.187781

	// Produce correlation matrix of char ratios for a given year (Table 2)
	
	pwcorr charpop lcharpop ncharpop ocharpop, star(.05) obs sig
	bys year: pwcorr charpop lcharpop ncharpop ocharpop, star(.05) obs sig
	/*
		Correlation between these variables for each time period. Now look at correlation within a variable across time periods.
	*/
	
		preserve
			keep la_id charpop period
			reshape wide charpop , i(la_id) j(period)
			desc, f
			list
			pwcorr charpop* , star(.05) obs sig
			twoway (scatter charpop5 charpop1) (lfit charpop5 charpop1) ///
			, xtitle("Density 1971") ytitle("Density 2011") ///
			legend(off) scheme(s1mono) 
			**pwcorr charpop* lcharpop* , star(.05) obs sig
			**pwcorr charpop* ncharpop* , star(.05) obs sig
			**pwcorr charpop* ocharpop* , star(.05) obs sig
		restore
		
		preserve
			keep la_id charpop_rank period
			reshape wide charpop_rank , i(la_id) j(period)
			desc, f
			list
			pwcorr charpop_rank* , star(.05) obs sig
			**pwcorr charpop* lcharpop* , star(.05) obs sig
			**pwcorr charpop* ncharpop* , star(.05) obs sig
			**pwcorr charpop* ocharpop* , star(.05) obs sig
		restore
		
		
	// Change in ranking for charpop
	/*
	hist charpop_rank_change ///
		, norm by(year) ///
		title("Distribution of Change in Charity Density Ranking") subtitle("By census year") ///
		scheme(s1mono) 
	hist lcharpop_rank_change, norm scheme(s1mono) by(year)
	hist ncharpop_rank_change, norm scheme(s1mono) by(year)
	hist ocharpop_rank_change, norm scheme(s1mono) by(year)
	*/
	
	bys year: sum charpop_rank_change, detail
	
	dotplot charpop_rank_change , over(year) title("Distribution of Change in Density Ranking") subtitle("By census year") ///
		ytitle("Change in ranking") yline(0, lpatt(solid) lcolor(gs12)) ///
		note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,650. Produced: $S_DATE" ///
			"Solid line represents the mean value for the entire period." , span size(vsmall)) ///
		scheme(s1mono)
	graph export $path6\la_stats_charpoprank_`fdate'.png, replace width(4096)
	/*
		Double-check the normal distribution of these variables is correct and not a relic of data management gremlins.
	*/
	
		// Coefficient of variation for charpop_ranking
		
		hist cprcv ///
			, norm percent scheme(s1mono) title("Variability in Charity Density Ranking") subtitle("1971-2011") ///
			xtitle("Coefficient of Variation (%)")
		sum cprcv, detail
		list la_name charpop_rank cunum popest year cprcv if cprcv >= `r(p95)'
		list la_name charpop_rank cunum popest year cprcv if cprcv <= `r(p5)'
	
	// Coefficient of variation for charity density

	sum overallcv, detail
	list la_name charpop cunum popest year overallcv if overallcv >= `r(p95)'
	list la_name charpop cunum popest year overallcv if overallcv <= `r(p5)'
		 
	sum lcv, detail
	list la_name lcharpop cunum popest year lcv if lcv >= `r(p95)'
	list la_name lcharpop cunum popest year lcv if lcv <= `r(p5)'
		 
	 sum ncv, detail
	 list la_name ncharpop cunum popest year ncv if ncv >= `r(p95)'
	 list la_name ncharpop cunum popest year ncv if ncv <= `r(p5)'
			 
	 sum ocv, detail
	 list la_name ocharpop cunum popest year ocv if ocv >= `r(p95)'
	 list la_name ocharpop cunum popest year ocv if ocv <= `r(p5)'
	/*
		This is quite good actually. Plenty of variability in the charpop ratio.
		Do the same for cunum and popest. And move this code to the data management do file.
	*/
		
		// Correlation of charpop over time
		
		spearman charpop year // Strong, positive assocation i.e. more density over time.

	
	/* Bivariate associations between key indicators */
	
	// Association between number of charities and population
	
	pwcorr cunum popest, star(.05) obs sig
	twoway (scatter cunum popest) (lfit cunum popest)
	bysort year: pwcorr cunum popest, star(.05) obs sig
	
		pwcorr lcunum popest, star(.05) obs sig
		bysort year: pwcorr lcunum popest, star(.05) obs sig
	/*
		More evidence of the strong predictive power of population size on the number of charitable organisations
	*/
	
	
	// Association between ratio and deprivation
	/*
		See what happens if I exclude the 95th percentile of town score as well: does the association become linear?
	*/
	preserve
		forvalues i = 1/5 {
			quietly sum town if period==`i', detail
			tab la_name if period==`i' & town >=`r(p95)'
			drop if town >=`r(p95)' & period==`i'
		}
	restore
	/*
		There is an argument to exclude London from the analysis; see what John says.
	*/
	
	// Produce correlation matrix of deprivation to char ratio (Table 3)
	
	spearman charpop town
	bysort year: spearman charpop town
	
		spearman lcharpop town
		spearman ncharpop town
		spearman ocharpop town
		bys year: spearman lcharpop town
		bys year: spearman ncharpop town
		bys year: spearman ocharpop town
	
		corr charpop town
		bysort year: corr charpop town
	
		// Check robustness using Kendall's tao
		
		ktau charpop town
		bysort year: ktau charpop town
		/*
			Unpack these findings: still appears to be a moderate/strong association in the same direction.
			
			What happens if I exclude London LAs? https://en.wikipedia.org/wiki/Local_government_in_London
		*/

	
	
	/* Figure 2. Distribution of charity density, by Townsend Score and census year */
	
	local fdate "20190129"
	preserve
				
		levelsof year, local(levels)
		foreach l of local levels {
			quietly sum charpop if year==`l', detail
			*return list
			
			label variable town " " // Remove variable label so it does not display on the graphs
			twoway (scatter charpop town if year==`l' & london==., msymbol(Oh) mcolor(%100)) ///
				(scatter charpop town if year==`l' & london==1, msymbol(X)) (qfit charpop town if year==`l', lwidth(medium) lpatt(shortdash)) ///
				, title("`l'") ///
				ylabel(, labsize(small)) xlabel(, labsize(small)) ///
				legend(off) ///
				scheme(s1mono) ///
				name(g`l')
			*graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096)
		}
		
		graph combine g1971 g1981 g1991 g2001 g2011 ///
			, title("Distribution of Charity Density") subtitle("By Level of Deprivation") ///
			l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
			note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,635." ///
			"Local authorities with a level of charity density in the 99th percentile are excluded." ///
			"Local authorities based in London are marked with an X.", span size(small))
		graph export $path6\la_stats_scatter_overall_`fdate'.png, replace width(4096)	
		
	restore
	
		// Now graph the ranking variables
		
		preserve
			*drop if charpop>60
			*separate charpop, by(period)
					
			levelsof year, local(levels)
			foreach l of local levels {
				quietly sum charpop if year==`l', detail
				return list
				
				twoway (scatter charpop_rank town_rank if year==`l', msymbol(Oh) mcolor(%70)) (fpfit charpop_rank town_rank if year==`l') ///
					, title("`l'") ///
					ytitle("No. of charities per 5,000 residents (rank)") xtitle("Townsend Score (rank)") ///
					ylabel(, labsize(small)) xlabel(, labsize(small)) ///
					legend(off) ///
					scheme(s1mono) ///
					name(r`l')
				graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096)
			}
			
			graph combine r1971 r1981 r1991 r2001 r2011 ///
				, title("Distribution of Charities/Population Ratio") subtitle("By Townsend Score")
			graph export $path6\la_stats_rank_overall_`fdate'.png, replace width(4096)	
		restore
	
		sort year charpop_rank
		list la_name year charpop_rank town_rank if period==3
		
		bys year: spearman charpop_rank town_rank
		bys year: spearman charpop town
		/*
			Gives the same result.
		*/
		
		pwcorr charpop_rank town_rank, star(.05) obs sig
		pwcorr charpop_rank town_rank [w=popest], star(.05) obs sig
		bys year: pwcorr charpop_rank town_rank, star(.05) obs sig
		bys year: pwcorr charpop_rank town_rank [w=popest], star(.05) obs sig
		/*
			Weights make very little difference to correlations.
		*/
		
		twoway (scatter charpop_rank town_rank, msymbol(Oh) mcolor(%70) yscale(reverse)) (lfit charpop_rank town_rank), ///
			by(year)
		/*
			The little cluster in the bottom left are the London LAs which rank very highly for deprivation and the number of charities.
			
			I think Pearson correlation for rank values is the best approach.
		*/
		
		
		// Weighted by popest and cunum
		pwcorr charpop town, star(.05) obs sig
		pwcorr charpop town [aweight=popest], star(.05) obs sig
		pwcorr charpop town [aweight=cunum], star(.05) obs sig
		/*
			This is playing out exactly like Dave said it would! Unpack this:
		*/
		
				pwcorr charpop popest, star(.05) obs sig // Small, negative association between charpop and popest
				pwcorr charpop cunum, star(.05) obs sig // Moderate, positive association between charpop and cunum
				pwcorr cunum popest, star(.05) obs sig // Strong, positive association between cunum and popest
				
				xtile popest_quin = popest, n(5)
				tab popest_quin
				table popest_quin year, c(p50 charpop p50 cunum) // This is an interesting table
		
	
		
	
	// Ratio of charities to population for each year and quintile
	
	tab quin year
	
	forvalues i = 1/5 {
		tabstat charpop if period==`i', s(mean p50 n) by(quin) 
		sum charpop if period==`i', detail
		table quin year if charpop<=`r(p99)' & period==`i', c(p50 charpop)
	}
	
 // Need to exclude the 99th percentile of charpop
	/*
		Focus on the quintiles: pop, charpop, lcharpop.
		
		If I'm interpreting this right, with the exception of 2001 the lowest quintile (i.e. most deprived) had
		the highest ratio of charities per population (considerably more) if using the mean. The median tells a completely
		different story!
		
		Convert to a table: year in columns, quintiles in rows.
	*/

	
	/* Association between 1971 and 2011 */
	
	
	/* Subsector analysis */
	
	// Produce Figure 3 by ICNPO -> produce graphs for every icnpo category but perhaps focus on 4 subsectors in particular (most common, substantively interesting?)
	
	
	
****************************************************************************************************************************

	
****************************************************************************************************************************	
		
		
	/* Change in ranking over time */
	/*
		Variables:
			- popest_rank_change 
			- cunum_rank_change 
			- charpop_rank_change
	*/
	
	// Table of key stats for our four areas
	
	cls
	preserve
		keep if la_name=="Birmingham" | la_name=="Tower Hamlets" | la_name=="Bolton" | la_name=="Burnley"
		table la_name year, c(mean popest mean town mean charpop mean charpoppchange) format(%9.0fc)
		table la_name year, c(mean popest_rank mean town_rank mean charpop_rank) format(%9.0fc)
		*bysort year: corr charpop town
		*twoway (scatter charpop town) (lfit charpop town)
	restore
	/*
		Create variables capturing % change from 1971 - 2011, and ranking of percentage change.
	*/
	
	tab charpop_rank if year==1971, sort
	list la_id la_name year charpop_rank if charpop_rank <=10 & year==1971 // Top 10 in 1971
	list la_id la_name year charpop_rank if charpop_rank >=321 & year==1971 // Bottom 10 in 1971
	
	// Top 10 in 1971
	
	preserve
		keep if charpop_rank <=10 | charpop_rank >=321
		levelsof la_id if charpop_rank <=10 & year==1971, local(top10)
		foreach l of local top10 {
			list la_name year charpop charpop_rank charpop_rank_change if la_id==`l', clean noobs
			table la_name year if la_id==`l', c(mean charpop mean charpop_rank mean charpop_rank_change)
		}
		/*
			Very little change in rankings, even as charpop varies to some degree.
		*/
	restore
	
	// Bottom 10 in 1971
	
	preserve
		keep if charpop_rank <=10 | charpop_rank >=321
		levelsof la_id if charpop_rank >=321 & year==1971, local(bottom10)
		foreach l of local bottom10 {
			list la_name year charpop charpop_rank charpop_rank_change if la_id==`l', clean noobs
			table la_name year if la_id==`l', c(mean charpop mean charpop_rank mean charpop_rank_change)
		}
		/*
			Little change in rankings, even as charpop increases over time.
		*/
	restore
	
	// Which local authorities experienced the largest change in charpop and charpop_rank, by year?


****************************************************************************************************************************

	
****************************************************************************************************************************

	/* Statistical modelling [SM001] */
	/*
		The dependent variable is charpop; the independents are townsend score (polynomial), census year, lagged density, 
		urban/rural, and interactions.
		
		Approach:
			- between story: sectoral differences in density by deprivation for each census year [DONE]
			- within story: geographic differences in density by deprivation for each census year (change score model) [DONE]
		
		Idea: create separate regressions by census year. [DONE]
				
		Sample reduction: drop 1st and 99th percentiles of charpop in each year (include in loop). [DONE]		
				
		There is dependence in the data that needs to be accounted for, either through robust standard errors or longitudinal models (re, fe).
		
		Try a range of estimators, including regress (and longitudinal extensions of it).
		
		Graph predicted counts in the same manner as Wo (2018) and Clifford (2012). Comment on the symmetry (or otherwise) of the predicted
		counts i.e. is it better to be one standard deviation above or below in terms of nonprofit density? The raw data suggests that the relationship
		is symmetrical, at least for recent years.
		
		KEY ISSUE: DECIDE ON FUNCTIONAL FORM OF DEPRIVATION. [DONE]
	*/
			
	sum charpop
	hist charpop, norm freq
	tabstat charpop, s(mean sd) by(year) // Looks to be over-dispersion and no zeroes; use tnbreg
	
	
	// Check shape of relationship between town and town2 and outcome
	
	scatter town2 town
	scatter charpop town2, by(period)
	/*
		Not sure what to do here... ask the Socstats group.
	*/
	
	// Estimate multivariate Zero-Truncated Negative Binomial Regression models
	/*
		A single model with interactions between deprivation and census year, 
		multiple models, one for each census year.
	*/
	
	gen charpop_discrete = round(charpop)
	l charpop_discrete in 1/10
	
	
	// Pooled cross section model - check for effect of year and interaction with deprivation
	
	tnbreg charpop town town2 ib1.period , nolog vce(robust)
		test 2.period 3.period 4.period 5.period // test if period variable is significant overall - yes it is
	
	tnbreg charpop town town2 ib1.period c.town#i.period, nolog vce(robust)
		test 2.period 3.period 4.period 5.period // test if period variable is significant overall - yes it is
		test 2.period#c.town 3.period#c.town 4.period#c.town 5.period#c.town // interactions overall are significant
		/*
			This provides justification for separate regressions with a lagged variable a la Clifford (2012).
		*/

	// 5.2.1 Overall charity density
	
	// 1971
	
	tab pph_cat if period==1
	quietly sum town if period==1, detail 
	local min = `r(min)'
	local max = `r(max)'
	
		// try charpop_discrete
		
		tnbreg charpop_discrete town town2 ib4.pph_cat if period==1, nolog vce(robust)
		tnbreg charpop town town2 ib4.pph_cat if period==1, nolog vce(robust)
		
		
	
	tnbreg charpop town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
		fitstat
		test town town2 // test if deprivation variables are significant overall
		test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall
		capture vif
		collin town town2 pph_cat if period==1
		
		**margins , at(town = (`min'(.75)`max')) grand // view predicted counts
		**marginsplot, noci allsim nolabels name(m1)

		predict pr_charpop_1 // generate predicted counts for each observation
		listcoef , percent help // view predicted change in outcome by independent variables
		tabstat charpop if period==1, s(mean p50) by(pph_cat) // sense-check the results of the margins command
		
			// Remove town2 and pph_cat to see the effects of multicollinearity
			
			tnbreg charpop town ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
				capture vif
				collin town pph_cat if period==1
				listcoef , percent help // view predicted change in outcome by independent variables

				
			tnbreg charpop town town2 if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
				collin town town2 if period==1
				listcoef , percent help // view predicted change in outcome by independent variables

	/*
		Why doesn't tnbreg report the results of LR test for alpha = 0? It's because of 'vce(robust)';
		removing this option reveals a significant LR test, meaning a negative binomial model is more appropriate than Poisson.
		
		Would qvgraph work with results of count model? [YES]
	*/
	
		// Quasi-variance comparison intervals
		/*
			The results of this command tell us whether there are statistically significant differences between quintiles,
			not just in reference to the base category.
		*/
		/*
		qv ib4.pph_cat
		qvgraph ib4.pph_cat, ytitle ("Expected change in number of charities (log)", size(medsmall)) xtitle("Urban/Rural Classification", size(medsmall) height(5)) ///
			title("Expected Change in Number of Charities") subtitle("Coefficient and Quasi-Variance Comparison Intervals (95%)") ///
			bgcolor(white) ylab(, nogrid) xlab(,val labs(vsmall)) name(qvg1)
			**graph save $path4\qvgraph_journal_20160726.gph, replace
		*/
		
	// 1981-2011
	
	forvalues i = 2/5 {
		tab pph_cat if period==`i'
		
		quietly sum town if period==`i', detail 
		local min = `r(min)'
		local max = `r(max)'

		tpoisson charpop town town2 ib4.pph_cat lag_charpop if period==`i', nolog vce(robust)
			fitstat
			test town town2 // test if deprivation variables are significant overall
			test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall
			capture vif
			collin town town2 pph_cat lag_charpop if period==`i'

			*margins , at(town = (`min'(.75)`max')) grand // view predicted counts
			*marginsplot, noci allsim nolabels name(m`i')
			
			predict pr_charpop_`i' // generate predicted counts for each observation
			listcoef , percent help // view predicted change in outcome by independent variables
			
		**qv ib4.pph_cat
		**qvgraph ib4.pph_cat, ytitle ("Expected change in number of charities (log)", size(medsmall)) xtitle("Urban/Rural Classification", size(medsmall) height(5)) ///
			**title("Expected Change in Number of Charities") subtitle("Coefficient and Quasi-Variance Comparison Intervals (95%)") ///
			**bgcolor(white) ylab(, nogrid) xlab(,val labs(vsmall)) name(qvg`i')
	}
	/*
		Check if LR test for alpha is significant for these years - the test says no overdispersion but the variance is clearly greater than
		the mean.		
	*/
		
		// Graph the predicted and observed number of charities by year
		
		gen pr_charpop = .
		forvalues i = 1/5 {
			replace pr_charpop = pr_charpop_`i' if period==`i'
		}
		list la_name year charpop pr_charpop
		
		/* Figure 3. Distribution of predicted number of charities per 5000 residents, by Townsend Score and census year */
		
		// Use marginsplot results
		/*
		graph combine m1 m2 m3 m4 m5 ///
			, title("Distribution of Predicted Charity Density") subtitle("By Level of Deprivation") ///
			l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
			note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,635." ///
			"Local authorities with a level of charity density in the 99th percentile are excluded." ///
			"Local authorities based in London are marked with an X.", span size(small))
		graph export $path6\la_stats_scatter_pr-overall_`fdate'.png, replace width(4096)	
		*/
		
		// Use predicted counts from model postestimation 'predict' command
		
		local fdate "20190129"
		capture graph drop *
		quietly count if pr_charpop!=.
		local numobs = `r(N)'
		preserve
					
			levelsof year, local(levels)
			foreach l of local levels {
				quietly sum charpop if year==`l', detail
				*return list
				
				label variable town " " // Remove variable label so it does not display on the graphs
				twoway (fpfit pr_charpop town if year==`l', lwidth(thick)) ///
					, title("`l'") ytitle(" ") ///
					ylabel(0(6)30, labsize(small)) xlabel(, labsize(small)) ///
					legend(off) ///
					scheme(s1mono) ///
					name(g`l')
				*graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096)
			}
			
			graph combine g1971 g1981 g1991 g2001 g2011 ///
				, title("Distribution of Predicted Charity Density") subtitle("By Level of Deprivation") ///
				l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
				"Local authorities with a level of charity density in the 99th percentile are excluded." ///
				"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
			graph export $path6\la_stats_scatter_pr-overall_`fdate'.png, replace width(4096)	
			
		restore
		
		
		// Sensitivity analysis - London effect
		/*
			Two analyses:
				- Add a London binary predictor to the existing models
				- Estimate the models for local authorities, excluding London-based
		*/
			
		recode london 1=1 *=0
		tab london
		
		levelsof year, local(levels)
		foreach l of local levels {
			quietly sum charpop if year==`l', detail
			*return list
			
			label variable town " " // Remove variable label so it does not display on the graphs
			twoway (scatter charpop town if year==`l' & london==0, msymbol(Oh) mcolor(%100)) ///
				(qfit charpop town if year==`l', lwidth(medium) lpatt(shortdash)) ///
				, title("`l'") ///
				ylabel(, labsize(small)) xlabel(, labsize(small)) ///
				legend(off) ///
				scheme(s1mono) ///
				name(lo`l')
			*graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096)
		}
		
		graph combine lo1971 lo1981 lo1991 lo2001 lo2011 ///
			, title("Distribution of Charity Density") subtitle("By Level of Deprivation") ///
			l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
			note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,635." ///
			"Local authorities with a level of charity density in the 99th percentile are excluded." ///
			"Local authorities based in London are excluded.", span size(small))
		graph export $path6\la_stats_scatter_overall_nolondon_`fdate'.png, replace width(4096)	

		
		// 1971
			
		tnbreg charpop town town2 ib4.pph_cat london if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
			fitstat
			test town town2 // test if deprivation variables are significant overall
			test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall
			
			predict s_pr_charpop_1 // generate predicted counts for each observation
			listcoef , percent help // view predicted change in outcome by independent variables
		
		// 1981-2011
		
		forvalues i = 2/5 {

			tpoisson charpop town town2 ib4.pph_cat lag_charpop london if period==`i', nolog vce(robust)
				fitstat
				test town town2 // test if deprivation variables are significant overall
				test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall
							
				predict s_pr_charpop_`i' // generate predicted counts for each observation
				listcoef , percent help // view predicted change in outcome by independent variables
		}
		
			list charpop london year if period==3
		
		// Graph the predicted and observed number of charities by year
		
		gen s_pr_charpop = .
		forvalues i = 1/5 {
			replace s_pr_charpop = s_pr_charpop_`i' if period==`i'
		}
		list la_name year charpop s_pr_charpop
		
		/* Figure 4. Sensitivity analysis: Distribution of predicted number of charities per 5000 residents, by Townsend Score and census year */
		
		// Use predicted counts from model postestimation 'predict' command
		
		local fdate "20190129"
		capture graph drop *
		preserve
					
			levelsof year, local(levels)
			foreach l of local levels {
				quietly sum charpop if year==`l', detail
				*return list
				
				label variable town " " // Remove variable label so it does not display on the graphs
				twoway (fpfit s_pr_charpop town if year==`l', lwidth(thick)) ///
					, title("`l'") ///
					ylabel(, labsize(small)) xlabel(, labsize(small)) ///
					legend(off) ///
					scheme(s1mono) ///
					name(s`l')
				*graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096)
			}
			
			graph combine s1971 s1981 s1991 s2001 s2011 ///
				, title("Distribution of Predicted Charity Density") subtitle("By Level of Deprivation") ///
				l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=1,635." ///
				"Local authorities with a level of charity density in the 99th percentile are excluded." ///
				"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
			graph export $path6\la_stats_scatter_s_pr-overall_`fdate'.png, replace width(4096)	
			
		restore
		/*
			The results are robust to the inclusion of a London explanatory factor, though the variable itself is significant for most years.
		*/
		
		
		// 5.2.2 Geographic scale of operation [SM002]
		/*
			Selected scales:
				- Local
				- National
				- Overseas

			TASK: consider the correct model specification i.e. zero-inflated, zero-truncated, poisson etc
		*/
		
		sum lcharpop ncharpop ocharpop, detail
		tab1 lcharpop ncharpop ocharpop, sort
		bys year: tabstat lcharpop ncharpop ocharpop, s(mean v) by(pph_cat)
		/*
			lcharpop can be modelled using zero-truncated negative binomial approach; ocharpop using zero-inflated poisson;
			ncharpop using poisson or negative binomial.
		*/
		
		
		// National charities
		
		nbreg ncharpop town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
			fitstat
			test town town2 // test if deprivation variables are significant overall
			test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall

			predict pr_nat_1 // generate predicted counts for each observation
			listcoef , percent help // view predicted change in outcome by independent variables
		/*
			Statistically significant result for alpha LR test - go with nbreg.
		*/
	
		// 1981-2011
				
		forvalues i = 2/5 {

			poisson ncharpop town town2 ib4.pph_cat lag_ncharpop if period==`i', nolog vce(robust) 
				fitstat
				test town town2 // test if deprivation variables are significant overall
				test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall
				
				predict pr_nat_`i' // generate predicted counts for each observation
				listcoef , percent help // view predicted change in outcome by independent variables
		}
		/*
			LR test for alpha is significant for every year except 2011 - switch to standard poisson for 2011.
		*/
			

		// Graph the predicted and observed number of charities by year
				
		gen pr_nat = .
		forvalues i = 1/5 {
			replace pr_nat = pr_nat_`i' if period==`i'
		}
		list la_name year ncharpop pr_nat
				
		/* Figure 4. Distribution of predicted number of National charities per 5000 residents, by Townsend Score and census year */

		// Use predicted counts from model postestimation 'predict' command
				
		local fdate "20190129"
		capture graph drop *
		quietly count if pr_nat!=.
		local numobs = `r(N)'
		preserve
						
			levelsof year, local(levels)
			foreach l of local levels {
				*return list
				
				label variable pr_nat " "
				label variable town " " // Remove variable label so it does not display on the graphs
				twoway (fpfit pr_nat town if year==`l', lwidth(thick)) ///
					, title("`l'") ytitle(" ") ///
					ylabel(0(2)10, labsize(small)) xlabel(, labsize(small)) ///
					legend(off) ///
					scheme(s1mono) ///
					name(i`l')
				/*
					Could potentially add a scatter of London or urban/rural to see where these points lie along the line.
				*/
			}
					
			graph combine i1971 i1981 i1991 i2001 i2011 ///
				, title("Distribution of Predicted Density (National Charities)") subtitle("By Level of Deprivation") ///
				l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
				"Local authorities with a level of charity density in the 99th percentile are excluded." ///
				"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
			graph export $path6\la_stats_pr_nat_`fdate'.png, replace width(4096)	
					
		restore
	
		
		// Overseas charities
		
		tab year ocharpop if ocharpop==0 
		/*
			Lots of zeroes for 1971 and 1981, very few or none for 1991, 2001 and 2011 - zip for early years, poisson for later years.
		*/
		
		// 1971
		
		zip ocharpop town town2 ib4.pph_cat if period==1, nolog inflate(town town2 ib4.pph_cat) vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
			fitstat
			test [ocharpop]town [ocharpop]town2 // test if deprivation variables are significant overall
			test [ocharpop]1.pph_cat [ocharpop]2.pph_cat [ocharpop]3.pph_cat [ocharpop]5.pph_cat // test if urban/rural is significant overall

			predict pr_ove_1 // generate predicted counts for each observation
			listcoef , percent help // view predicted change in outcome by independent variables
	
		// 1981
		
		zip ocharpop town town2 ib4.pph_cat lag_ocharpop if period==2, nolog inflate(town town2 ib4.pph_cat lag_ocharpop) vce(robust) 
			fitstat
			test [ocharpop]town [ocharpop]town2 // test if deprivation variables are significant overall
			test [ocharpop]1.pph_cat [ocharpop]2.pph_cat [ocharpop]3.pph_cat [ocharpop]5.pph_cat // test if urban/rural is significant overall

			predict pr_ove_2 // generate predicted counts for each observation
			listcoef , percent help // view predicted change in outcome by independent variables

		
		// 1991 - 2011
				
		forvalues i = 3/5 {

			poisson ocharpop town town2 ib4.pph_cat lag_ocharpop if period==`i', nolog vce(robust) 
				fitstat
				test town town2 // test if deprivation variables are significant overall
				test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall
				
				predict pr_ove_`i' // generate predicted counts for each observation
				listcoef , percent help // view predicted change in outcome by independent variables

		}		

		// Graph the predicted and observed number of charities by year
				
		gen pr_ove = .
		forvalues i = 1/5 {
			replace pr_ove = pr_ove_`i' if period==`i'
		}
		list la_name year ocharpop pr_ove
				
		/* Figure 5. Distribution of predicted number of National charities per 5000 residents, by Townsend Score and census year */

		// Use predicted counts from model postestimation 'predict' command
				
		local fdate "20190129"
		capture graph drop *
		quietly count if pr_ove!=.
		local numobs = `r(N)'
		preserve
						
			levelsof year, local(levels)
			foreach l of local levels {
				*return list
				
				label variable pr_ove " "
				label variable town " " // Remove variable label so it does not display on the graphs
				twoway (fpfit pr_ove town if year==`l', lwidth(thick)) ///
					, title("`l'") ytitle(" ") ///
					ylabel(0(1)6, labsize(small)) xlabel(, labsize(small)) ///
					legend(off) ///
					scheme(s1mono) ///
					name(i`l')
			}
					
			graph combine i1971 i1981 i1991 i2001 i2011 ///
				, title("Distribution of Predicted Density (Overseas Charities)") subtitle("By Level of Deprivation") ///
				l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
				"Local authorities with a level of charity density in the 99th percentile are excluded." ///
				"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
			graph export $path6\la_stats_pr_ove_`fdate'.png, replace width(4096)	
					
		restore
		
		
		// Local charities
		
		// 1971
		
		tnbreg lcharpop town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
			fitstat
			test town town2 // test if deprivation variables are significant overall
			test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall

			predict pr_loc_1 // generate predicted counts for each observation
			listcoef , percent help // view predicted change in outcome by independent variables
			
		// 1981-2011
		
		forvalues i = 2/5 {

			tpoisson lcharpop town town2 ib4.pph_cat lag_lcharpop if period==`i', nolog vce(robust)
				fitstat
				test town town2 // test if deprivation variables are significant overall
				test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall

				predict pr_loc_`i' // generate predicted counts for each observation
				listcoef , percent help // view predicted change in outcome by independent variables
		}
		/*
			Check if LR test for alpha is significant for these years - the test says no overdispersion but the variance is clearly greater than
			the mean.
		*/
		
			// Graph the predicted and observed number of charities by year
			
			gen pr_loc = .
			forvalues i = 1/5 {
				replace pr_loc = pr_loc_`i' if period==`i'
			}
			list la_name year lcharpop pr_loc
			
			/* Figure 6. Distribution of predicted number of Local charities per 5000 residents, by Townsend Score and census year */

			// Use predicted counts from model postestimation 'predict' command
			
			local fdate "20190129"
			capture graph drop *
			quietly count if pr_loc!=.
			local numobs = `r(N)'
			preserve
						
			levelsof year, local(levels)
			foreach l of local levels {
				*return list
				
				label variable pr_loc " "
				label variable town " " // Remove variable label so it does not display on the graphs
				twoway (fpfit pr_loc town if year==`l', lwidth(thick)) ///
					, title("`l'") ytitle(" ") ///
					ylabel(0(8)24, labsize(small)) xlabel(, labsize(small)) ///
					legend(off) ///
					scheme(s1mono) ///
					name(i`l')
			}
					
			graph combine i1971 i1981 i1991 i2001 i2011 ///
				, title("Distribution of Predicted Density (Local Charities)") subtitle("By Level of Deprivation") ///
				l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
				"Local authorities with a level of charity density in the 99th percentile are excluded." ///
				"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
			graph export $path6\la_stats_pr_loc_`fdate'.png, replace width(4096)	
				
			restore

		
		
		// 5.2.3 Field of activity [SM003]
		/*
			Selected areas, based on most common categories of ICNPO:
				- Social services (17)
				- Religion (14)
				- Culture and recreation (1)
				- Development (2)
				- Education (3)
				
			TASK: ask JM about these choices. [DONE]
			TASK: consider the correct model specification i.e. zero-inflated, zero-truncated, poisson etc
		*/
		
		sum charpop_icnpo*
		tabstat charpop_icnpo*, s(mean v) by(year) // Looks to be no need for nbreg
		
		// Social services
		
		hist charpop_icnpo17, freq norm scheme(s1mono) // tpoisson is the appropriate model (no overdispersion and zero-truncated).
		
		tpoisson charpop_icnpo17 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
			fitstat
			test town town2 // test if deprivation variables are significant overall
			test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall

			predict pr_icnpo_17_1 // generate predicted counts for each observation
			listcoef , percent help // view predicted change in outcome by independent variables
	
			// 1981-2011
				
			forvalues i = 2/5 {

				tpoisson charpop_icnpo17 town town2 ib4.pph_cat lag_charpop_icnpo17 if period==`i', nolog vce(robust)
					fitstat
					test town town2 // test if deprivation variables are significant overall
					test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall
					
					predict pr_icnpo_17_`i' // generate predicted counts for each observation
					listcoef , percent help // view predicted change in outcome by independent variables

			}
			/*
				Check if LR test for alpha is significant for these years - the test says no overdispersion but the variance is clearly greater than
				the mean.
			*/
		
			// Graph the predicted and observed number of charities by year
				
			gen pr_icnpo_17 = .
			forvalues i = 1/5 {
				replace pr_icnpo_17 = pr_icnpo_17_`i' if period==`i'
			}
			list la_name year charpop_icnpo17 pr_icnpo_17
				
			/* Figure 7. Distribution of predicted number of Social Services charities per 5000 residents, by Townsend Score and census year */

			// Use predicted counts from model postestimation 'predict' command
				
			local fdate "20190129"
			capture graph drop *
			quietly count if pr_icnpo_17!=.
			local numobs = `r(N)'
			preserve
							
				levelsof year, local(levels)
				foreach l of local levels {
					*return list
					
					label variable town " " // Remove variable label so it does not display on the graphs
					twoway (fpfit pr_icnpo_17 town if year==`l', lwidth(thick)) ///
						, title("`l'") ytitle(" ") ///
						ylabel(0(1)7, labsize(small)) xlabel(, labsize(small)) ///
						legend(off) ///
						scheme(s1mono) ///
						name(i`l')
				}
					
				graph combine i1971 i1981 i1991 i2001 i2011 ///
					, title("Distribution of Predicted Density (Social Services)") subtitle("By Level of Deprivation") ///
					l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
					note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
					"Local authorities with a level of charity density in the 99th percentile are excluded." ///
					"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
				graph export $path6\la_stats_pr_icnpo17_`fdate'.png, replace width(4096)	
					
			restore


			// Religion
			
			hist charpop_icnpo14, freq norm scheme(s1mono) // tpoisson is the appropriate model (no overdispersion and zero-truncated).
		
			poisson charpop_icnpo14 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
				fitstat
				test town town2 // test if deprivation variables are significant overall
				test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall

				predict pr_icnpo_14_1 // generate predicted counts for each observation
				listcoef , percent help // view predicted change in outcome by independent variables
			/*
				tpoisson won't converge; need to go with poisson.
			*/
				
		
				// 1981-2011
					
				forvalues i = 2/5 {

					poisson charpop_icnpo14 town town2 ib4.pph_cat lag_charpop_icnpo14 if period==`i', nolog vce(robust)
						fitstat
						test town town2 // test if deprivation variables are significant overall
						test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall
						
						predict pr_icnpo_14_`i' // generate predicted counts for each observation
						listcoef , percent help // view predicted change in outcome by independent variables

				}
				/*
					tpoisson won't converge; need to go with poisson.
				*/
			
				// Graph the predicted and observed number of charities by year
					
				gen pr_icnpo_14 = .
				forvalues i = 1/5 {
					replace pr_icnpo_14 = pr_icnpo_14_`i' if period==`i'
				}
				list la_name year charpop_icnpo14 pr_icnpo_14
					
				/* Figure 4. Distribution of predicted number of Social services charities per 5000 residents, by Townsend Score and census year */

				// Use predicted counts from model postestimation 'predict' command
					
				local fdate "20190129"
				capture graph drop *
				quietly count if pr_icnpo_14!=.
				local numobs = `r(N)'
				preserve
								
					levelsof year, local(levels)
					foreach l of local levels {
						*return list
						
						label variable town " " // Remove variable label so it does not display on the graphs
						twoway (fpfit pr_icnpo_14 town if year==`l', lwidth(thick)) ///
							, title("`l'") ytitle(" ") ///
							ylabel(0(1)6, labsize(small)) xlabel(, labsize(small)) ///
							legend(off) ///
							scheme(s1mono) ///
							name(i`l')
					}
						
					graph combine i1971 i1981 i1991 i2001 i2011 ///
						, title("Distribution of Predicted Density (Religion)") subtitle("By Level of Deprivation") ///
						l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
						note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
						"Local authorities with a level of charity density in the 99th percentile are excluded." ///
						"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
					graph export $path6\la_stats_pr_icnpo14_`fdate'.png, replace width(4096)	
						
				restore
				
				
			// Culture and recreation
		
			// 1971 
			
			poisson charpop_icnpo1 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
				fitstat
				test town town2 // test if deprivation variables are significant overall
				test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall

				predict pr_icnpo_1_1 // generate predicted counts for each observation
				listcoef , percent help // view predicted change in outcome by independent variables
		
				// 1981-2011
					
				forvalues i = 2/5 {

					poisson charpop_icnpo1 town town2 ib4.pph_cat lag_charpop_icnpo1 if period==`i', nolog vce(robust)
						fitstat
						test town town2 // test if deprivation variables are significant overall
						test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall
						
						predict pr_icnpo_1_`i' // generate predicted counts for each observation
						listcoef , percent help // view predicted change in outcome by independent variables

				}
			
				// Graph the predicted and observed number of charities by year
					
				gen pr_icnpo_1 = .
				forvalues i = 1/5 {
					replace pr_icnpo_1 = pr_icnpo_1_`i' if period==`i'
				}
				list la_name year charpop_icnpo1 pr_icnpo_1
					
				/* Figure 9. Distribution of predicted number of Social services charities per 5000 residents, by Townsend Score and census year */

				// Use predicted counts from model postestimation 'predict' command
					
				local fdate "20190129"
				capture graph drop *
				quietly count if pr_icnpo_1!=.
				local numobs = `r(N)'
				preserve
								
					levelsof year, local(levels)
					foreach l of local levels {
						*return list
						
						label variable town " " // Remove variable label so it does not display on the graphs
						twoway (fpfit pr_icnpo_1 town if year==`l', lwidth(thick)) ///
							, title("`l'") ytitle(" ") ///
							ylabel(0(1)4, labsize(small)) xlabel(, labsize(small)) ///
							legend(off) ///
							scheme(s1mono) ///
							name(i`l')
					}
						
					graph combine i1971 i1981 i1991 i2001 i2011 ///
						, title("Distribution of Predicted Density (Culture and Recreation)") subtitle("By Level of Deprivation") ///
						l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
						note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
						"Local authorities with a level of charity density in the 99th percentile are excluded." ///
						"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
					graph export $path6\la_stats_pr_icnpo1_`fdate'.png, replace width(4096)	
						
				restore

				
			// Development
			
			tab year charpop_icnpo2 if charpop_icnpo2 < .1 // Not zero-inflated
		
			// 1971 
			
			poisson charpop_icnpo2 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
				fitstat
				test town town2 // test if deprivation variables are significant overall
				test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall

				predict pr_icnpo_2_1 // generate predicted counts for each observation
				listcoef , percent help // view predicted change in outcome by independent variables
		
				// 1981-2011
					
				forvalues i = 2/5 {

					poisson charpop_icnpo2 town town2 ib4.pph_cat lag_charpop_icnpo2 if period==`i', nolog vce(robust)
						fitstat
						test town town2 // test if deprivation variables are significant overall
						test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall
						
						predict pr_icnpo_2_`i' // generate predicted counts for each observation
						listcoef , percent help // view predicted change in outcome by independent variables

				}
			
				// Graph the predicted and observed number of charities by year
					
				gen pr_icnpo_2 = .
				forvalues i = 1/5 {
					replace pr_icnpo_2 = pr_icnpo_2_`i' if period==`i'
				}
				list la_name year charpop_icnpo2 pr_icnpo_2
					
				/* Figure 10. Distribution of predicted number of Development charities per 5000 residents, by Townsend Score and census year */

				// Use predicted counts from model postestimation 'predict' command
					
				local fdate "20190129"
				capture graph drop *
				quietly count if pr_icnpo_2!=.
				local numobs = `r(N)'
				preserve
								
					levelsof year, local(levels)
					foreach l of local levels {
						*return list
						
						label variable town " " // Remove variable label so it does not display on the graphs
						twoway (fpfit pr_icnpo_2 town if year==`l', lwidth(thick)) ///
							, title("`l'") ytitle(" ") ///
							ylabel(0(.5)2, labsize(small)) xlabel(, labsize(small)) ///
							legend(off) ///
							scheme(s1mono) ///
							name(i`l')
					}
						
					graph combine i1971 i1981 i1991 i2001 i2011 ///
						, title("Distribution of Predicted Density (Development)") subtitle("By Level of Deprivation") ///
						l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
						note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
						"Local authorities with a level of charity density in the 99th percentile are excluded." ///
						"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
					graph export $path6\la_stats_pr_icnpo2_`fdate'.png, replace width(4096)	
						
				restore

		
			// Education
			
			tab year charpop_icnpo3 if charpop_icnpo3 < .1 // Not zero-inflated
		
			// 1971 
			
			poisson charpop_icnpo3 town town2 ib4.pph_cat if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
				fitstat
				test town town2 // test if deprivation variables are significant overall
				test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural is significant overall

				predict pr_icnpo_3_1 // generate predicted counts for each observation
				listcoef , percent help // view predicted change in outcome by independent variables
		
				// 1981-2011
					
				forvalues i = 2/5 {

					poisson charpop_icnpo3 town town2 ib4.pph_cat lag_charpop_icnpo3 if period==`i', nolog vce(robust)
						fitstat
						test town town2 // test if deprivation variables are significant overall
						test 1.pph_cat 2.pph_cat 3.pph_cat 5.pph_cat // test if urban/rural significant overall
						
						predict pr_icnpo_3_`i' // generate predicted counts for each observation
						listcoef , percent help // view predicted change in outcome by independent variables

				}
			
				// Graph the predicted and observed number of charities by year
					
				gen pr_icnpo_3 = .
				forvalues i = 1/5 {
					replace pr_icnpo_3 = pr_icnpo_3_`i' if period==`i'
				}
				list la_name year charpop_icnpo3 pr_icnpo_3
					
				/* Figure 11. Distribution of predicted number of Education charities per 5000 residents, by Townsend Score and census year */

				// Use predicted counts from model postestimation 'predict' command
					
				local fdate "20190129"
				capture graph drop *
				quietly count if pr_icnpo_3!=.
				local numobs = `r(N)'
				preserve
								
					levelsof year, local(levels)
					foreach l of local levels {
						*return list
						
						label variable town " " // Remove variable label so it does not display on the graphs
						twoway (fpfit pr_icnpo_3 town if year==`l', lwidth(thick)) ///
							, title("`l'") ytitle(" ") ///
							ylabel(0(.5)2, labsize(small)) xlabel(, labsize(small)) ///
							legend(off) ///
							scheme(s1mono) ///
							name(i`l')
					}
						
					graph combine i1971 i1981 i1991 i2001 i2011 ///
						, title("Distribution of Predicted Density (Education)") subtitle("By Level of Deprivation") ///
						l1title("No. of charities per 5,000 residents") b1title("Townsend Score") ///
						note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
						"Local authorities with a level of charity density in the 99th percentile are excluded." ///
						"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
					graph export $path6\la_stats_pr_icnpo3_`fdate'.png, replace width(4096)	
						
				restore			
		
		
	/* Spatial models */
	/*
		The strategy is to merge observations for each census year with the spatial data set and estimate the same models
		as above, only this time with spatially autocorrelated residuals.
	*/
	
	use $path3\la_stats_1971-2011_maps.dta, clear
	sort la_code_str
	desc, f
	
	drop charpop* numobs // drop density and other variables
	
	// Create spatial matrices
	
	spmatrix clear 
	spmatrix create contiguity m1, rook
	spmatrix create idistance m2

	
	forvalues i = 1/5 {
		preserve
			use $path3\la_stats_1971-2011_analysis.dta, clear // load in density data set
			keep if period==`i'
			keep la_id la_code charpop lag_charpop town town2 pph_cat year period
			
			rename la_code la_code_str
			foreach var in charpop lag_charpop town town2 pph_cat {
				rename `var' `var'_`i'
			}
			sort la_code_str
			sav $path1\la_stats_period-`i'.dta, replace
		restore
		
		*merge 1:1 la_code_str using $path1\la_stats_period-`i'.dta, keep(match master)
	}
	
	
	forvalues i = 1/5 {
		merge 1:1 la_code_str using $path1\la_stats_period-`i'.dta, keep(match master)
		drop _merge
	}
	
	desc, f
	
		
	// Estimate regressions

		// 1971
		
		regress charpop_1 town_1 town2_1 ib4.pph_cat_1 , vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
			estat moran, errorlag(m1)  errorlag(m2) // evidence of spatial dependence
			/*
			fitstat
			test town_1 town2_1 // test if deprivation variables are significant overall
			test 1.pph_cat_1 2.pph_cat_1 3.pph_cat_1 5.pph_cat_1 // test if urban/rural is significant overall
			capture vif
			collin town_1 town2_1 pph_cat_1
			listcoef, help
			*/
						
			** Spatial regression **
			
			spregress charpop_1 town_1 town2_1 ib4.pph_cat_1 , gs2sls err(m1) dvarlag(m1) ivarl(m1: town_1) // contiguity matrix
				test town_1 town2_1 // test if deprivation variables are significant overall
				test 1.pph_cat_1 2.pph_cat_1 3.pph_cat_1 5.pph_cat_1 // test if urban/rural is significant overall
				estat impact
			/*
				There is a spillover effect for 1971, as well as a statistically significant Y lag effect.
			*/
				
			*spregress charpop_1 town_1 town2_1 ib4.pph_cat_1 , gs2sls err(m1) dvarlag(m1) // inverse distance matrix
			*	estat impact
			/*
				Regression coefficients are stable across weighting matrices.
			*/
			
			
		// 1981-2011
		
		forvalues i = 2/5 {
			*regress charpop_`i' town_`i' town2_`i' ib4.pph_cat_`i' lag_charpop_`i', vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
			*estat moran, errorlag(m1)  errorlag(m2) // evidence of spatial dependence
			*fitstat
			*test town_`i' town2_`i' // test if deprivation variables are significant overall
			*test 1.pph_cat_`i' 2.pph_cat_`i' 3.pph_cat_`i' 5.pph_cat_`i' // test if urban/rural is significant overall
			*capture vif
			**collin town_`i' town2_`i' pph_cat_`i'
			*listcoef, help
						
			** Spatial regression **
			
			spregress charpop_`i' town_`i' town2_`i' ib4.pph_cat_`i' lag_charpop_`i' , gs2sls err(m1) dvarlag(m1) ivarl(m1: town_1) // contiguity matrix
				test town_`i' town2_`i'
				test 1.pph_cat_`i' 2.pph_cat_`i' 3.pph_cat_`i' 5.pph_cat_`i' // test if urban/rural is significant overall
				estat impact
			/*
				No spillover effects for these years and no statistically significant Y lag effect.
			*/
				
			/*
			spregress charpop_`i' town_`i' town2_`i' ib4.pph_cat_`i' lag_charpop_`i', gs2sls err(m1) dvarlag(m1) // inverse distance matrix
				test town_`i' town2_`i'
				estat impact
			*/	
			/*
				Regression coefficients are stable across weighting matrices.
			*/

		}
		
		
	
			
			predict dres_1, residuals
			
			capture grmap, activate
			grmap dres_1 , t(1) clm(q) cln(9) fcolor(BuYlRd) legt("Residual Cuts") legs(2) legend(pos(10) size(vsmall)) ///
				title("Model Residuals (1971)") subtitle("By local authority") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) ///
				caption("Residuals derived from OLS regression.", size(vsmall))
			graph export $path6\la_stats_charpop_dres1.png, replace width(4096)

			
			
			capture grmap, activate
			grmap dres_m1 , t(1) clm(q) cln(9) fcolor(BuYlRd) legt("Residual Cuts") legs(2) legend(pos(10) size(vsmall)) ///
				title("Model Residuals (1971)") subtitle("By local authority") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) ///
				caption("Residuals derived from spatial regression with contiguity weighting matrix.", size(vsmall))
			graph export $path6\la_stats_charpop_dres_m1.png, replace width(4096)
			
		spregress charpop town town2 ib4.pph_cat if period==1, gs2sls err(m2) // lagged residual, inverse distance matrix
			predict dres_m2, residuals
			
			capture grmap, activate
			grmap dres_m2 , t(1) clm(q) cln(9) fcolor(BuYlRd) legt("Residual Cuts") legs(2) legend(pos(10) size(vsmall)) ///
				title("Model Residuals (1971)") subtitle("By local authority") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=326.", size(vsmall)) ///
				caption("Residuals derived from spatial regression with inverse distance weighting matrix.", size(vsmall))
			graph export $path6\la_stats_charpop_dres_m2.png, replace width(4096)



		*spregress charpop town town2 ib4.pph_cat , gs2sls dvarl(m1) // lagged dependent variable
		*spregress charpop town town2 ib4.pph_cat , gs2sls ivarl(m1: town) // lagged independent variable

	
	
	/* Change Score models */
	/*
		These models capture changes in the independent variables for a local authority and their impact
		on the change in the dependent variable.
		
		As the change in density can be negative we estimate a series of OLS regressions with robust standard errors.
		
		Use binary alternatives to `town` and `town2` e.g. change in ranking up or down.
	*/
	
	l la_name year town town_rank town_rank_change
	gen trc_cat = .
	replace trc_cat = . if town_rank_change==0 // set no change to missing as there are two few observations to identify a stable effect
	replace trc_cat = 0 if town_rank_change<0
	replace trc_cat = 1 if town_rank_change>0 // 1 = becoming more deprived
	tab trc_cat
	l la_name year town town_rank town_rank_change trc_cat
	
	// Whole sample
	
	regress charpop_change trc_cat urban_change rural_change , vce(robust)
	/*
		Forget about this approach.
	*/

	
	// 1981
		
	regress charpop_change town_change town2_change urban_change rural_change if period==2, vce(robust)
		fitstat
		test town_change town2_change // test if deprivation variables are significant overall
		test urban_change rural_change // test if urban/rural dummy variables are significant overall
		capture vif
		collin town_change town2_change urban_change rural_change if period==2
		*margins , at(town = (`min'(.75)`max')) grand // view predicted counts
		*marginsplot, noci allsim nolabels name(m`i')
		
		predict pr_charpop_cs_2 // generate predicted counts for each observation
		listcoef , help // view predicted change in outcome by independent variables

		// Alternative measure of rank
		
		regress charpop_change trc_cat urban_change rural_change if period==2, vce(robust)
			fitstat
			test urban_change rural_change // test if urban/rural dummy variables are significant overall
			capture vif
			collin trc_cat urban_change rural_change if period==2
			*margins , at(town = (`min'(.75)`max')) grand // view predicted counts
			*marginsplot, noci allsim nolabels name(m`i')
			
			*predict pr_charpop_cs_2 // generate predicted counts for each observation
			listcoef , help // view predicted change in outcome by independent variables


	
	// 1991 - 2011
	
	forvalues i = 3/5 {
		
		quietly sum town if period==`i', detail 
		local min = `r(min)'
		local max = `r(max)'

		regress charpop_change town_change town2_change urban_change rural_change lag_charpop_change if period==`i', vce(robust) // should it be prior density or prior density change?
			fitstat
			test town_change town2_change
			test urban_change rural_change // test if urban/rural dummy variables are significant overall
			capture vif
			collin town_change town2_change urban_change rural_change lag_charpop_change if period==`i'
			*margins , at(town = (`min'(.75)`max')) grand // view predicted counts
			*marginsplot, noci allsim nolabels name(m`i')
			
			predict pr_charpop_cs_`i' // generate predicted counts for each observation
			listcoef , help // view predicted change in outcome by independent variables
			
		**qv ib4.pph_cat
		**qvgraph ib4.pph_cat, ytitle ("Expected change in number of charities (log)", size(medsmall)) xtitle("Urban/Rural Classification", size(medsmall) height(5)) ///
			**title("Expected Change in Number of Charities") subtitle("Coefficient and Quasi-Variance Comparison Intervals (95%)") ///
			**bgcolor(white) ylab(, nogrid) xlab(,val labs(vsmall)) name(qvg`i')
	}
	
		// Alternative measure of rank
		
		forvalues i = 3/5 {
		
			quietly sum town if period==`i', detail 
			local min = `r(min)'
			local max = `r(max)'

			regress charpop_change trc_cat urban_change rural_change lag_charpop_change if period==`i', vce(robust) // should it be prior density or prior density change?
				fitstat
				test urban_change rural_change // test if urban/rural dummy variables are significant overall
				capture vif
				collin trc_cat urban_change rural_change lag_charpop_change if period==`i'
				*margins , at(town = (`min'(.75)`max')) grand // view predicted counts
				*marginsplot, noci allsim nolabels name(m`i')
				
				*predict pr_charpop_cs_`i' // generate predicted counts for each observation
				listcoef , help // view predicted change in outcome by independent variables
				
			**qv ib4.pph_cat
			**qvgraph ib4.pph_cat, ytitle ("Expected change in number of charities (log)", size(medsmall)) xtitle("Urban/Rural Classification", size(medsmall) height(5)) ///
				**title("Expected Change in Number of Charities") subtitle("Coefficient and Quasi-Variance Comparison Intervals (95%)") ///
				**bgcolor(white) ylab(, nogrid) xlab(,val labs(vsmall)) name(qvg`i')
		}

		// 2011 - check F test
		
		regress charpop_change trc_cat urban_change rural_change lag_charpop_change if period==5

		
		// Graph the predicted and observed number of charities by year
		
		gen pr_charpop_cs = .
		forvalues i = 2/5 {
			replace pr_charpop_cs = pr_charpop_cs_`i' if period==`i'
		}
		list la_name year charpop pr_charpop_cs
		
		/* Figure #. Distribution of predicted change in number of charities per 5000 residents, by Townsend Score change and census year */
				
		// Use predicted counts from model postestimation 'predict' command
		
		local fdate "20190129"
		capture graph drop *
		quietly count if pr_charpop_cs!=.
		local numobs = `r(N)'
		preserve
					
			*levelsof year, local(levels)
			foreach l in 1981 1991 2001 2011 {
				quietly sum charpop if year==`l', detail
				*return list
				
				label variable town_change " " // Remove variable label so it does not display on the graphs
				twoway (fpfit pr_charpop_cs town_change if year==`l', lwidth(thick)) ///
					, title("`l'") ytitle(" ") ///
					ylabel(, labsize(small)) xlabel(, labsize(small)) ///
					legend(off) ///
					scheme(s1mono) ///
					name(g`l')
				*graph export $path6\la_stats_scatter_`l'_`fdate'.png, replace width(4096)
			}
			
			graph combine g1981 g1991 g2001 g2011 ///
				, title("Distribution of Predicted Change in Charity Density") subtitle("By Change In Level of Deprivation") ///
				l1title("Change in no. of charities per 5,000 residents") b1title("Townsend Score change") ///
				note("Source: Charity Commission Register of Charities (31/12/2016) and Popchange; n=`numobs'." ///
				"Local authorities with a level of charity density in the 99th percentile are excluded." ///
				"Graph displays polynomial line of best fit between predicted charity density and Townsend Score.", span size(small))
			graph export $path6\la_stats_scatter_pr-cs_`fdate'.png, replace width(4096)	
			
		restore

		
		/* Fixed Effects models */
		
		// Whole sample
		
		twoway (qfit charpop town) (scatter charpop town)
		xtpoisson charpop town town2 ib4.pph_cat, fe
		
		
		// 1981-2011
		
		xtpoisson charpop town town2 ib4.pph_cat lag_charpop, fe
		xtreg charpop town town2 ib4.pph_cat lag_charpop, re
		xtreg charpop town town2 ib4.pph_cat lag_charpop, be
		
		xtreg charpop town town2 ib4.pph_cat lag_charpop, fe
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		/*
		// Alternative functional forms of town and pph
		
		forvalues i = 1/5 {
			xtile town_cat_`i' = town if period==`i', n(5)
			tab town_cat_`i'
			tabstat town, s(mean p50) by(town_cat_`i')
		}	
	
		tab town_cat quin // Why are these different? Go back to documentation of original variable.
			list la_name year quin town
			tab quin year // Struggling to interpret this variable

		
		/*
		tnbreg charpop town_cat pph if period==1, nolog vce(robust) // Estimate 1971 separately as there is no lagged variable for this year.
			test town town2
			listcoef , percent help // view predicted change in outcome by independent variables

		
		forvalues i = 2/5 {
			di "Year: " year
			tnbreg charpop town_cat pph lag_charpop if period==`i', nolog vce(robust)
			test town town2
			listcoef , percent help // view predicted change in outcome by independent variables
		}
		*/
		
		// Alternative model specifications i.e. pooled cross-sectional, basic Poisson and OLS, random and fixed effects.
		/*
			I need to ensure these are consistent with the other models in terms of variables.
		*/
		
		// Pooled cross-sectional [TNBREG & OLS]
		
		tnbreg charpop town town2 ib4.pph_cat lag_charpop, nolog vce(robust)
		
		reg charpop town town2 ib4.pph_cat ib2.period lag_charpop, vce(robust)
		
		// Fixed effects [OLS]
		
		xtset la_id period

		xtreg charpop town town2 ib4.pph_cat ib2.period lag_charpop, fe
		
		// Random effects [OLS]
				
		xtreg charpop town town2 ib4.pph_cat ib2.period lag_charpop, re
		/*
			Test which model is appropriate using Hausman and other tests.
		*/
		*/