// NEW, changed I.C to PHI, computes IC as well

// compute index of coincidence to get cipher type
// PHI: 0.035 = random, 0.065 = english (i.e ssc)
//
//      f_i * (f_i-1)
// SUM	-------------
//	n * (n-1)

// where n is length, and f_i is frequency of letter number i

// so what we need to do is get frequency (i.e number of times it occurs)
// in the file for each letter, then run the test, and give the result

// do this tomorrow
// 	[done]

// does only calculate letters, not spaces or other chars
// i.e [A-Z][a-z]

// NEW: expanded to simple frequency analysis

#include <iostream>
#include <fstream>

int sortfrequency(long long * amount, int * order, int size) {
	// amount: what is to be sorted
	// order: where to put the order of the sorted items
	// size: size of array amount
	
	// sorts frequency from most to least
	// yes, it is a simple bubble sort workalike. however, we're not
	// exactly sorting terabytes, so bwah..
	
	int done[size];
	int count;
	int record = 0;
	long long recordamnt = 0;
	int sec_count = 0;

	for (count = 0; count < size; count++) done[count] = 0;

	// now search for the highest, and do that 26 times
	for (sec_count = 0; sec_count < size; sec_count++) {
		for (count = 0; count < size; count++) {
			if (done[count] != 1) {
				if (amount[count] > recordamnt) {
					record = count;
					recordamnt = amount[count];
				}
			}
		}

		// put record --> ordered...
		order[sec_count] = record;
		// ..and then reset the record
		done[record] = 1;
		record = 0;
		recordamnt = -1;	//i don't think any frequency encount-
					//ered is negative, so this should work
	}

	return(0);
}

int main() {
	// TODO: split up into n-sized chunks to prevent overflow of
	// variables. then perform an average.

	long long amount[27]; 	// for frequency analysis
	int ordered[26];	// to sort in order of frequency
	double upper;		// numerator
	double phi;
	double ic;		// index of coincidence
	double percentage;	// hmm???
	long long length;	// number of letters
	char * filename = new char[100];// yes, you can overflow it.
	char current;
	unsigned long counter;

	for (counter = 0; counter < 26; counter++) amount[counter] = 0;
	ic = 0;
	phi = 0;
	length = 0;

	cout << "Enter filename: ";
	cin >> filename;

	ifstream tosearch(filename);

	if (!tosearch) {
		cout << "ERROR: File not found. Exiting!\n";
		return(-1);
	}

	while (!tosearch.eof()) {
		tosearch >> current;

		if ( (current >= 'A') && (current <= 'z') ) {
			length++;
			// yes, correct char
			if (current >= 'a') {
				amount[current-'a']++;
			} else {
				amount[current-'A']++;
			}
		}
	}

	tosearch.close();
	
	// compute phi

	for (counter = 0; counter<26; counter++) {
		upper =  amount[counter] * (amount[counter]-1);
		phi = phi + ((upper/length)/(length-1));
	}

	// compute i_C
	ic = phi*26;

	// print some statistics

	cout << "Searched and recognized "<<length-1 << " letters.\n\n";

	cout << "PHI = "<< phi << "N(N-1).\n";
	cout << "English text is about 0.065, random text about 0.033\n\n";
	
	cout << "Index of coincidence: " << ic << "\n";
	cout << "English text is about 1.73. Random is 1.\n\n";

	cout << "Frequency analysis:\n";

	sortfrequency(amount, ordered, 26);

	for (counter = 0; counter < 26; counter++) {

		percentage = (amount[ordered[counter]]*100)/length;

		cout << (char)(ordered[counter]+'A') << ": ";
		cout << amount[ordered[counter]] << "\t(";
		cout << percentage <<"%)\t";

		if ((counter+1) % 3 == 0) cout << "\n";
	}

	cout << "\n";

	return(0);
}
