Version 0.2 of my C++ Markov Algorithm Executor can be found after the jump. I apologise, my C++ is terrible.
#include < iostream > #include < fstream > #include < list > #include < map > #include < queue > using namespace std; struct InternalString { std::listtokens; int* token_index; std::map number_of_tokens; void add_token(int k){ if (number_of_tokens.find(k) == number_of_tokens.end()) number_of_tokens[k] = 1; else ++number_of_tokens[k]; ++token_index[k]; } std::string to_string(){ return std::string(tokens.begin(), tokens.end()); } InternalString & operator+=(const InternalString& w){ for (std::map ::const_iterator w_it = w.number_of_tokens.begin(); w_it != w.number_of_tokens.end(); ++w_it){ token_index[w_it->first] += w_it->second; } } InternalString & operator-=(const InternalString& w){ for (std::map ::const_iterator w_it = w.number_of_tokens.begin(); w_it != w.number_of_tokens.end(); ++w_it){ token_index[w_it->first] -= w_it->second; } } bool covers_the_tokens(const InternalString& w){ for (std::map ::const_iterator w_it = w.number_of_tokens.begin(); w_it != w.number_of_tokens.end(); ++w_it){ if (token_index[w_it->first] < w_it->second) return false; } return true; } InternalString(){ } InternalString(string s){ token_index = new int[256]; for (int i = 0; i < 256; ++i) token_index[i] = 0; for (int i = 0; i < s.size(); ++i){ int z = (int) s[i]; tokens.push_back(z); assert((z >= 0) && (z < 256)); add_token(z); } } }; class Rule { int* back_track; int* state_map; int initial_state; int terminal_state; int length_of_in_string; map number_of_tokens; InternalString strIn; InternalString strOut; public: Rule(string in, string out, int rulenum){ strIn = in; length_of_in_string = in.size(); back_track = new int[length_of_in_string]; state_map = new int[length_of_in_string << 8]; for (int i = 0; i < length_of_in_string; ++i){ back_track[i] = (i <= 1) ? 0 : state_map[(back_track[i-1] << 8) | ((int)in[i-1])]; for (int j = 0; j < 256; ++j) state_map[(i << 8) | j] = (j == ((int)in[i])) ? (i + 1) : (i == 0) ? 0 : state_map[(back_track[i] << 8) | j]; } initial_state = 0; terminal_state = length_of_in_string; fprintf(stderr, "Rule %d: %s->%s\n", rulenum, in.c_str(), out.c_str()); strOut = InternalString(out); } bool reallyTryAndApply(InternalString& str){ int state = initial_state; for (list ::iterator lit = str.tokens.begin(); lit != str.tokens.end(); ++lit){ state = state_map[(state << 8) | (*lit)]; if (state == terminal_state){ ++lit; list ::iterator lit2 = lit; for (int num = 0; num < length_of_in_string; ++num){ --lit2; } list newout(strOut.tokens.begin(), strOut.tokens.end()); str.tokens.splice(lit2, newout); str.tokens.erase(lit2, lit); str += strOut; str -= strIn; return true; } } return false; } bool tryAndApply(InternalString& str){ if (!(str.covers_the_tokens(strIn))){ return false; } return reallyTryAndApply(str); } }; class DebugProfile { int lineRules[30][3]; int ruleRules[30]; int num_of_linerules; int num_of_rulerules; public: DebugProfile() { num_of_linerules = 0; num_of_rulerules = 0; } bool alertOnRule(int rulenum){ fprintf(stderr, "Always alert on rule %d\n", rulenum); assert (num_of_rulerules < 30); ruleRules[num_of_rulerules++] = rulenum; } bool alertOnLines(int st, int en, int stp){ fprintf(stderr, "Alert every "); if ((stp >= -1) && (stp <= 1)){ stp = -1; fprintf(stderr, "iteration"); } else fprintf(stderr, "%dth iteration", stp); if (st > -1) fprintf(stderr, " from iteration #%d", st); if (en > -1) fprintf(stderr, " until iteration #%d", en); fprintf(stderr, "\n"); assert(num_of_linerules < 30); lineRules[num_of_linerules][0] = st; lineRules[num_of_linerules][1] = en; lineRules[num_of_linerules++][2] = stp; } bool traceIteration(int iter){ for (int i = 0; i < num_of_linerules; ++i){ if (iter >= lineRules[i][0]) if ((lineRules[i][1] == -1) || (iter < lineRules[i][1])) if ((lineRules[i][2] == -1) || (((iter - lineRules[i][0]) % lineRules[i][2]) == 0)) return true; } return false; } bool traceRule(int rule){ for (int i = 0; i < num_of_rulerules; ++i) if (rule == ruleRules[i]) return true; return false; } }; class Comp { vector rules; int iteration_count; DebugProfile dbg; public: Comp(ifstream& fin, DebugProfile d){ dbg = d; int num_rules = 0; char line[1024]; while (!fin.eof()){ fin.getline(line, 1024); int n = strlen(line); if ((n >= 7) && (line[0] == '[') && (line[1] == 'd') && (line[2] == 'e') && (line[3] == 'b') && (line[4] == 'u') && (line[5] == 'g')){ if ((line[6] == 'r') && (line[7] == ']')){ int rulenum = 0; for (int i = 8; i < n; ++i) if ((line[i] >= '0') && (line[i] <= '9')) rulenum = 10 * rulenum + (line[i] - '0'); dbg.alertOnRule(rulenum); } if ((line[6] == 'l') && (line[7] == ']') && (line[8] == '[')){ int start = 0; int end = -1; int step = -1; int i = 9; for (; (i < n) && (line[i] != ':'); ++i) if ((line[i] >= '0') && (line[i] <= '9')){ if (start == -1) start = 0; start = 10 * start + (line[i] - '0'); }; ++i; for (; (i < n) && (line[i] != ':'); ++i) if ((line[i] >= '0') && (line[i] <= '9')){ if (end == -1) end = 0; end = 10 * end + (line[i] - '0'); }; ++i; for (; (i < n) && (line[i] != ']'); ++i) if ((line[i] >= '0') && (line[i] <= '9')){ if (step == -1) step = 0; step = 10 * step + (line[i] - '0'); }; dbg.alertOnLines(start, end, step); } } int i = 0; while ((i < n-1) && ((line[i] != '-') || (line[i+1] != '>'))) ++i; if (i >= n-1) continue; string strOut = (line + (i + 2)); line[i] = '\0'; string strIn = line; ++num_rules; rules.push_back(Rule(strIn, strOut, num_rules)); } fprintf(stderr, "%d rules created\n", rules.size()); } string apply(string st){ InternalString str(st); iteration_count = 0; bool matched = true; while (matched){ matched = false; int rulenum = 0; for (vector ::iterator rule_it = rules.begin(); rule_it != rules.end(); ++rule_it){ ++rulenum; if (rule_it->tryAndApply(str)){ ++iteration_count; if (dbg.traceIteration(iteration_count) || dbg.traceRule(rulenum)){ fprintf(stderr, "Rule application #%d: (of rule number %d)\n",iteration_count, rulenum); fprintf(stderr, "%s\n", str.to_string().c_str()); } matched = true; break; } } } fprintf(stderr, "%d iterations\n", iteration_count); return str.to_string(); } }; int main(int argc, char* argv[]){ std::ifstream comp_in(argv[1]); Comp c(comp_in, DebugProfile()); std::ifstream fin(argv[2]); while (!fin.eof()){ char line[2048]; fin.getline(line, 2048); if (strlen(line) == 0) continue; printf("%s\n", c.apply((string)line).c_str()); } }
It takes 2 parameters, both of which are filenames. The first filename points to a Markov Algorithm (such as this one), and the second filename points to a list of input lines (such as the input data in the previous link).
No comments:
Post a Comment