Saturday, July 24, 2010

Slightly Optimized C++ Program for running Markov Algorithms

I realise there is not much point giving you a 438-rule Markov Algorithm and telling you that it takes more than 6,000,000 iterations to do anything without some sort of execution engine.

Version 0.2 of my C++ Markov Algorithm Executor can be found after the jump. I apologise, my C++ is terrible.



#include < iostream >
#include < fstream >
#include < list >
#include < map >
#include < queue >
using namespace std;

struct InternalString {
  std::list tokens;
  int* token_index;
  std::map number_of_tokens;
  void add_token(int k){
    if (number_of_tokens.find(k) == number_of_tokens.end())
      number_of_tokens[k] = 1;
    else
      ++number_of_tokens[k];
    ++token_index[k];
  }

  std::string to_string(){
    return std::string(tokens.begin(), tokens.end());
  }

  InternalString & operator+=(const InternalString& w){
    for (std::map::const_iterator w_it = w.number_of_tokens.begin(); w_it != w.number_of_tokens.end(); ++w_it){
      token_index[w_it->first] += w_it->second;
    }
  }

  InternalString & operator-=(const InternalString& w){
    for (std::map::const_iterator w_it = w.number_of_tokens.begin(); w_it != w.number_of_tokens.end(); ++w_it){
      token_index[w_it->first] -= w_it->second;
    }
  }


  bool covers_the_tokens(const InternalString& w){
    for (std::map::const_iterator w_it = w.number_of_tokens.begin(); w_it != w.number_of_tokens.end(); ++w_it){
      if (token_index[w_it->first] < w_it->second)
 return false;
    }
    return true;
  }


  InternalString(){

  }

  InternalString(string s){
    token_index = new int[256];
    for (int i = 0; i < 256; ++i)
      token_index[i] = 0;
    for (int i = 0; i < s.size(); ++i){
      int z = (int) s[i];
      tokens.push_back(z);
      assert((z >= 0) && (z < 256));
      add_token(z);
    }
  }
};

class Rule {
  int* back_track;
  int* state_map;
  int initial_state;
  int terminal_state;
  int length_of_in_string;
  map number_of_tokens;
  InternalString strIn;
  InternalString strOut;
public:
  Rule(string in, string out, int rulenum){
    strIn = in;
    length_of_in_string = in.size();
    back_track = new int[length_of_in_string];
    state_map = new int[length_of_in_string << 8];
    for (int i = 0; i < length_of_in_string; ++i){
      back_track[i] = (i <= 1) ? 0 : state_map[(back_track[i-1] << 8) | ((int)in[i-1])];
      for (int j = 0; j < 256; ++j)
 state_map[(i << 8) | j] = (j == ((int)in[i])) ? (i + 1) : (i == 0) ? 0 : state_map[(back_track[i] << 8) | j];
    }
    initial_state = 0;
    terminal_state = length_of_in_string;
 fprintf(stderr, "Rule %d: %s->%s\n", rulenum, in.c_str(), out.c_str());
    strOut = InternalString(out);
  }

  bool reallyTryAndApply(InternalString& str){
    int state = initial_state;
    for (list::iterator lit = str.tokens.begin();
  lit != str.tokens.end(); ++lit){
      state = state_map[(state << 8) | (*lit)];
      if (state == terminal_state){
 ++lit;
 list::iterator lit2 = lit;
 for (int num = 0; num < length_of_in_string; ++num){
   --lit2;
 }
 list newout(strOut.tokens.begin(), strOut.tokens.end());
 str.tokens.splice(lit2, newout);
 str.tokens.erase(lit2, lit);
 str += strOut;
 str -= strIn;
 return true;
      }
    }
    return false;
  }

  bool tryAndApply(InternalString& str){
    if (!(str.covers_the_tokens(strIn))){
      return false;
    }
    return reallyTryAndApply(str);
  }
};


class DebugProfile {
  int lineRules[30][3];
  int ruleRules[30];
  int num_of_linerules;
  int num_of_rulerules;
public:
  DebugProfile() {
    num_of_linerules = 0;
    num_of_rulerules = 0;
  }
  bool alertOnRule(int rulenum){
    fprintf(stderr, "Always alert on rule %d\n", rulenum);
    assert (num_of_rulerules < 30);
    ruleRules[num_of_rulerules++] = rulenum;
  }

  bool alertOnLines(int st, int en, int stp){
    fprintf(stderr, "Alert every ");
 if ((stp >= -1) && (stp <= 1)){
      stp = -1;
   fprintf(stderr, "iteration");
 }
    else
      fprintf(stderr, "%dth iteration", stp);
    if (st > -1)
      fprintf(stderr, " from iteration #%d", st);
    if (en > -1)
      fprintf(stderr, " until iteration #%d", en);
    fprintf(stderr, "\n");
    assert(num_of_linerules < 30);
    lineRules[num_of_linerules][0] = st;
    lineRules[num_of_linerules][1] = en;
    lineRules[num_of_linerules++][2] = stp;
  }

  bool traceIteration(int iter){
    for (int i = 0; i < num_of_linerules; ++i){
      if (iter >= lineRules[i][0])
 if ((lineRules[i][1] == -1) || (iter < lineRules[i][1]))
   if ((lineRules[i][2] == -1) || (((iter - lineRules[i][0]) % lineRules[i][2]) == 0))
     return true;
    }
    return false;
  }

  bool traceRule(int rule){
    for (int i = 0; i < num_of_rulerules; ++i)
      if (rule == ruleRules[i])
 return true;
    return false;
  }
};

class Comp {
  vector rules;
  int iteration_count;
  DebugProfile dbg;
public:
  Comp(ifstream& fin, DebugProfile d){
    dbg = d;
 int num_rules = 0;
    char line[1024];
    while (!fin.eof()){
      fin.getline(line, 1024);
      int n = strlen(line);
      if ((n >= 7) && 
   (line[0] == '[') && 
   (line[1] == 'd') && 
   (line[2] == 'e') && 
   (line[3] == 'b') &&
   (line[4] == 'u') && 
   (line[5] == 'g')){
 if ((line[6] == 'r') && (line[7] == ']')){
   int rulenum = 0;
   for (int i = 8; i < n; ++i)
     if ((line[i] >= '0') && (line[i] <= '9'))
       rulenum = 10 * rulenum + (line[i] - '0');
   dbg.alertOnRule(rulenum);
 }
 if ((line[6] == 'l') && (line[7] == ']') && (line[8] == '[')){
   int start = 0; int end = -1; int step = -1;
   int i = 9;
   for (; (i < n) && (line[i] != ':'); ++i)
     if ((line[i] >= '0') && (line[i] <= '9')){
       if (start == -1)
  start = 0;
       start = 10 * start + (line[i] - '0');
     };
 ++i;
   for (; (i < n) && (line[i] != ':'); ++i)
     if ((line[i] >= '0') && (line[i] <= '9')){
       if (end == -1)
  end = 0;
       end = 10 * end + (line[i] - '0');
     };
  ++i;
   for (; (i < n) && (line[i] != ']'); ++i)
     if ((line[i] >= '0') && (line[i] <= '9')){
       if (step == -1)
  step = 0;
       step = 10 * step + (line[i] - '0');
     };
   dbg.alertOnLines(start, end, step);
 }
      }
      int i = 0;
      while ((i < n-1) && ((line[i] != '-') || (line[i+1] != '>')))
 ++i;
      if (i >= n-1)
 continue;
      string strOut = (line + (i + 2));
      line[i] = '\0';
      string strIn = line;
   ++num_rules;
      rules.push_back(Rule(strIn, strOut, num_rules));
    }
    fprintf(stderr, "%d rules created\n", rules.size());
  }

  string apply(string st){
    InternalString str(st);
    iteration_count = 0;
    bool matched = true;
    while (matched){
      matched = false;
      int rulenum = 0;
      for (vector::iterator rule_it = rules.begin(); rule_it != rules.end(); ++rule_it){
 ++rulenum;
 if (rule_it->tryAndApply(str)){
   ++iteration_count;
   if (dbg.traceIteration(iteration_count) || dbg.traceRule(rulenum)){
     fprintf(stderr, "Rule application #%d: (of rule number %d)\n",iteration_count, rulenum);
     fprintf(stderr, "%s\n", str.to_string().c_str());
   }
   matched = true;
   break;
 }
      }
    }
    fprintf(stderr, "%d iterations\n", iteration_count);
    return str.to_string();
  }
};

int main(int argc, char* argv[]){
  std::ifstream comp_in(argv[1]);
  Comp c(comp_in, DebugProfile());

  std::ifstream fin(argv[2]);
  while (!fin.eof()){
    char line[2048];
    fin.getline(line, 2048);
    if (strlen(line) == 0)
      continue;
    printf("%s\n", c.apply((string)line).c_str());
  }
}

It takes 2 parameters, both of which are filenames. The first filename points to a Markov Algorithm (such as this one), and the second filename points to a list of input lines (such as the input data in the previous link).

No comments:

Post a Comment