Efficiency of Cryptarithmetic Algorithm solver decomposition

1k Views Asked by At

The problem is the following: Given "ABC+DEF=GHI" format string, where A,B,C etc. represent unique digits, find the expression that gives maximum GHI. Ex: Input string is AAB+AAB=AAB, then there's no solution. If it is instead AAA + BBB = AAA, a solution is 999 + 000 = 999. Another example string: ABC + CBA = GGG, a result is => 543 + 345 = 888.

I have ruled out impossible cases easily. The algorithm I have in mind is a bruteforce, that simply tries maximizing the rhs first. However my problem was doing this fast, and also watching out for the unique digits. What's an efficient way to solve this problem?

Notes: I wish to solve this in a singlethreaded approach, and my current problem is detecting if a unique digit is used in "assign_value" function. Perhaps a better method to assign values is there?

EDIT: As per smci's suggestion, here's what I want to achieve, in the very end: ABRA + CADABRA + ABRA + CADABRA == HOUDINI ; 7457 + 1797457 + 7457 + 1797457 == 3609828 -- A system that can handle not only strings of the form I provided in the beginning (3 digit number + 3 digit number = 3 digit number) but also those. However it doesn't hurt to start simple and go with the solution of format I gave :)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_EXPRESSION_SIZE 11 + 1
#define MAX_VARIABLES 9

int variables_read[MAX_VARIABLES] = { 0 };

struct variable {
    int coefficient;
    int* ptr;
    int side;
    int canhavezero;
    unsigned value_max;
};

typedef struct variable Variable;

struct equation {
    Variable* variables[9]; // max
    unsigned distinct_on_rhs;
    unsigned var_count;
};

typedef struct equation Equation;

int int_pow(int n, int k) {
    int res = 1;
    for(int i = 0; i < k; ++i)
        res *= n;
    return res;
}

void AddVariable(Equation* E, Variable* V) {
    E->variables[E->var_count++] = V;
}

int IsImpossible(char* expression) {
    // if all letters are same or end letters are same, no solution
    if(
        (expression[0] == expression[4] && expression[0] == expression[8]) ||
        (!strncmp(expression, expression + 4, 3) && !strncmp(expression, expression + 8, 3))
      )
        return 1;
    return 0;
}

int assign_value(Equation* E, int pos, int* values) {
    if(!E->variables[pos]->value_count) {
        if(pos < 0)
            return 2;
        // if no possible values left, reset this, but take one value count from the closest variable
        E->variables[pos - 1]->value_count--;
        E->variables[pos]->value_count = E->variables[pos]->value_max;
        return 0;
    }
    int i;
    for(i = 9; i >= 0 && values[i] == -1; --i)
    printf("Assigning %d to %c\n", E->variables[pos]->value_set[E->variables[pos]->value_count - 1], 'A' + (E->variables[pos]->ptr - E->variables[0]->ptr));
    *(E->variables[pos]->ptr) = values[i];
    values[i] = -1; // we have unique numbers
    return 0;
}

int isSolved(Equation E) {
    int sum = 0, coeff = 0;
    printf("Trying...\n");
    for(int i = 0; i < E.var_count; ++i) {
        coeff = E.variables[i]->coefficient * (*E.variables[i]->ptr);
        printf("%d ", *E.variables[i]->ptr);
        if(E.variables[i]->side)
            coeff *= -1;
        sum += coeff;
    }
    printf("\nSum was %d\n", sum);
    return !sum;
}

char* evaluate(char* expression) {
    char* res;
    // check for impossible cases first
    if(IsImpossible(expression)) {
        res = (char *) malloc(sizeof(char) * strlen("No Solution!"));
        strcpy(res, "No Solution!");
        return res;
    }
    res = (char *) malloc(sizeof(char) * MAX_EXPRESSION_SIZE);
    // now try to find solutions, first describe the given characters as equations
    Equation E;
    E.var_count = 0;
    E.distinct_on_rhs = 0;
    int side_mode = 0, powcounter = 0;
    int a = -1, b = -1, c = -1, d = -1, e = -1, f = -1, g = -1, h = -1, i = -1;
    int* max_variables[MAX_VARIABLES] = { &a, &b, &c, &d, &e, &f, &g, &h, &i };
    for(int j = 0; j < MAX_EXPRESSION_SIZE - 1; ++j) {
        if(expression[j] == '+')
            continue;
        if(expression[j] == '=') {
            side_mode = 1;
            continue;
        }
        Variable* V = (Variable *) malloc(sizeof(Variable));
        // we know we always get 3 digit numbers but we can easily change if we need to
        V->coefficient = int_pow(10, 2 - (powcounter % 3));
        V->ptr = max_variables[expression[j] - 'A'];
        V->side = side_mode;
        E.distinct_on_rhs += side_mode && !variables_read[expression[j] - 'A'];
        if(!(powcounter % 3)) { // beginning of a number
            V->value_count = 9;
            V->value_max = 9;
            V->canhavezero = 0;
        }
        else {
            V->value_count = 10;
            V->value_max = 10;
            V->canhavezero = 1;
        }
        AddVariable(&E, V);
        variables_read[expression[j] - 'A'] = 1;
        ++powcounter;
    }
    for(int j = 0; j < E.var_count; ++j)
        printf("%d %c %d\n", E.variables[j]->coefficient, 'A' + (E.variables[j]->ptr - max_variables[0]), E.variables[j]->side);
    // we got a representaion of the equation, now try to solve it
    int solved = 0;
    // O(9^N), where N is number of distinct variables.
    // An optimization we can do is, we first assign possible max values to rhs number, then go down. We need max number.
    printf("Distincts: %d\n", E.distinct_on_rhs);
    do {
        // try to assign values to all variables and try if it solves the equation
        // but first try to assign rhs as max as possible
        int values[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
        int temp = E.var_count - E.distinct_on_rhs;
        while(temp < E.var_count) {
            solved = assign_value(&E, temp, values);
            ++temp;
        }
        for(int j = E.var_count - 1 - E.distinct_on_rhs; j >= 0; --j)
            solved = assign_value(&E, j, values);
        if(solved) // can return no solution
            break;
        printf("Solving...\n");
        solved = isSolved(E);
        system("PAUSE");
    } while(!solved);
    if(solved == 2) {
        res = (char *) malloc(sizeof(char) * strlen("No Solution!"));
        strcpy(res, "No Solution!");
    }
    else {

    }
    return res;
}

int main() {
    char expression[MAX_EXPRESSION_SIZE] = { 0 };
    do {
        printf("Enter the formula: ");
        scanf("%s", expression);
        char* res = evaluate(expression);
        printf("%s\n", res);
        free(res);
    } while(expression[0] != '-');
    return 0;
}
3

There are 3 best solutions below

4
alain On BEST ANSWER

I would start with the result. There are not that many different cases:

  • AAA
  • AAB, ABA, BAA
  • ABC

All other cases can be reduced to these by renaming the variables. ABC + CBA = GGG would become DBC + CBD = AAA.

Then you have

  • 10 possible solutions for the one-variable case AAA
  • 90 (10*9) for the two variable cases
  • 720 (10*9*8) for the three variable case

assuming that zero is allowed anywhere. If not, you can filter out those that are not allowed.

This sets the variables for the right side of the equation. Each variable that appears only on the left, adds possible solutions. B adds a factor of 9, C a factor of 8, D 7 and so forth.

0
chux - Reinstate Monica On

The most "efficient" solution would take all knowledge of the task and simple print the result. So the question is how much of the conditions can be coded and where and what flexibility is needed.

An alternative is to view the generation of test cases and evaluation of them separately.

A simple recursion function can generate the 10! (362880) test cases of unique digits.

unsigned long long count = 0;
unsigned long long sol = 0;

void evaluate(int object[]) {
  count++;
  int ABC = object[0] * 100 + object[1] * 10 + object[2];
  int DEF = object[3] * 100 + object[4] * 10 + object[5];
  int GHI = object[6] * 100 + object[7] * 10 + object[8];
  if (ABC + DEF == GHI) {
    printf("%4llu %03d + %03d = %03d\n", ++sol, ABC,DEF,GHI);
  }
}

void form_combos(int pool[], size_t pool_count, int object[],
    size_t object_count, size_t object_count_max) {
  if (object_count >= object_count_max) {
    evaluate(object);
    return;
  }
  assert(pool_count > 0);
  int *pool_end = pool + pool_count - 1;
  for (size_t p = 0; p < pool_count; p++) {
    int sample = pool[p];  // take one out
    pool[p] = *pool_end;   // replace it with the end
    object[object_count] = sample;
    form_combos(pool, pool_count - 1, object, object_count + 1,
        object_count_max);
    pool[p] = sample;      // restore pool item
  }
}

int main() {
  int pool[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
  size_t pool_size = sizeof pool / sizeof pool[0];
  #define object_count 9
  int object[object_count];
  form_combos(pool, pool_size, object, 0, object_count);
  printf("Evaluate() iterations %llu\n", count);
}

Output

   1 091 + 762 = 853
   2 091 + 763 = 854
   3 091 + 735 = 826
...
1726 874 + 061 = 935
1727 875 + 046 = 921
1728 876 + 045 = 921
Evaluate() iterations 3628800

What is nice about this approach is that if the task was now find

ABC*ABC + DEF*DEF == GHI*GHI

Changing only 2 lines of code:

  if (ABC*ABC + DEF*DEF == GHI*GHI) {
    printf("%4llu sqr(%03d) + sqr(%03d) = sqr(%03d)\n", ++sol, ABC,DEF,GHI);
  }

results in

   1 sqr(534) + sqr(712) = sqr(890)
   2 sqr(546) + sqr(728) = sqr(910)
   3 sqr(712) + sqr(534) = sqr(890)
   4 sqr(728) + sqr(546) = sqr(910)
Evaluate() iterations 3628800
0
SenselessCoder On

Ok, so for a trivial solution (a base to build a generalization on, so far it only works on the format <3 digit number> + <3 digit number> = <3 digit number>) inspired from @chux and @alain's suggestions is the following code. It truly runs on O(10^N) where N is the distinct number of digits present, or variables if you'd like to call them that. I'll see if I can generalize this even further.

Note that this is for the initial problem of finding the largest rhs. Take that into account as well.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_DIGITS 10
#define MAX_VARIABLES 9
#define MAX_EXPRESSION_SIZE 11

int IsImpossible(char* expression) {
    // if all letters are same or end letters are same, no solution
    if(
        (expression[0] == expression[4] && expression[0] == expression[8]) ||
        (!strncmp(expression, expression + 4, 3) && !strncmp(expression, expression + 8, 3))
      )
        return 1;
    return 0;
}

int ArePointersAssigned(int*** pointers) {
    for(int i = 0; i < MAX_VARIABLES; ++i) {
        if(**pointers[i] == -1)
            return 0;
    }
    return 1;
}

int evaluate(int*** pointers) {
    int ABC = *(*pointers[0]) * 100 + *(*pointers[1]) * 10 + *(*pointers[2]);
    int DEF = *(*pointers[3]) * 100 + *(*pointers[4]) * 10 + *(*pointers[5]);
    int GHI = *(*pointers[6]) * 100 + *(*pointers[7]) * 10 + *(*pointers[8]);
    if (ABC + DEF == GHI) { // since we use dfs, if this is a solution simply return it
        //printf("%d + %d = %d\n", ABC, DEF, GHI);
        return 1;
    }
    return 0;
}

// use the solved pointer to escape recursion early
// check_end checks if we reached 6 for the 2nd time, if it's first time we ignore (because it's start state)
void form_combos(int pool[], int pool_count, int object_count, int*** pointers, int* solved) {
    if(object_count == MAX_DIGITS - 1)
        object_count = 0;
    if(*solved) // if a branch solved this, escape recursion
        return;
    if (ArePointersAssigned(pointers)) { // that means we got a full equation set
        *solved = evaluate(pointers);
        if(*solved)
            return;
    }
    int *pool_end = pool + pool_count - 1;
    for (int p = pool_count - 1; p >= 0 && !*solved; p--) {
        int sample = pool[p];  // take one out
        pool[p] = *pool_end;   // replace it with the end
        int temp = **pointers[object_count];
        if(**pointers[object_count] == -1)
            **pointers[object_count] = sample;
        form_combos(pool, pool_count - 1, object_count + 1, pointers, solved);
        pool[p] = sample;      // restore pool item
        if(!*solved)
            **pointers[object_count] = temp;
    }
}

int main() {
    char expression[MAX_EXPRESSION_SIZE] = { 0 };
    printf("Enter the formula: ");
    scanf("%s", expression);
    while(expression[0] != '-') {
        if(IsImpossible(expression))
            printf("No solution!\n");
        else {
            int digits[MAX_DIGITS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
            int object[MAX_VARIABLES] = { -1, -1, -1, -1, -1, -1, -1, -1, -1 }; // stack for dfs
            int *A = &object[0], *B = &object[1], *C = &object[2], 
                *D = &object[3], *E = &object[4], *F = &object[5], 
                *G = &object[6], *H = &object[7], *I = &object[8];
            // set same pointers
            int** pointers[MAX_VARIABLES] = { &A, &B, &C, &D, &E, &F, &G, &H, &I };

            // analyze the equation
            int var = 0;
            for(int p = 0; p < MAX_EXPRESSION_SIZE; ++p) {
                if(expression[p] >= 'A' && expression[p] <= 'I') {
                    *pointers[var++] = &object[expression[p] - 'A']; // link same pointers
                }
            }
            int solved = 0, check_end = 0;
            form_combos(digits, MAX_DIGITS, MAX_DIGITS - 4, pointers, &solved);
            if(!solved) // it can be unsolvable still
                printf("No solution!\n");
            else
                printf("%d%d%d + %d%d%d = %d%d%d\n", *A, *B, *C, *D, *E, *F, *G, *H, *I);
        }
        printf("Enter the formula: ");
        scanf("%s", expression);
    }
    return 0;
}