From d5586e57fcc7a6c41e0104c5983e66b21be9ab47 Mon Sep 17 00:00:00 2001 From: udlbook <110402648+udlbook@users.noreply.github.com> Date: Thu, 22 May 2025 12:12:42 -0400 Subject: [PATCH] Created using Colab --- Trees/SAT_Crossword_Answers.ipynb | 911 ++++++++++++++++++++++++++++++ 1 file changed, 911 insertions(+) create mode 100644 Trees/SAT_Crossword_Answers.ipynb diff --git a/Trees/SAT_Crossword_Answers.ipynb b/Trees/SAT_Crossword_Answers.ipynb new file mode 100644 index 0000000..618a211 --- /dev/null +++ b/Trees/SAT_Crossword_Answers.ipynb @@ -0,0 +1,911 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyMplmIQrTkWc/1oVZK7PhOy", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "QjHXD27ieTS-" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Crosswords with SAT\n", + "\n", + "The purpose of this Python notebook is to use investigate using SAT to find a valid arrangement of known answers in a crossword puzzle.\n", + "\n", + "You should have completed the notebook on SAT constructions before attempting this notebook. Note: this exercise is pretty hard. Expect it to take a while!\n", + "\n", + "You can save a local copy of this notebook in your Google account and work through it in Colab (recommended) or you can download the notebook and run it locally using Jupyter notebook or similar.\n", + "\n", + "Contact me at iclimbtreesmail@gmail.com if you find any mistakes or have any suggestions." + ], + "metadata": { + "id": "jtMs90veeZIn" + } + }, + { + "cell_type": "code", + "source": [ + "# Install relevant packages\n", + "!pip install z3-solver\n", + "from z3 import *\n", + "import numpy as np\n", + "import time" + ], + "metadata": { + "id": "mF6ngqCses3n", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "68804cab-5138-4ed6-c87b-4d27717acc8d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: z3-solver in /usr/local/lib/python3.11/dist-packages (4.14.1.0)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "First let's write some code to visualize a crossword problem. We'll represent the crossword as a ndarray of integers where each integer represents a letter index and zero represents a blank spot. " + ], + "metadata": { + "id": "3A5_7mByYrur" + } + }, + { + "cell_type": "code", + "source": [ + "puzzle = ['ALCOVE NSEC MIC',\n", + " 'LEANED ALTO ADO',\n", + " 'LAVALAMPOON CON',\n", + " 'ASSN EKG GABLE',\n", + " ' DENTI MEMO ',\n", + " ' AEOLIANHARPOON',\n", + " 'MOANER SAX SKUA',\n", + " 'ERS MVP TWI PTS',\n", + " 'OTTO AUS ESPRIT',\n", + " 'WAILINGWALLOON ',\n", + " ' NARA IDLES ',\n", + " 'REDYE UMA ECHO',\n", + " 'ARI FILMBUFFOON',\n", + " 'JOE UTNE SLOPPY',\n", + " 'ASS LEAR CORTEX']\n", + "\n", + "# Convert to a list of lists\n", + "for i in range(len(puzzle)):\n", + " puzzle[i] = [char for char in puzzle[i]]\n", + "\n", + "# Represent the puzzle as integers in a grid\n", + "puzzle_as_integers = np.zeros((len(puzzle), len(puzzle[0])), dtype=int)\n", + "for i in range(len(puzzle)):\n", + " for j in range(len(puzzle[i])):\n", + " if puzzle[i][j] == ' ':\n", + " puzzle_as_integers[i][j] = 0\n", + " else:\n", + " puzzle_as_integers[i][j] = ord(puzzle[i][j]) - ord('A') + 1\n", + "\n", + "print(puzzle)\n", + "print(puzzle_as_integers)" + ], + "metadata": { + "id": "cvGNbKkf-Qix", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "7ed1d16a-7cd8-4bdd-b790-26fdac322ab9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[['A', 'L', 'C', 'O', 'V', 'E', ' ', 'N', 'S', 'E', 'C', ' ', 'M', 'I', 'C'], ['L', 'E', 'A', 'N', 'E', 'D', ' ', 'A', 'L', 'T', 'O', ' ', 'A', 'D', 'O'], ['L', 'A', 'V', 'A', 'L', 'A', 'M', 'P', 'O', 'O', 'N', ' ', 'C', 'O', 'N'], ['A', 'S', 'S', 'N', ' ', ' ', 'E', 'K', 'G', ' ', 'G', 'A', 'B', 'L', 'E'], [' ', ' ', ' ', 'D', 'E', 'N', 'T', 'I', ' ', 'M', 'E', 'M', 'O', ' ', ' '], [' ', 'A', 'E', 'O', 'L', 'I', 'A', 'N', 'H', 'A', 'R', 'P', 'O', 'O', 'N'], ['M', 'O', 'A', 'N', 'E', 'R', ' ', 'S', 'A', 'X', ' ', 'S', 'K', 'U', 'A'], ['E', 'R', 'S', ' ', 'M', 'V', 'P', ' ', 'T', 'W', 'I', ' ', 'P', 'T', 'S'], ['O', 'T', 'T', 'O', ' ', 'A', 'U', 'S', ' ', 'E', 'S', 'P', 'R', 'I', 'T'], ['W', 'A', 'I', 'L', 'I', 'N', 'G', 'W', 'A', 'L', 'L', 'O', 'O', 'N', ' '], [' ', ' ', 'N', 'A', 'R', 'A', ' ', 'I', 'D', 'L', 'E', 'S', ' ', ' ', ' '], ['R', 'E', 'D', 'Y', 'E', ' ', 'U', 'M', 'A', ' ', ' ', 'E', 'C', 'H', 'O'], ['A', 'R', 'I', ' ', 'F', 'I', 'L', 'M', 'B', 'U', 'F', 'F', 'O', 'O', 'N'], ['J', 'O', 'E', ' ', 'U', 'T', 'N', 'E', ' ', 'S', 'L', 'O', 'P', 'P', 'Y'], ['A', 'S', 'S', ' ', 'L', 'E', 'A', 'R', ' ', 'C', 'O', 'R', 'T', 'E', 'X']]\n", + "[[ 1 12 3 15 22 5 0 14 19 5 3 0 13 9 3]\n", + " [12 5 1 14 5 4 0 1 12 20 15 0 1 4 15]\n", + " [12 1 22 1 12 1 13 16 15 15 14 0 3 15 14]\n", + " [ 1 19 19 14 0 0 5 11 7 0 7 1 2 12 5]\n", + " [ 0 0 0 4 5 14 20 9 0 13 5 13 15 0 0]\n", + " [ 0 1 5 15 12 9 1 14 8 1 18 16 15 15 14]\n", + " [13 15 1 14 5 18 0 19 1 24 0 19 11 21 1]\n", + " [ 5 18 19 0 13 22 16 0 20 23 9 0 16 20 19]\n", + " [15 20 20 15 0 1 21 19 0 5 19 16 18 9 20]\n", + " [23 1 9 12 9 14 7 23 1 12 12 15 15 14 0]\n", + " [ 0 0 14 1 18 1 0 9 4 12 5 19 0 0 0]\n", + " [18 5 4 25 5 0 21 13 1 0 0 5 3 8 15]\n", + " [ 1 18 9 0 6 9 12 13 2 21 6 6 15 15 14]\n", + " [10 15 5 0 21 20 14 5 0 19 12 15 16 16 25]\n", + " [ 1 19 19 0 12 5 1 18 0 3 15 18 20 5 24]]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Let's write a routine that draws this out nicely" + ], + "metadata": { + "id": "v7UbEiIjYdxj" + } + }, + { + "cell_type": "code", + "source": [ + "def draw_crossword(puzzle_as_integers):\n", + "\n", + " # Find number of rows and columns\n", + " n_rows = puzzle_as_integers.shape[0]\n", + " n_cols = puzzle_as_integers.shape[1]\n", + "\n", + " # Draw the top row\n", + " print(\"╔\", end=\"\")\n", + " for i in range(n_cols-1):\n", + " print(\"═╤\", end=\"\")\n", + " print(\"═╗\")\n", + "\n", + " for c_row in range(n_rows):\n", + " print(\"║\", end=\"\")\n", + " for c_col in range(n_cols):\n", + " if puzzle_as_integers[c_row][c_col] == 0:\n", + " print(u\"\\u2588\", end=\"\") # Use block character for blank spaces\n", + " else:\n", + " print(chr(puzzle_as_integers[c_row][c_col] + ord('A') - 1), end=\"\")\n", + " if(c_col < n_cols-1):\n", + " print(\"│\", end=\"\")\n", + " print(\"║\")\n", + "\n", + "\n", + " # Draw the bottom row\n", + " print(\"╚\", end=\"\")\n", + " for i in range(n_cols-1):\n", + " print(\"═╧\", end=\"\")\n", + " print(\"═╝\")\n", + "\n", + "draw_crossword(puzzle_as_integers)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gdJakiT6TrIU", + "outputId": "b89a05aa-a6b2-4fd1-ac77-60a435a30c91" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "╔═╤═╤═╤═╤═╤═╤═╤═╤═╤═╤═╤═╤═╤═╤═╗\n", + "║A│L│C│O│V│E│█│N│S│E│C│█│M│I│C║\n", + "║L│E│A│N│E│D│█│A│L│T│O│█│A│D│O║\n", + "║L│A│V│A│L│A│M│P│O│O│N│█│C│O│N║\n", + "║A│S│S│N│█│█│E│K│G│█│G│A│B│L│E║\n", + "║█│█│█│D│E│N│T│I│█│M│E│M│O│█│█║\n", + "║█│A│E│O│L│I│A│N│H│A│R│P│O│O│N║\n", + "║M│O│A│N│E│R│█│S│A│X│█│S│K│U│A║\n", + "║E│R│S│█│M│V│P│█│T│W│I│█│P│T│S║\n", + "║O│T│T│O│█│A│U│S│█│E│S│P│R│I│T║\n", + "║W│A│I│L│I│N│G│W│A│L│L│O│O│N│█║\n", + "║█│█│N│A│R│A│█│I│D│L│E│S│█│█│█║\n", + "║R│E│D│Y│E│█│U│M│A│█│█│E│C│H│O║\n", + "║A│R│I│█│F│I│L│M│B│U│F│F│O│O│N║\n", + "║J│O│E│█│U│T│N│E│█│S│L│O│P│P│Y║\n", + "║A│S│S│█│L│E│A│R│█│C│O│R│T│E│X║\n", + "╚═╧═╧═╧═╧═╧═╧═╧═╧═╧═╧═╧═╧═╧═╧═╝\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The goal of this notebook will be to take a set of words and create a crossword layout like this in an $n \\times n$ grid. We'll start with just a small set of clues. " + ], + "metadata": { + "id": "yDJLSLNtaSOL" + } + }, + { + "cell_type": "code", + "source": [ + "words = ['JANE','AUSTEN','PRIDE','NOVEL','DARCY','SENSE','EMMA','ESTATE','BENNET','BATH']" + ], + "metadata": { + "id": "NkNMg4GuYt-h" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "This routine takes the words, the grid size and various sets of constraints (which we'll develop one at a time). It then runs the solver and displays the crossword." + ], + "metadata": { + "id": "NdMkEZVl_Dci" + } + }, + { + "cell_type": "code", + "source": [ + "def solve_crossword (words, grid_size, add_constraint_set1, add_constraint_set2=None, add_constraint_set3=None, add_constraint_set4=None ):\n", + "\n", + " # Fail if longest string length is not large enough to fit in grid\n", + " longest_string_length = max(len(word) for word in words)\n", + " if (longest_string_length > grid_size):\n", + " print(\"Grid too small, no solution\")\n", + " return ;\n", + "\n", + " start_time = time.time()\n", + " # Set up the SAT solver\n", + " s = Solver()\n", + "\n", + " # This is a dictionary indexed by the word itself that contains the possible start\n", + " # positions of that word, and whether the word is horizontal or vertical\n", + " # The number of possible positions depend on the grid size as the word cannot exceed\n", + " # grid.\n", + " placement_vars = {word: [[[z3.Bool(f'{word}_{orientation}_{y},{x}')\n", + " for x in range(grid_size-len(word)+1 if orientation=='h' else grid_size )]\n", + " for y in range(grid_size-len(word)+1 if orientation=='v' else grid_size )]\n", + " for orientation in ['h', 'v']]\n", + " for word in words}\n", + "\n", + " # We will also define variables that indicate which letter is at which position\n", + " # There are 27 possible characters (26 letters and a blank)\n", + " # The variable x_i,j,k says that letter k is at position (i,j) in the grid\n", + " letter_posns = [[[ z3.Bool(\"x_{%d,%d,%d}\"%((i,j,k))) for k in range(0,27)] for j in range(0,grid_size) ] for i in range(0,grid_size) ]\n", + "\n", + " # Add the first set of constraints\n", + " s = add_constraint_set1(s, placement_vars, letter_posns, words, grid_size)\n", + " # Add the second set of constraints if present\n", + " if add_constraint_set2 is not None:\n", + " s = add_constraint_set2(s, placement_vars, letter_posns, words, grid_size)\n", + " # Add the third set of constraints if present\n", + " if add_constraint_set3 is not None:\n", + " s = add_constraint_set3(s, placement_vars, letter_posns, words, grid_size)\n", + " # Add the fourth set of constraints if present\n", + " if add_constraint_set4 is not None:\n", + " s = add_constraint_set4(s, placement_vars, letter_posns, words, grid_size)\n", + "\n", + " # Check if it's SAT (creates the model)\n", + " sat_result = s.check()\n", + " print(f\"Executed in {time.time()-start_time:.4f} seconds\")\n", + " print(sat_result)\n", + "\n", + " # If it is then draw crossword, otherwise return\n", + " if sat_result == z3.sat:\n", + " result = s.model()\n", + " # Retrieve the letter position variables in the solution as [0,1] values\n", + " x_vals = np.array([[[int(bool(result[z3.Bool(\"x_{%d,%d,%d}\" % (i, j, k))])) for k in range(0,27)] for j in range(0,grid_size) ] for i in range(0,grid_size) ] )\n", + "\n", + " # Find the position of the true value -- this is now a 2D grid with a 0 where there is a space and a value 1-26 representing a letter\n", + " solution = np.argmax(x_vals, axis=2)\n", + " # Draw the solution\n", + " draw_crossword(solution)\n", + " else:\n", + " print(\"No solution\")" + ], + "metadata": { + "id": "Ijuo4HdSefPk" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Here's a couple of helpful routines that we can make use of" + ], + "metadata": { + "id": "BBD_gK-q_AaT" + } + }, + { + "cell_type": "code", + "source": [ + "# Takes a list of z3.Bool variables and returns constraints\n", + "# ensuring that there is exactly one true\n", + "def exactly_one(x):\n", + " return PbEq([(i,1) for i in x],1)\n", + "\n", + "# Converts a word in capital letters to its indices so 'ABD' becomes [1,2,4]\n", + "def letter_to_index(word):\n", + " return [ord(char) - ord('A') + 1 for char in word]" + ], + "metadata": { + "id": "O5p-8Ul6cvsk" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Let's work on the first set of constraints. \n", + "\n", + "\n", + "1. Each word can only appear at one valid position\n", + "2. Each position in the grid can have only a single letter present\n", + "3. The letters implied by the word positions must agree where the words overlap\n", + "\n" + ], + "metadata": { + "id": "VT-QNf-FHClF" + } + }, + { + "cell_type": "code", + "source": [ + "def add_constraint_set1(s, placement_vars, letter_posns, words, grid_size):\n", + " # Constraint 1: Each word can only be placed in exactly one position\n", + " for word in words:\n", + " # Flatten the possible positions into a list\n", + " flat_list = [item for sublist1 in placement_vars[word] for sublist2 in sublist1 for item in sublist2]\n", + " # Add the constraint that exactly one must be true\n", + " s.add(exactly_one(flat_list))\n", + "\n", + " # Constraint 2: Each grid position can only have one letter present\n", + " for i in range(0,grid_size):\n", + " for j in range(0,grid_size):\n", + " s.add(exactly_one(letter_posns[i][j]))\n", + "\n", + " # Constraint 3: If a word is in a given position and orientation, the letters at the\n", + " # appropriate grid positions must correspond (uses the routine letter_to_index() defined above)\n", + " for word in words:\n", + " for i in range(0,grid_size):\n", + " for j in range(0,grid_size-len(word)+1):\n", + " for letter_index in range(0,len(word)):\n", + " s.add(Implies(placement_vars[word][0][i][j], letter_posns[i][j+letter_index][letter_to_index(word)[letter_index]]))\n", + " for i in range(0,grid_size-len(word)+1):\n", + " for j in range(0,grid_size):\n", + " for letter_index in range(0,len(word)):\n", + " s.add(Implies(placement_vars[word][1][i][j], letter_posns[i+letter_index][j][letter_to_index(word)[letter_index]]))\n", + "\n", + " return s" + ], + "metadata": { + "id": "NdjsISBCFr6K" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's test this routine so far\n", + "solve_crossword(words, 10, add_constraint_set1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ksTQlgvnHiqK", + "outputId": "d453c0b1-9507-4adf-9336-f38e4e6ff5ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "sat\n", + "╔═╤═╤═╤═╤═╤═╤═╤═╤═╤═╗\n", + "║A│█│█│E│S│T│A│T│E│S║\n", + "║U│B│E│N│N│E│T│O│J│C║\n", + "║S│F│S│V│E│Z│A│Z│A│G║\n", + "║T│E│H│X│V│B│R│D│N│T║\n", + "║E│S│E│N│S│E│C│A│E│A║\n", + "║N│T│O│█│█│K│Y│R│█│█║\n", + "║█│█│P│R│I│D│E│C│B│B║\n", + "║B│Z│A│B│B│J│I│Y│Q│█║\n", + "║J│B│A│T│H│B│E│M│M│A║\n", + "║N│O│V│E│L│A│E│B│U│S║\n", + "╚═╧═╧═╧═╧═╧═╧═╧═╧═╧═╝\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "You can see that the words are all in there, but we don't have blank spaces where we should. We need to add two further constraints to improve matters\n", + "\n", + "1. Horizontal words must have a blank space or an edge to the left and right of their positions. Vertical words must have a blank space or and edge above or below their positions.\n", + "2. Any position that is not part of a word should be blank\n", + "\n" + ], + "metadata": { + "id": "SlzqglXAH1wG" + } + }, + { + "cell_type": "code", + "source": [ + "def add_constraint_set2(s, placement_vars, letter_posns, words, grid_size):\n", + " # Constraint 1: Horizontal words must either start in the first column or have a 0 to their left\n", + " # Horizontal words must either finish in the last column of have a 0 to their right\n", + " # Vertical words must either start in the first row or have a 0 above them\n", + " # Vertical words must either end in the last row of have a 0 below them\n", + " for word in words:\n", + " # Horizontal words\n", + " for i in range(grid_size):\n", + " for j in range(1, grid_size - len(word)+1 ):\n", + " # Check for border or blank square before the word starts\n", + " s.add(Implies(placement_vars[word][0][i][j], letter_posns[i][j-1][0]))\n", + " s.add(Implies(placement_vars[word][0][i][j-1], letter_posns[i][j+len(word)-1][0]))\n", + "\n", + " # Vertical words\n", + " for i in range(1,grid_size - len(word)+1 ):\n", + " for j in range(grid_size):\n", + " # Check blank square before the word starts\n", + " s.add(Implies(placement_vars[word][1][i][j], letter_posns[i-1][j][0]))\n", + " s.add(Implies(placement_vars[word][1][i-1][j], letter_posns[i+len(word)-1][j][0]))\n", + "\n", + " # Constraint 2: Any position in the crossword grid that is not part of a word must be a blank space\n", + " # This stops random characters appearing outside the solution\n", + " for i in range(grid_size):\n", + " for j in range(grid_size):\n", + " # Create a list of placement variables that affect the current square\n", + " relevant_placements = []\n", + " for word in words:\n", + " # Horizontal words\n", + " for col in range(grid_size - len(word) + 1):\n", + " if j >= col and j < col + len(word):\n", + " relevant_placements.append(placement_vars[word][0][i][col])\n", + "\n", + " # Vertical words\n", + " for row in range(grid_size - len(word) + 1):\n", + " if i >= row and i < row + len(word):\n", + " relevant_placements.append(placement_vars[word][1][row][j])\n", + "\n", + "\n", + " # If none of the relevant placements are true, the square must be blank\n", + " s.add(Implies(Not(Or(relevant_placements)), letter_posns[i][j][0]))\n", + "\n", + " return s" + ], + "metadata": { + "id": "7iHXNe_0F7ej" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's test this routine so far\n", + "solve_crossword(words, 10, add_constraint_set1, add_constraint_set2)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lPEcYEIbItHp", + "outputId": "b6a560d2-9962-4be1-bc8b-86b853c77c64" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "sat\n", + "╔═╤═╤═╤═╤═╤═╤═╤═╤═╤═╗\n", + "║█│█│█│█│█│█│█│█│█│█║\n", + "║█│█│█│█│█│█│█│█│█│█║\n", + "║█│█│█│█│█│█│█│█│█│█║\n", + "║█│█│P│█│█│█│E│█│█│█║\n", + "║█│█│R│A│█│B│S│█│█│█║\n", + "║█│N│I│U│█│E│T│█│S│D║\n", + "║B│O│D│S│J│N│A│E│E│A║\n", + "║A│V│E│T│A│N│T│M│N│R║\n", + "║T│E│█│E│N│E│E│M│S│C║\n", + "║H│L│█│N│E│T│█│A│E│Y║\n", + "╚═╧═╧═╧═╧═╧═╧═╧═╧═╧═╝\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "That's an improvement, but it's not perfect; every letter is now part of either a horizontal or a vertical word. However, when there are several vertical words adjacent to each other and we read horizontally across these words, we get nonsense. We can fix this by adding another constraint:\n", + "\n", + "* If a letter is in a horizontal word, it is either inside a vertical word as well *OR* it has a blank square (or the edge of the grid) above and below it.\n", + "* If a letter is in a vertical word, it is either inside a horizontal word as well *OR* it has a blank square (or the edge of the grid) to the left and the right of it." + ], + "metadata": { + "id": "2X4guZwZI_JW" + } + }, + { + "cell_type": "code", + "source": [ + "def add_constraint_set3(s, placement_vars, letter_posns, words, grid_size):\n", + " # Constraint 1: If a letter is in a horizontal word, it either\n", + " # -- is inside a vertical word as well\n", + " # -- has a blank (or edge) above and below it\n", + " # If a letter in a vertical word exists, it is either\n", + " # -- is inside a horizontal word too\n", + " # -- has a blank (or edge) to the left and to the right of it.\n", + " for i in range(0,grid_size):\n", + " for j in range(0,grid_size):\n", + " relevant_placements_horz = []\n", + " relevant_placements_vert = []\n", + " for word in words:\n", + " for j2 in range (max(0,j-len(word)+1), min(j+1,grid_size-len(word)+1)):\n", + " relevant_placements_horz.append(placement_vars[word][0][i][j2])\n", + " for i2 in range(max(0,i-len(word)+1), min(i+1,grid_size-len(word)+1)):\n", + " relevant_placements_vert.append(placement_vars[word][1][i2][j])\n", + " in_horizontal_word = Or(relevant_placements_horz)\n", + " in_vertical_word = Or(relevant_placements_vert)\n", + "\n", + " if(i == 0):\n", + " above_and_below_are_blank = letter_posns[i+1][j][0]\n", + " else:\n", + " if(i == grid_size-1):\n", + " above_and_below_are_blank = letter_posns[i-1][j][0]\n", + " else:\n", + " above_and_below_are_blank = And(letter_posns[i-1][j][0],letter_posns[i+1][j][0])\n", + "\n", + " if(j == 0):\n", + " left_and_right_are_blank = letter_posns[i][j+1][0]\n", + " else:\n", + " if(j == grid_size-1):\n", + " left_and_right_are_blank = letter_posns[i][j-1][0]\n", + " else:\n", + " left_and_right_are_blank = And(letter_posns[i][j-1][0],letter_posns[i][j+1][0])\n", + " s.add(Implies(in_horizontal_word, Or(in_vertical_word, above_and_below_are_blank)))\n", + " s.add(Implies(in_vertical_word, Or(in_horizontal_word, left_and_right_are_blank)))\n", + "\n", + " return s" + ], + "metadata": { + "id": "4LSgimAjGQdT" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's see how this improves things\n", + "solve_crossword(words, 10, add_constraint_set1, add_constraint_set2, add_constraint_set3)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8HHqCWMzKCTL", + "outputId": "76f583ca-fcd5-4f67-9373-8a29596564ff" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "sat\n", + "╔═╤═╤═╤═╤═╤═╤═╤═╤═╤═╗\n", + "║D│A│R│C│Y│█│E│M│M│A║\n", + "║█│█│█│█│█│█│S│█│█│█║\n", + "║█│B│E│N│N│E│T│█│█│█║\n", + "║█│█│█│█│█│█│A│█│█│█║\n", + "║J│A│N│E│█│█│T│█│█│█║\n", + "║█│█│█│█│█│S│E│N│S│E║\n", + "║N│O│V│E│L│█│█│█│█│█║\n", + "║█│█│█│█│█│P│R│I│D│E║\n", + "║B│A│T│H│█│█│█│█│█│█║\n", + "║█│█│█│█│A│U│S│T│E│N║\n", + "╚═╧═╧═╧═╧═╧═╧═╧═╧═╧═╝\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "This looks like it's working better, but we now have the problem that words do not all connect to each other. Hence, we have to add a final constraint that all of the letters are connected.\n", + "\n", + "First we form an $N\\times N$ adjacency matrix which the value is true if word $i$ intersects with word $j$. Then we use the 'is_fully_connected' construction that we developed in the notebook on SAT constructions." + ], + "metadata": { + "id": "R69urJyN9wlc" + } + }, + { + "cell_type": "code", + "source": [ + "def is_fully_connected(s, adjacency):\n", + " # Size of the adjacency matrix\n", + " n_components = len(adjacency)\n", + " # We'll construct a N x N x log[N] array of variables\n", + " # The NxN variables in the first layer represent A, the variables in the second layer represent B and so on\n", + " n_layers = math.ceil(math.log(n_components,2))+1\n", + " connected = [[[ z3.Bool(\"conn_{%d,%d,%d}\"%((i,j,n))) for n in range(0, n_layers)] for j in range(0, n_components) ] for i in range(0, n_components) ]\n", + "\n", + " # Constraint 1\n", + " # The value in the top layer of the connected structure is equal to the adjacency matrix\n", + " for i in range(n_components):\n", + " for j in range(n_components):\n", + " s.add(connected[i][j][0]==adjacency[i][j])\n", + "\n", + " # Constraint 2\n", + " # Value at position [i,j] in layer n is value at position [i,j] of the binary matrix product of layer n-1 with itself\n", + " for n in range(1,n_layers):\n", + " for i in range(n_components):\n", + " for j in range(n_components):\n", + " matrix_entry_ij = False\n", + " for k in range(n_components):\n", + " matrix_entry_ij = Or(matrix_entry_ij, And(connected[i][k][n-1],connected[k][j][n-1]))\n", + " s.add(connected[i][j][n]==matrix_entry_ij)\n", + "\n", + " # Constraint 3 -- any row of column of the matrix should be full of ones at the end (everything is connected)\n", + " for i in range(n_components):\n", + " s.add(connected[i][0][n_layers-1])\n", + "\n", + " return s" + ], + "metadata": { + "id": "2uPPXENwLugr" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Helper routine that returns true if the current word is at position (i,j) in the grid\n", + "def word_at_position_ij(i,j, placement_vars, word, grid_size):\n", + "\n", + " relevant_placements = [] ;\n", + " # Deal with horizontal words first\n", + " for horz_pos in range(np.max([0, j-len(word)+1]), np.min([j+1, grid_size-len(word)+1])):\n", + " # First the horizontal words\n", + " relevant_placements.append(placement_vars[word][0][i][horz_pos])\n", + " # Then the vertical words\n", + " for vert_pos in range(np.max([0, i-len(word)+1]), np.min([i+1, grid_size-len(word)+1])):\n", + " relevant_placements.append(placement_vars[word][1][vert_pos][j])\n", + "\n", + " return Or(relevant_placements) ;" + ], + "metadata": { + "id": "FhJPmmAOV3AS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def add_constraint_set4(s, placement_vars, letter_posns, words, grid_size):\n", + " # First lets create a new variable that represents the adjacency matrix of the words\n", + " adjacency = [[ z3.Bool(\"adj_{%d,%d}\"%((i,j))) for j in range(0, len(words)) ] for i in range(0, len(words)) ]\n", + "\n", + " # Run through each word\n", + " for c_w1 in range(len(words)):\n", + " for c_w2 in range(c_w1, len(words)):\n", + " # If indices are the same then adjacency is true\n", + " if c_w1 == c_w2:\n", + " s.add(adjacency[c_w1][c_w2])\n", + "\n", + " word1 = words[c_w1]\n", + " word2 = words[c_w2]\n", + " words_intersect = False\n", + " # Run through each position in the grid\n", + " for i in range(grid_size):\n", + " for j in range(grid_size):\n", + " # Words interset if both at a given position for any position\n", + " words_intersect = Or(words_intersect, And(word_at_position_ij(i,j, placement_vars, word1, grid_size), word_at_position_ij(i,j, placement_vars, word2, grid_size)))\n", + " # Set value and symmetric value of adjacency matrix\n", + " s.add(adjacency[c_w1][c_w2] == words_intersect)\n", + " s.add(adjacency[c_w2][c_w1] == adjacency[c_w1][c_w2])\n", + "\n", + " # Add the constraint that the adjacency matrix must be fully connected\n", + " s = is_fully_connected(s, adjacency)\n", + " return s" + ], + "metadata": { + "id": "Xmzv8g_-RIid" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's see how this improves things (took 32 seconds to run for me, but might be longer)\n", + "solve_crossword(words, 11, add_constraint_set1, add_constraint_set2, add_constraint_set3, add_constraint_set4)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M7pPhYTfaLEd", + "outputId": "fe616dd9-caca-4aea-e94c-fefaf5fe0ec4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Executed in 32.3613 seconds\n", + "sat\n", + "╔═╤═╤═╤═╤═╤═╤═╤═╤═╤═╤═╗\n", + "║█│█│█│█│█│█│█│█│█│█│█║\n", + "║J│█│█│█│█│█│█│█│█│█│█║\n", + "║A│█│█│B│█│█│█│█│█│█│P║\n", + "║N│O│V│E│L│█│█│█│█│█│R║\n", + "║E│█│█│N│█│█│█│B│█│█│I║\n", + "║█│█│█│N│█│S│█│A│█│█│D║\n", + "║D│█│█│E│█│E│S│T│A│T│E║\n", + "║A│U│S│T│E│N│█│H│█│█│█║\n", + "║R│█│█│█│█│S│█│█│█│█│█║\n", + "║C│█│█│█│█│E│M│M│A│█│█║\n", + "║Y│█│█│█│█│█│█│█│█│█│█║\n", + "╚═╧═╧═╧═╧═╧═╧═╧═╧═╧═╧═╝\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Now let's see what the smallest grid we can fit this in is. The longest word is 6 letters, so can't be shorter than this\n", + "solve_crossword(words, 10, add_constraint_set1, add_constraint_set2, add_constraint_set3, add_constraint_set4)\n", + "solve_crossword(words, 9, add_constraint_set1, add_constraint_set2, add_constraint_set3, add_constraint_set4)\n", + "solve_crossword(words, 8, add_constraint_set1, add_constraint_set2, add_constraint_set3, add_constraint_set4)\n", + "solve_crossword(words, 7, add_constraint_set1, add_constraint_set2, add_constraint_set3, add_constraint_set4)\n", + "solve_crossword(words, 6, add_constraint_set1, add_constraint_set2, add_constraint_set3, add_constraint_set4)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OFLdFlaP6g7L", + "outputId": "d7d8b512-84ef-4b48-a9f9-9912cd01622b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Executed in 139.9588 seconds\n", + "sat\n", + "╔═╤═╤═╤═╤═╤═╤═╤═╤═╤═╗\n", + "║█│█│█│█│█│█│█│█│B│█║\n", + "║█│█│█│█│█│J│█│█│E│█║\n", + "║D│█│█│█│█│A│█│█│N│█║\n", + "║A│U│S│T│E│N│█│█│N│█║\n", + "║R│█│█│█│S│E│N│S│E│█║\n", + "║C│█│█│█│T│█│O│█│T│█║\n", + "║Y│█│█│█│A│█│V│█│█│B║\n", + "║█│█│█│█│T│█│E│M│M│A║\n", + "║P│R│I│D│E│█│L│█│█│T║\n", + "║█│█│█│█│█│█│█│█│█│H║\n", + "╚═╧═╧═╧═╧═╧═╧═╧═╧═╧═╝\n", + "Executed in 261.2188 seconds\n", + "sat\n", + "╔═╤═╤═╤═╤═╤═╤═╤═╤═╗\n", + "║█│█│█│█│B│A│T│H│█║\n", + "║P│█│█│█│E│█│█│█│█║\n", + "║R│█│A│█│N│█│█│S│█║\n", + "║I│█│U│█│N│O│V│E│L║\n", + "║D│█│S│█│E│█│█│N│█║\n", + "║E│S│T│A│T│E│█│S│█║\n", + "║█│█│E│█│█│M│█│E│█║\n", + "║J│A│N│E│█│M│█│█│█║\n", + "║█│█│█│█│D│A│R│C│Y║\n", + "╚═╧═╧═╧═╧═╧═╧═╧═╧═╝\n", + "Executed in 199.8628 seconds\n", + "unsat\n", + "No solution\n", + "Executed in 4.0513 seconds\n", + "unsat\n", + "No solution\n", + "Executed in 1.7816 seconds\n", + "unsat\n", + "No solution\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "These were some random words that I chose for an example. Here are some real words from an NY Times mini puzzle. You can see that it recovers the minimal possible solution correctly." + ], + "metadata": { + "id": "kFQs0iZXmipH" + } + }, + { + "cell_type": "code", + "source": [ + "words2 = ['GUM','TAB','ERA','END','IRA','MAP','TIMWALZ','ONE','ELI','COATS','POPHITS', \\\n", + " 'GOT', 'PIE','BIZ','SPA','ALLSTAR','UNICORN','MEMOPAD','WAH','TEATIME']\n", + "solve_crossword(words2, 7, add_constraint_set1, add_constraint_set2, add_constraint_set3, add_constraint_set4)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sDmahKMoldGi", + "outputId": "0cf09d11-d14f-431b-e6e8-c08a4e7ea201" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Executed in 13.5120 seconds\n", + "sat\n", + "╔═╤═╤═╤═╤═╤═╤═╗\n", + "║G│U│M│█│T│A│B║\n", + "║O│N│E│█│E│L│I║\n", + "║T│I│M│W│A│L│Z║\n", + "║█│C│O│A│T│S│█║\n", + "║P│O│P│H│I│T│S║\n", + "║I│R│A│█│M│A│P║\n", + "║E│N│D│█│E│R│A║\n", + "╚═╧═╧═╧═╧═╧═╧═╝\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Further work for keeners:\n", + "\n", + "1. Technically, we should also put in a constraint to stop two words starting in the same place. For example, the current solver would allow the words 'EAT' and 'EATEN' to start in the same place and overlap. Add this constraint.\n", + "\n", + "2. As proposed, this is fairly impractical to help make crosswords. It would be more useful to have a superset of answers and allow the SAT solver to choose from these to make a sensible crossword. You would then ask the SAT solver to build an crossword of size $N$ that included at least $W$ words. Add this feature.\n" + ], + "metadata": { + "id": "c2ZiS3OgzHXW" + } + } + ] +} \ No newline at end of file