From 3cce36f24f02146986c11cd9ef6381e36952f866 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Sun, 4 Sep 2016 14:20:41 -0400 Subject: Implement typo detection for literals. --- Makefile | 2 +- parse.c | 13 ++++- typo.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ typo.h | 26 ++++++++++ 4 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 typo.c create mode 100644 typo.h diff --git a/Makefile b/Makefile index d2373b4..b80873d 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ ALL_LDFLAGS = $(ALL_CFLAGS) $(LDFLAGS) all: bfs -bfs: bftw.o color.o dstring.o eval.o main.o parse.o +bfs: bftw.o color.o dstring.o eval.o main.o parse.o typo.o $(CC) $(ALL_LDFLAGS) $^ -o $@ release: CFLAGS := -O3 -flto -Wall -DNDEBUG diff --git a/parse.c b/parse.c index e720ba3..b8f79ad 100644 --- a/parse.c +++ b/parse.c @@ -10,6 +10,7 @@ *********************************************************************/ #include "bfs.h" +#include "typo.h" #include #include #include @@ -1499,8 +1500,18 @@ static struct expr *parse_literal(struct parser_state *state) { } } + const struct table_entry *best = NULL; + int best_dist; + for (const struct table_entry *entry = parse_table; entry->arg; ++entry) { + int dist = typo_distance(arg, entry->arg); + if (!best || dist < best_dist) { + best = entry; + best_dist = dist; + } + } + pretty_error(cmdline->stderr_colors, - "error: Unknown argument '%s'.\n", arg); + "error: Unknown argument '-%s'; did you mean '-%s'?\n", arg, best->arg); return NULL; } diff --git a/typo.c b/typo.c new file mode 100644 index 0000000..e3b8e43 --- /dev/null +++ b/typo.c @@ -0,0 +1,173 @@ +/********************************************************************* + * bfs * + * Copyright (C) 2016 Tavian Barnes * + * * + * This program is free software. It comes without any warranty, to * + * the extent permitted by applicable law. You can redistribute it * + * and/or modify it under the terms of the Do What The Fuck You Want * + * To Public License, Version 2, as published by Sam Hocevar. See * + * the COPYING file or http://www.wtfpl.net/ for more details. * + *********************************************************************/ + +#include "typo.h" +#include +#include +#include + +struct coords { + int x, y; +}; + +// Assume QWERTY layout for now +static const struct coords key_coords[UCHAR_MAX] = { + ['`'] = { 0, 0}, + ['~'] = { 0, 0}, + ['1'] = { 3, 0}, + ['!'] = { 3, 0}, + ['2'] = { 6, 0}, + ['@'] = { 6, 0}, + ['3'] = { 9, 0}, + ['#'] = { 9, 0}, + ['4'] = {12, 0}, + ['$'] = {12, 0}, + ['5'] = {15, 0}, + ['%'] = {15, 0}, + ['6'] = {18, 0}, + ['^'] = {18, 0}, + ['7'] = {21, 0}, + ['&'] = {21, 0}, + ['8'] = {24, 0}, + ['*'] = {24, 0}, + ['9'] = {27, 0}, + ['('] = {27, 0}, + ['0'] = {30, 0}, + [')'] = {30, 0}, + ['-'] = {33, 0}, + ['_'] = {33, 0}, + ['='] = {36, 0}, + ['+'] = {36, 0}, + + ['\t'] = { 1, 3}, + ['q'] = { 4, 3}, + ['Q'] = { 4, 3}, + ['w'] = { 7, 3}, + ['W'] = { 7, 3}, + ['e'] = {10, 3}, + ['E'] = {10, 3}, + ['r'] = {13, 3}, + ['R'] = {13, 3}, + ['t'] = {16, 3}, + ['T'] = {16, 3}, + ['y'] = {19, 3}, + ['Y'] = {19, 3}, + ['u'] = {22, 3}, + ['U'] = {22, 3}, + ['i'] = {25, 3}, + ['I'] = {25, 3}, + ['o'] = {28, 3}, + ['O'] = {28, 3}, + ['p'] = {31, 3}, + ['P'] = {31, 3}, + ['['] = {34, 3}, + ['{'] = {34, 3}, + [']'] = {37, 3}, + ['}'] = {37, 3}, + ['\\'] = {40, 3}, + ['|'] = {40, 3}, + + ['a'] = { 5, 6}, + ['A'] = { 5, 6}, + ['s'] = { 8, 6}, + ['S'] = { 8, 6}, + ['d'] = {11, 6}, + ['D'] = {11, 6}, + ['f'] = {14, 6}, + ['F'] = {14, 6}, + ['g'] = {17, 6}, + ['G'] = {17, 6}, + ['h'] = {20, 6}, + ['H'] = {20, 6}, + ['j'] = {23, 6}, + ['J'] = {23, 6}, + ['k'] = {26, 6}, + ['K'] = {26, 6}, + ['l'] = {29, 6}, + ['L'] = {29, 6}, + [';'] = {32, 6}, + [':'] = {32, 6}, + ['\''] = {35, 6}, + ['"'] = {35, 6}, + ['\n'] = {38, 6}, + + ['z'] = { 6, 9}, + ['Z'] = { 6, 9}, + ['x'] = { 9, 9}, + ['X'] = { 9, 9}, + ['c'] = {12, 9}, + ['C'] = {12, 9}, + ['v'] = {15, 9}, + ['V'] = {15, 9}, + ['b'] = {18, 9}, + ['B'] = {18, 9}, + ['n'] = {21, 9}, + ['N'] = {21, 9}, + ['m'] = {24, 9}, + ['M'] = {24, 9}, + [','] = {27, 9}, + ['<'] = {27, 9}, + ['.'] = {30, 9}, + ['>'] = {30, 9}, + ['/'] = {33, 9}, + ['?'] = {33, 9}, + + [' '] = {18, 12}, +}; + +static int char_distance(char a, char b) { + const struct coords *ac = &key_coords[(unsigned char)a], *bc = &key_coords[(unsigned char)b]; + int dx = abs(ac->x - bc->x); + int dy = abs(ac->y - bc->y); + return dx + dy; +} + +int typo_distance(const char *actual, const char *expected) { + // This is the Wagner-Fischer algorithm for Levenshtein distance, using + // Manhattan distance on the keyboard for individual characters. + + const int insert_cost = 12; + + size_t rows = strlen(actual) + 1; + size_t cols = strlen(expected) + 1; + + int arr0[cols], arr1[cols]; + int *row0 = arr0, *row1 = arr1; + + for (size_t j = 0; j < cols; ++j) { + row0[j] = insert_cost * j; + } + + for (size_t i = 1; i < rows; ++i) { + row1[0] = row0[0] + insert_cost; + + char a = actual[i - 1]; + for (size_t j = 1; j < cols; ++j) { + char b = expected[j - 1]; + int cost = row0[j - 1] + char_distance(a, b); + int del_cost = row0[j] + insert_cost; + if (del_cost < cost) { + cost = del_cost; + } + int ins_cost = row1[j - 1] + insert_cost; + if (ins_cost < cost) { + cost = ins_cost; + } + row1[j] = cost; + } + + int *tmp = row0; + row0 = row1; + row1 = tmp; + } + + return row0[cols - 1]; +} diff --git a/typo.h b/typo.h new file mode 100644 index 0000000..fc2392f --- /dev/null +++ b/typo.h @@ -0,0 +1,26 @@ +/********************************************************************* + * bfs * + * Copyright (C) 2016 Tavian Barnes * + * * + * This program is free software. It comes without any warranty, to * + * the extent permitted by applicable law. You can redistribute it * + * and/or modify it under the terms of the Do What The Fuck You Want * + * To Public License, Version 2, as published by Sam Hocevar. See * + * the COPYING file or http://www.wtfpl.net/ for more details. * + *********************************************************************/ + +#ifndef BFS_TYPO_H +#define BFS_TYPO_H + +/** + * Find the "typo" distance between two strings. + * + * @param actual + * The actual string typed by the user. + * @param expected + * The expected valid string. + * @return The distance between the two strings. + */ +int typo_distance(const char *actual, const char *expected); + +#endif // BFS_TYPO_H -- cgit v1.2.3