BMSearch  
Boyer-Moore search algorithm  

This is a simple Amiga program showing the famous Boyer-Moore search algorithm 
which is one of the fastest. I copied the actual algorithm code from somewhere and 
amigified it, but I can't for the life of me remember where.. 

I have not employed this code in my main FSearch program, cause the ratio of 
speed vs. pain-in-the-ass was too small for it to be worth the effort and extra code. 

18/3/02 - Fixed bug in initialization step 1 (reported by Ties Molenhuis)
 

#include <exec/exec.h> 
#include <exec/types.h> 
#include <exec/execbase.h> 
#include <exec/memory.h> 
#include <dos/dosextens.h> 
#include <dos/rdargs.h> 
#include <dos/dostags.h> 

#include <string.h> 
#include <stdio.h> 
#include <ctype.h> 
#include <dos.h> 

#include <proto/dos.h> 
#include <proto/exec.h> 

__inline int max(int a, int b) {return a > b ? a : b;} 

int skip[256]; 

#define MaxPatLength 100 
int shift[MaxPatLength]; 

// prototype 
int bmsearch (char *, char *); 

int main(void) 
{ 
struct RDArgs *rdargs = NULL; 
UBYTE *file, *str; 
LONG  rc, args[3]; 
struct FileInfoBlock *fib=NULL; 
UBYTE *buff=NULL; 
BPTR fp; 
LONG buffsize, sz; 
int err = 0; 

rc = 10;   /* return code */ 

// note we don't open DOSBase since SC does it automatically for us 

memset((char *)args, 0, sizeof(args)); 
rdargs = ReadArgs("FILE/A,STRING/A", args, NULL); 
if (!rdargs) 
{   PutStr ("Wrong arguments\n"); 
    goto endprog; 
} 
file   = (UBYTE *)args[0]; 
str    = (UBYTE *)args[1]; 

// ------------------------ read the whole file into buff 

if (!(fp = Open (file, MODE_OLDFILE)))  
{  PutStr ("Could not open file\n"); 
   goto endprog; 
} 

if (fib = (struct FileInfoBlock *)AllocVec(sizeof(struct FileInfoBlock), MEMF_CLEAR)) 
{   if (ExamineFH(fp, fib)) 
    {   buffsize = fib->fib_Size; 
        if (buff = (UBYTE *)AllocVec(buffsize + 32, MEMF_CLEAR)) 
        {   sz = Read (fp, buff, buffsize); 
            if (sz != buffsize) ++err; 
        } 
    } 
    FreeVec (fib); 
} 
Close (fp); 

if (err) 
{  PutStr ("Error reading file\n"); 
   goto endprog; 
} 
else if (!buff) 
{  PutStr ("No memory!\n"); 
   goto endprog; 
} 

// ---------------------- call BM code 

if (bmsearch (str, buff)) rc= 0; 

// ---------------------- exit, stage left.. 

endprog : 
if (rdargs)  FreeArgs (rdargs); 
if (buff)    FreeVec (buff); 
return (rc); 
} 
  

/* ###################################################################### 
   This is the actual Boyer-Moore search algorithm 

   Returns index of first occurrence of string p in a. 
   Returns length of a if there is no occurrence of p in a. 

   ###################################################################### */ 

int bmsearch (char *p, char *a) 
{ 
   int M, M1, N, right_end, sk, sh; 
   int ended, j, len[MaxPatLength]; 
   int i;                 /* offset from right in a and p */ 
   char *match; 
   char outline[60]; 
   int c; 

   M = strlen(p);  
   M1 = M - 1; 
   N = strlen(a); 
   right_end = M1;     /* position in a */ 

   /* ----------------------------------------------------------------- 
   Setup - Initialise SKIP array : 
   initializes the skip array. skip[c] = j if the rightmost occurrence of  
   char c in p is in location j.   
   skip[c] = length of p if there is no occurrence of c. 
   ----------------------------------------------------------------- */ 

   M = strlen(p); M1 = M - 1; 
   for (i = 0; i < 256; i++) skip[i] = M; 
   for (i = 0; p[i]; i++) skip[p[i]] = M1 - i; 

   /* ----------------------------------------------------------------- 
   Setup2 - Initialise SHIFT array : 
   initializes the shift array for the "pattern" string p. 
   ----------------------------------------------------------------- */ 

   M = strlen(p); 
   M1 = M - 1; 

   /* 1. Set len[i] = number of characters right ended at i which match the 
      characters right ended at p[M1]. */ 
   for (i = 1; i < M; i++) 
   {  
      for (j = 0; j < M - i && p[M1 - j] == p[M1 - i - j]; j++); 
      len[i] = j; 
   } 

   /* 2. If p[M1] does not occur again in p, then this initialization 
      would be the proper shift */ 
   shift[0] = 1; 
   for (i = 1; i < M; i++) shift[i] = M; 

   /* 3. Fix up by shifting to the rightmost occurrence of the matched chars */ 
      for (i = M1; i > 0; i--) shift[len[i]] = i; 

   /* 4. Fix up by considering matches that would run off the end of the  
      pattern */ 
   ended = 0; 
   for (i = 0; i < M; i++) 
   { 
      if (len[i] == M1 - i) ended = i; 
      if (ended) shift[i] = ended; 
   } 

   /* ----------------------------------------------------------------- 
              Do the search..    
   ----------------------------------------------------------------- */ 

  while (right_end < N) 
  { 
    /* 1. Get the offset from right of the first non-match */ 
    for (i = 0; i < M && a[right_end - i] == p[M1 - i]; i++); 

    if (i == M)  
    {   match = &a[right_end - M1];         /* match was found */ 
        // put it into a 50 char buffer & print it 
        for (c=0; c<50; ++c) 
        {   if (!match[c] || match[c] == '\n')  
            {  outline[c]=0;  c=50; } 
            else outline[c] = match[c]; 
        } 
        Printf ("BYTE %ld: %s\n", (LONG)right_end - M1, outline); 
        i = M-2; 
    } 

    /* 2. Figure skip right that would align current character in a  
       with the rightmost occurrence (if any) of that character in p */ 
    sk = skip[a[right_end - i]]; 

    /* 3. Figure shift right that would align the current matched  
      initial segment with the next such segment in p */ 
    sh = shift[i]; 

    /* Perform the largest shift to the right as per 2 or 3 above */ 
    right_end = max(right_end - i + sk, right_end + sh); 
  } 

  return N; /* because no match was found */ 
}