<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<!-- Generated by HsColour, http://www.cs.york.ac.uk/fp/darcs/hscolour/ -->
<!--
  Syntax-highlighted listing of the Haskell module Data/Text/Search.hs.

  Structure of the listing inside the pre element:
    * every source line starts with an empty anchor named "line-N", so other
      pages can link to an individual line;
    * the definition of `indices` carries an extra anchor named "indices";
    * each token is wrapped in a span whose hs-* class is the styling hook
      (presumably defined in hscolour.css, which is not visible from here).

  Whitespace inside the pre element is significant: one physical line per
  source line.  Literal less-than, greater-than and ampersand characters in
  the Haskell text must be written as entities to keep the page well-formed.
-->
<title>Data/Text/Search.hs</title>
<link type="text/css" rel="stylesheet" href="hscolour.css" />
</head>
<body>
<pre><a name="line-1"></a><span class="hs-comment">{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}</span>
<a name="line-2"></a>
<a name="line-3"></a><span class="hs-comment">-- |</span>
<a name="line-4"></a><span class="hs-comment">-- Module      : Data.Text.Search</span>
<a name="line-5"></a><span class="hs-comment">-- Copyright   : (c) Bryan O'Sullivan 2009</span>
<a name="line-6"></a><span class="hs-comment">--</span>
<a name="line-7"></a><span class="hs-comment">-- License     : BSD-style</span>
<a name="line-8"></a><span class="hs-comment">-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,</span>
<a name="line-9"></a><span class="hs-comment">--               duncan@haskell.org</span>
<a name="line-10"></a><span class="hs-comment">-- Stability   : experimental</span>
<a name="line-11"></a><span class="hs-comment">-- Portability : GHC</span>
<a name="line-12"></a><span class="hs-comment">--</span>
<a name="line-13"></a><span class="hs-comment">-- Fast substring search for 'Text', based on work by Boyer, Moore,</span>
<a name="line-14"></a><span class="hs-comment">-- Horspool, Sunday, and Lundh.</span>
<a name="line-15"></a><span class="hs-comment">--</span>
<a name="line-16"></a><span class="hs-comment">-- References:</span>
<a name="line-17"></a><span class="hs-comment">-- </span>
<a name="line-18"></a><span class="hs-comment">-- * R. S. Boyer, J. S. Moore: A Fast String Searching Algorithm.</span>
<a name="line-19"></a><span class="hs-comment">--   Communications of the ACM, 20, 10, 762-772 (1977)</span>
<a name="line-20"></a><span class="hs-comment">--</span>
<a name="line-21"></a><span class="hs-comment">-- * R. N. Horspool: Practical Fast Searching in Strings. Software -</span>
<a name="line-22"></a><span class="hs-comment">--   Practice and Experience 10, 501-506 (1980)</span>
<a name="line-23"></a><span class="hs-comment">--</span>
<a name="line-24"></a><span class="hs-comment">-- * D. M. Sunday: A Very Fast Substring Search Algorithm.</span>
<a name="line-25"></a><span class="hs-comment">--   Communications of the ACM, 33, 8, 132-142 (1990)</span>
<a name="line-26"></a><span class="hs-comment">--</span>
<a name="line-27"></a><span class="hs-comment">-- * F. Lundh: The Fast Search Algorithm.</span>
<a name="line-28"></a><span class="hs-comment">--   &lt;<a href="http://effbot.org/zone/stringlib.htm">http://effbot.org/zone/stringlib.htm</a>&gt; (2006)</span>
<a name="line-29"></a>
<a name="line-30"></a><span class="hs-keyword">module</span> <span class="hs-conid">Data</span><span class="hs-varop">.</span><span class="hs-conid">Text</span><span class="hs-varop">.</span><span class="hs-conid">Search</span>
<a name="line-31"></a>    <span class="hs-layout">(</span>
<a name="line-32"></a>      <span class="hs-varid">indices</span>
<a name="line-33"></a>    <span class="hs-layout">)</span> <span class="hs-keyword">where</span>
<a name="line-34"></a>
<a name="line-35"></a><span class="hs-keyword">import</span> <span class="hs-keyword">qualified</span> <span class="hs-conid">Data</span><span class="hs-varop">.</span><span class="hs-conid">Text</span><span class="hs-varop">.</span><span class="hs-conid">Array</span> <span class="hs-keyword">as</span> <span class="hs-conid">A</span>
<a name="line-36"></a><span class="hs-keyword">import</span> <span class="hs-conid">Data</span><span class="hs-varop">.</span><span class="hs-conid">Word</span> <span class="hs-layout">(</span><span class="hs-conid">Word64</span><span class="hs-layout">)</span>
<a name="line-37"></a><span class="hs-keyword">import</span> <span class="hs-conid">Data</span><span class="hs-varop">.</span><span class="hs-conid">Text</span><span class="hs-varop">.</span><span class="hs-conid">Internal</span> <span class="hs-layout">(</span><span class="hs-conid">Text</span><span class="hs-layout">(</span><span class="hs-keyglyph">..</span><span class="hs-layout">)</span><span class="hs-layout">)</span>
<a name="line-38"></a><span class="hs-keyword">import</span> <span class="hs-conid">Data</span><span class="hs-varop">.</span><span class="hs-conid">Text</span><span class="hs-varop">.</span><span class="hs-conid">Fusion</span><span class="hs-varop">.</span><span class="hs-conid">Internal</span> <span class="hs-layout">(</span><span class="hs-conid">PairS</span><span class="hs-layout">(</span><span class="hs-keyglyph">..</span><span class="hs-layout">)</span><span class="hs-layout">)</span>
<a name="line-39"></a><span class="hs-keyword">import</span> <span class="hs-conid">Data</span><span class="hs-varop">.</span><span class="hs-conid">Bits</span> <span class="hs-layout">(</span><span class="hs-layout">(</span><span class="hs-varop">.|.</span><span class="hs-layout">)</span><span class="hs-layout">,</span> <span class="hs-layout">(</span><span class="hs-varop">.&amp;.</span><span class="hs-layout">)</span><span class="hs-layout">)</span>
<a name="line-40"></a><span class="hs-keyword">import</span> <span class="hs-conid">Data</span><span class="hs-varop">.</span><span class="hs-conid">Text</span><span class="hs-varop">.</span><span class="hs-conid">UnsafeShift</span> <span class="hs-layout">(</span><span class="hs-varid">shiftL</span><span class="hs-layout">)</span>
<a name="line-41"></a>
<a name="line-42"></a><a name="indices"></a><span class="hs-comment">-- | /O(n+m)/ Find the offsets of all non-overlapping indices of</span>
<a name="line-43"></a><span class="hs-comment">-- @needle@ within @haystack@. The offsets returned represent</span>
<a name="line-44"></a><span class="hs-comment">-- locations in the low-level array.</span>
<a name="line-45"></a><span class="hs-comment">--</span>
<a name="line-46"></a><span class="hs-comment">-- In (unlikely) bad cases, this algorithm's complexity degrades</span>
<a name="line-47"></a><span class="hs-comment">-- towards /O(n*m)/.</span>
<a name="line-48"></a><span class="hs-definition">indices</span> <span class="hs-keyglyph">::</span> <span class="hs-conid">Text</span>                <span class="hs-comment">-- ^ Substring to search for (@needle@)</span>
<a name="line-49"></a>        <span class="hs-keyglyph">-&gt;</span> <span class="hs-conid">Text</span>                <span class="hs-comment">-- ^ Text to search in (@haystack@)</span>
<a name="line-50"></a>        <span class="hs-keyglyph">-&gt;</span> <span class="hs-keyglyph">[</span><span class="hs-conid">Int</span><span class="hs-keyglyph">]</span>
<a name="line-51"></a><span class="hs-definition">indices</span> <span class="hs-sel">_needle</span><span class="hs-keyglyph">@</span><span class="hs-layout">(</span><span class="hs-conid">Text</span> <span class="hs-varid">narr</span> <span class="hs-varid">noff</span> <span class="hs-varid">nlen</span><span class="hs-layout">)</span> <span class="hs-sel">_haystack</span><span class="hs-keyglyph">@</span><span class="hs-layout">(</span><span class="hs-conid">Text</span> <span class="hs-varid">harr</span> <span class="hs-varid">hoff</span> <span class="hs-varid">hlen</span><span class="hs-layout">)</span>
<a name="line-52"></a>    <span class="hs-keyglyph">|</span> <span class="hs-varid">nlen</span> <span class="hs-varop">==</span> <span class="hs-num">1</span>              <span class="hs-keyglyph">=</span> <span class="hs-varid">scanOne</span> <span class="hs-layout">(</span><span class="hs-varid">nindex</span> <span class="hs-num">0</span><span class="hs-layout">)</span>
<a name="line-53"></a>    <span class="hs-keyglyph">|</span> <span class="hs-varid">nlen</span> <span class="hs-varop">&lt;=</span> <span class="hs-num">0</span> <span class="hs-varop">||</span> <span class="hs-varid">ldiff</span> <span class="hs-varop">&lt;</span> <span class="hs-num">0</span> <span class="hs-keyglyph">=</span> <span class="hs-conid">[]</span>
<a name="line-54"></a>    <span class="hs-keyglyph">|</span> <span class="hs-varid">otherwise</span>              <span class="hs-keyglyph">=</span> <span class="hs-varid">scan</span> <span class="hs-num">0</span>
<a name="line-55"></a>  <span class="hs-keyword">where</span>
<a name="line-56"></a>    <span class="hs-varid">ldiff</span>    <span class="hs-keyglyph">=</span> <span class="hs-varid">hlen</span> <span class="hs-comment">-</span> <span class="hs-varid">nlen</span>
<a name="line-57"></a>    <span class="hs-varid">nlast</span>    <span class="hs-keyglyph">=</span> <span class="hs-varid">nlen</span> <span class="hs-comment">-</span> <span class="hs-num">1</span>
<a name="line-58"></a>    <span class="hs-varid">z</span>        <span class="hs-keyglyph">=</span> <span class="hs-varid">nindex</span> <span class="hs-varid">nlast</span>
<a name="line-59"></a>    <span class="hs-varid">nindex</span> <span class="hs-varid">k</span> <span class="hs-keyglyph">=</span> <span class="hs-conid">A</span><span class="hs-varop">.</span><span class="hs-varid">unsafeIndex</span> <span class="hs-varid">narr</span> <span class="hs-layout">(</span><span class="hs-varid">noff</span><span class="hs-varop">+</span><span class="hs-varid">k</span><span class="hs-layout">)</span>
<a name="line-60"></a>    <span class="hs-varid">hindex</span> <span class="hs-varid">k</span> <span class="hs-keyglyph">=</span> <span class="hs-conid">A</span><span class="hs-varop">.</span><span class="hs-varid">unsafeIndex</span> <span class="hs-varid">harr</span> <span class="hs-layout">(</span><span class="hs-varid">hoff</span><span class="hs-varop">+</span><span class="hs-varid">k</span><span class="hs-layout">)</span>
<a name="line-61"></a>    <span class="hs-varid">hindex'</span> <span class="hs-varid">k</span> <span class="hs-keyglyph">|</span> <span class="hs-varid">k</span> <span class="hs-varop">==</span> <span class="hs-varid">hlen</span>  <span class="hs-keyglyph">=</span> <span class="hs-num">0</span>
<a name="line-62"></a>              <span class="hs-keyglyph">|</span> <span class="hs-varid">otherwise</span> <span class="hs-keyglyph">=</span> <span class="hs-conid">A</span><span class="hs-varop">.</span><span class="hs-varid">unsafeIndex</span> <span class="hs-varid">harr</span> <span class="hs-layout">(</span><span class="hs-varid">hoff</span><span class="hs-varop">+</span><span class="hs-varid">k</span><span class="hs-layout">)</span>
<a name="line-63"></a>    <span class="hs-layout">(</span><span class="hs-varid">mask</span> <span class="hs-keyglyph">::</span> <span class="hs-conid">Word64</span><span class="hs-layout">)</span> <span class="hs-conop">:*:</span> <span class="hs-varid">skip</span>  <span class="hs-keyglyph">=</span> <span class="hs-varid">buildTable</span> <span class="hs-num">0</span> <span class="hs-num">0</span> <span class="hs-layout">(</span><span class="hs-varid">nlen</span><span class="hs-comment">-</span><span class="hs-num">2</span><span class="hs-layout">)</span>
<a name="line-64"></a>    <span class="hs-varid">buildTable</span> <span class="hs-varop">!</span><span class="hs-varid">i</span> <span class="hs-varop">!</span><span class="hs-varid">msk</span> <span class="hs-varop">!</span><span class="hs-varid">skp</span>
<a name="line-65"></a>        <span class="hs-keyglyph">|</span> <span class="hs-varid">i</span> <span class="hs-varop">&gt;=</span> <span class="hs-varid">nlast</span>           <span class="hs-keyglyph">=</span> <span class="hs-layout">(</span><span class="hs-varid">msk</span> <span class="hs-varop">.|.</span> <span class="hs-varid">swizzle</span> <span class="hs-varid">z</span><span class="hs-layout">)</span> <span class="hs-conop">:*:</span> <span class="hs-varid">skp</span>
<a name="line-66"></a>        <span class="hs-keyglyph">|</span> <span class="hs-varid">otherwise</span>            <span class="hs-keyglyph">=</span> <span class="hs-varid">buildTable</span> <span class="hs-layout">(</span><span class="hs-varid">i</span><span class="hs-varop">+</span><span class="hs-num">1</span><span class="hs-layout">)</span> <span class="hs-layout">(</span><span class="hs-varid">msk</span> <span class="hs-varop">.|.</span> <span class="hs-varid">swizzle</span> <span class="hs-varid">c</span><span class="hs-layout">)</span> <span class="hs-varid">skp'</span>
<a name="line-67"></a>        <span class="hs-keyword">where</span> <span class="hs-varid">c</span>                <span class="hs-keyglyph">=</span> <span class="hs-varid">nindex</span> <span class="hs-varid">i</span>
<a name="line-68"></a>              <span class="hs-varid">skp'</span> <span class="hs-keyglyph">|</span> <span class="hs-varid">c</span> <span class="hs-varop">==</span> <span class="hs-varid">z</span>    <span class="hs-keyglyph">=</span> <span class="hs-varid">nlen</span> <span class="hs-comment">-</span> <span class="hs-varid">i</span> <span class="hs-comment">-</span> <span class="hs-num">2</span>
<a name="line-69"></a>                   <span class="hs-keyglyph">|</span> <span class="hs-varid">otherwise</span> <span class="hs-keyglyph">=</span> <span class="hs-varid">skp</span>
<a name="line-70"></a>    <span class="hs-varid">swizzle</span> <span class="hs-varid">k</span> <span class="hs-keyglyph">=</span> <span class="hs-num">1</span> <span class="hs-varop">`shiftL`</span> <span class="hs-layout">(</span><span class="hs-varid">fromIntegral</span> <span class="hs-varid">k</span> <span class="hs-varop">.&amp;.</span> <span class="hs-num">0x3f</span><span class="hs-layout">)</span>
<a name="line-71"></a>    <span class="hs-varid">scan</span> <span class="hs-varop">!</span><span class="hs-varid">i</span>
<a name="line-72"></a>        <span class="hs-keyglyph">|</span> <span class="hs-varid">i</span> <span class="hs-varop">&gt;</span> <span class="hs-varid">ldiff</span>                  <span class="hs-keyglyph">=</span> <span class="hs-conid">[]</span>
<a name="line-73"></a>        <span class="hs-keyglyph">|</span> <span class="hs-varid">c</span> <span class="hs-varop">==</span> <span class="hs-varid">z</span> <span class="hs-varop">&amp;&amp;</span> <span class="hs-varid">candidateMatch</span> <span class="hs-num">0</span> <span class="hs-keyglyph">=</span> <span class="hs-varid">i</span> <span class="hs-conop">:</span> <span class="hs-varid">scan</span> <span class="hs-layout">(</span><span class="hs-varid">i</span> <span class="hs-varop">+</span> <span class="hs-varid">nlen</span><span class="hs-layout">)</span>
<a name="line-74"></a>        <span class="hs-keyglyph">|</span> <span class="hs-varid">otherwise</span>                  <span class="hs-keyglyph">=</span> <span class="hs-varid">scan</span> <span class="hs-layout">(</span><span class="hs-varid">i</span> <span class="hs-varop">+</span> <span class="hs-varid">delta</span><span class="hs-layout">)</span>
<a name="line-75"></a>        <span class="hs-keyword">where</span> <span class="hs-varid">c</span> <span class="hs-keyglyph">=</span> <span class="hs-varid">hindex</span> <span class="hs-layout">(</span><span class="hs-varid">i</span> <span class="hs-varop">+</span> <span class="hs-varid">nlast</span><span class="hs-layout">)</span>
<a name="line-76"></a>              <span class="hs-varid">candidateMatch</span> <span class="hs-varop">!</span><span class="hs-varid">j</span>
<a name="line-77"></a>                    <span class="hs-keyglyph">|</span> <span class="hs-varid">j</span> <span class="hs-varop">&gt;=</span> <span class="hs-varid">nlast</span>               <span class="hs-keyglyph">=</span> <span class="hs-conid">True</span>
<a name="line-78"></a>                    <span class="hs-keyglyph">|</span> <span class="hs-varid">hindex</span> <span class="hs-layout">(</span><span class="hs-varid">i</span><span class="hs-varop">+</span><span class="hs-varid">j</span><span class="hs-layout">)</span> <span class="hs-varop">/=</span> <span class="hs-varid">nindex</span> <span class="hs-varid">j</span> <span class="hs-keyglyph">=</span> <span class="hs-conid">False</span>
<a name="line-79"></a>                    <span class="hs-keyglyph">|</span> <span class="hs-varid">otherwise</span>                <span class="hs-keyglyph">=</span> <span class="hs-varid">candidateMatch</span> <span class="hs-layout">(</span><span class="hs-varid">j</span><span class="hs-varop">+</span><span class="hs-num">1</span><span class="hs-layout">)</span>
<a name="line-80"></a>              <span class="hs-varid">delta</span> <span class="hs-keyglyph">|</span> <span class="hs-varid">nextInPattern</span> <span class="hs-keyglyph">=</span> <span class="hs-varid">nlen</span> <span class="hs-varop">+</span> <span class="hs-num">1</span>
<a name="line-81"></a>                    <span class="hs-keyglyph">|</span> <span class="hs-varid">c</span> <span class="hs-varop">==</span> <span class="hs-varid">z</span>        <span class="hs-keyglyph">=</span> <span class="hs-varid">skip</span> <span class="hs-varop">+</span> <span class="hs-num">1</span>
<a name="line-82"></a>                    <span class="hs-keyglyph">|</span> <span class="hs-varid">otherwise</span>     <span class="hs-keyglyph">=</span> <span class="hs-num">1</span>
<a name="line-83"></a>              <span class="hs-varid">nextInPattern</span>         <span class="hs-keyglyph">=</span> <span class="hs-varid">mask</span> <span class="hs-varop">.&amp;.</span> <span class="hs-varid">swizzle</span> <span class="hs-layout">(</span><span class="hs-varid">hindex'</span> <span class="hs-layout">(</span><span class="hs-varid">i</span><span class="hs-varop">+</span><span class="hs-varid">nlen</span><span class="hs-layout">)</span><span class="hs-layout">)</span> <span class="hs-varop">==</span> <span class="hs-num">0</span>
<a name="line-84"></a>    <span class="hs-varid">scanOne</span> <span class="hs-varid">c</span> <span class="hs-keyglyph">=</span> <span class="hs-varid">loop</span> <span class="hs-num">0</span>
<a name="line-85"></a>        <span class="hs-keyword">where</span> <span class="hs-varid">loop</span> <span class="hs-varop">!</span><span class="hs-varid">i</span> <span class="hs-keyglyph">|</span> <span class="hs-varid">i</span> <span class="hs-varop">&gt;=</span> <span class="hs-varid">hlen</span>     <span class="hs-keyglyph">=</span> <span class="hs-conid">[]</span>
<a name="line-86"></a>                      <span class="hs-keyglyph">|</span> <span class="hs-varid">hindex</span> <span class="hs-varid">i</span> <span class="hs-varop">==</span> <span class="hs-varid">c</span> <span class="hs-keyglyph">=</span> <span class="hs-varid">i</span> <span class="hs-conop">:</span> <span class="hs-varid">loop</span> <span class="hs-layout">(</span><span class="hs-varid">i</span><span class="hs-varop">+</span><span class="hs-num">1</span><span class="hs-layout">)</span>
<a name="line-87"></a>                      <span class="hs-keyglyph">|</span> <span class="hs-varid">otherwise</span>     <span class="hs-keyglyph">=</span> <span class="hs-varid">loop</span> <span class="hs-layout">(</span><span class="hs-varid">i</span><span class="hs-varop">+</span><span class="hs-num">1</span><span class="hs-layout">)</span>
<a name="line-88"></a><span class="hs-comment">{-# INLINE indices #-}</span>
</pre>
</body>
</html>