
%%%% Converting the file to a list of ASCII codes of printable characters 

  %% file_to_list(+File, -L)
  %%
  %% Opens File as the current input (Edinburgh-style see/1), reads it to the
  %% end with read_to_list/1 and unifies L with the resulting list of codes.
  %%
  %% The file is closed and the previously selected input stream is restored
  %% on every exit path: success, failure, and exception.  The list is read
  %% into a fresh variable and unified with L only afterwards, so calling the
  %% predicate with L already bound cannot abort the read half-way and leave
  %% the file open.
  file_to_list(File,L) :-
     seeing(Previous),
     see(File),
     (   catch(read_to_list(Codes),
               Error,
               ( seen, see(Previous), throw(Error) ))   % clean up, then re-raise
     ->  seen, see(Previous)
     ;   seen, see(Previous), fail
     ),
     L = Codes.

  %% read_to_list(-L)
  %%
  %% Reads the current input stream up to end of file and unifies L with the
  %% list of codes that are kept:
  %%   * a code accepted by printable/1 is kept unchanged;
  %%   * a layout control code 9..13 (tab, line feed, vertical tab, form
  %%     feed, carriage return) is replaced by one space (32), so that two
  %%     words separated only by a line break or a tab do not run together;
  %%   * any other code is dropped.
  read_to_list(L) :-
        get0(X), !,
        (   X = -1 ->                    % -1 is the end-of-file marker
            L = []
        ;   printable(X) ->
            L = [X|L1],
            read_to_list(L1)
        ;   X >= 9, X =< 13 ->           % layout character: keep a separator
            L = [32|L1],
            read_to_list(L1)
        ;   read_to_list(L)              % other control code: skip it
        ).


  %% printable(+X): X is a code that is not below 32 (the space character).
  %% There is no upper bound, so 127 and every larger code is accepted too.
  printable(X) :- 32 =< X.
 
  %% alphabetical(+X): X is the code of an ASCII letter, A..Z or a..z.
  alphabetical(X) :-
      (   X >= 0'A, X =< 0'Z
      ;   X >= 0'a, X =< 0'z
      ).
  %% numerical(+X): X is the code of an ASCII decimal digit, 0..9.
  numerical(X) :-
      X >= 0'0,
      X =< 0'9.

  %% alphanum(+X): X is the code of a letter (alphabetical/1) or of a
  %% digit (numerical/1).
  alphanum(X) :- alphabetical(X).
  alphanum(X) :- numerical(X).


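%% Tokenize a list L of ASCII codes into a list R of atoms: a letter followed by
%% any run of letters and digits becomes one atom, every other non-space
%% character becomes a single-character atom, and spaces are dropped.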

  %% get_tokens(+L, -R): R is the token list produced by tokenize/3 for the
  %% code list L, starting with an empty difference list (front and hole are
  %% the same unbound variable) as the token in progress.
  get_tokens(L,R) :-
      Pending = l(Hole,Hole),
      tokenize(L,R,Pending).

  %% The last argument of tokenize is a difference list of characters in the current token that 
  %% have already been read


  %% tokenize(+Codes, -Tokens, +l(L,Y))
  %%
  %% L-Y is the open difference list holding the codes of the token being
  %% collected; it is empty exactly when L and Y are the same unbound
  %% variable (tested with ==/2).
  %%
  %%   * End of input, nothing pending: no more tokens.
  %%   * End of input, token pending: close the list and emit it as an atom.
  %%   * Nothing pending: a space (32) is skipped, a letter starts a new
  %%     token, and any other code is emitted at once as a one-character atom.
  %%   * Token pending: a letter or digit extends it; any other code closes
  %%     the pending token, emits it, and is then examined again with an
  %%     empty accumulator.
  %%
  %% The else branches are separated with ;/2, the standard if-then-else form.
  tokenize([],[],l(L,Y)) :- L == Y, !.
  tokenize([],[T],l(L,Y)) :- Y = [], atom_codes(T,L).
  tokenize([C|R],S,l(L,Y)) :-
      L == Y, !,
      (   C = 32 ->
          tokenize(R,S,l(L,Y))
      ;   alphabetical(C) ->
          Y = [C|Y1],                    % L is Y here, so this also sets the front
          tokenize(R,S,l(L,Y1))
      ;   atom_codes(T,[C]),
          S = [T|S1],
          tokenize(R,S1,l(Y2,Y2))
      ).
  tokenize([C|R],S,l(L,Y)) :-
      (   alphanum(C) ->
          Y = [C|Y1],
          tokenize(R,S,l(L,Y1))
      ;   Y = [],                        % close the pending token
          atom_codes(T1,L),
          S = [T1|S1],
          tokenize([C|R],S1,l(Y2,Y2))    % re-examine C with nothing pending
      ).


