Docs GODI Archive
Projects Blog Link DB

Search GODI:


More options
File lib/felix/rtl/sm_srcloc.h GODI Package apps-felix
 
   sm_srcloc.h  
#line 4740 "lpsrc/sm.pak"
// srcloc.h            see license.txt for copyright and terms of use
// source location information, efficiently represented as one word

// The fundamental assumption in this module is that source location
// information is frequently created, stored and passed around, but
// infrequently decoded into human-readable form.  Therefore the
// module uses a single word to store the information, and appeals
// to several index structures when decoding is necessary.
//
// Since decoding, when it happens, also usually has high locality,
// the data structures include caches to make accesses to nearby
// locations fast.
//
// No attempt is made to fold creation of SourceLocs into other
// file-processing activities, such as traditional lexical analysis.
// The complexity of doing that would be substantial, with little gain
// in efficiency, due to the large buffer caches in modern OSes.  The
// main drawback is the inability to work with non-seekable inputs
// (like pipes) because we consume the whole input when its line
// counts are computed.

#ifndef SRCLOC_H
#define SRCLOC_H

#include "sm_str.h"
#include "sm_objlist.h"
#include "flx_rtl_config.hpp"

class HashLineMap;    // hashline.h


// This is a source location.  It's interpreted as an integer
// specifying the byte offset within a hypothetical file created by
// concatenating all the sources together.  Its type is 'enum' so I
// can overload functions to accept SourceLoc without confusion.
// I assume the compiler will use a machine word for this (and check
// that assumption in the .cc file).
//
// I would love to be able to annotate this so that the C++ compiler
// would not allow variables of this type to be created
// uninitialized.. that's the one drawback of calling this an 'enum'
// instead of a 'class': I don't get to write a constructor.
enum SourceLoc {
  // entity is defined within the translator's initialization code
  SL_INIT=-1,

  // location is unknown for some reason
  SL_UNKNOWN=0
};


// This class manages all the data associated with creating and
// interpreting SourceLocs.  It's expected to be a singleton in the
// program, though within this module that assumption is confined to
// the 'toString' function at the end.
class FLX_RTL_EXTERN SourceLocManager {
private:     // types
  // a triple which identifies a line boundary in a file (it's
  // implicit which file it is) with respect to all of the relevant
  // spaces
  class Marker {
  public:
    // character offset, starting with 0
    int charOffset;

    // line offset, starting with 1
    int lineOffset;

    // offset into the 'lineLengths' array; this is not simply
    // lineOffset-1 because of the possible presence of lines with
    // length longer than 254 chars
    int arrayOffset;

  public:
    Marker() {}      // for creation in arrays
    Marker(int c, int L, int a)
      : charOffset(c), lineOffset(L), arrayOffset(a) {}
    Marker(Marker const &obj)
      : DMEMB(charOffset), DMEMB(lineOffset), DMEMB(arrayOffset) {}
  };

public:      // types
  // describes a file we know about
  class File {
  public:    // data
    // file name; we consider two files to be the same if and only
    // if their names are equal, i.e. there is no checking done to
    // see if their names happen to be aliases in the filesystem
    sm_string name;

    // start offset in the SourceLoc space
    SourceLoc startLoc;

    // number of chars in the file
    int numChars;

    // number of lines in the file
    int numLines;

    // average number of chars per line; this is used for estimating
    // whether the index should be consulted for some lookups (and
    // it's stored instead of computed to save a division)
    int avgCharsPerLine;

    // known #line directives for this file; NULL if none are known
    HashLineMap *hashLines;          // (nullable owner)

  private:   // data
    // an array of line lengths; to handle lines longer than 255
    // chars, we use runs of '\xFF' chars to (in unary) encode
    // multiples of 254 (one less than 255) chars, plus the final
    // short count to give the total length
    unsigned char *lineLengths;      // (owner)

    // # of elements in 'lineLengths'
    int lineLengthsSize;

    // this marker and offset can name an arbitrary point
    // in the array, including those that are not at the
    // start of a line; we move this around when searching
    // within the array
    Marker marker;
    int markerCol;      // 1-based column; it's usually 1

    // an index built on top of 'lineLengths' for faster random access
    Marker *index;                   // (owner)

    // # of elements in 'index'
    int indexSize;

  private:   // funcs
    File(File&);                     // disallowed
    void resetMarker();
    void advanceMarker();

  public:    // funcs
    // this builds both the array and the index
    File(char const *name, SourceLoc startLoc);
    ~File();

    // line number to character offset
    int lineToChar(int lineNum);

    // line/col to offset, with truncation if col exceeds line length
    int lineColToChar(int lineNum, int col);

    // char offset to line/col
    void charToLineCol(int offset, int &line, int &col);

    // true if this file contains the specified location
    bool hasLoc(SourceLoc sl) const
      { return toInt(startLoc) <= sl &&
                                  sl <= toInt(startLoc) + numChars; }

    // call this time each time a #line directive is encountered;
    // same semantics as HashLineMap::addHashLine
    void addHashLine(int ppLine, int origLine, char const *origFname);
    void doneAdding();
  };

  // this is used for SourceLocs where the file isn't reliably
  // available, yet we'd like to be able to store some location
  // information anyway; the queries below just return the static
  // information stored, and incremental update is impossible
  class StaticLoc {
  public:
    sm_string name;      // file name
    int offset;       // char offset
    int line, col;    // line,col

  public:
    StaticLoc(char const *n, int o, int L, int c)
      : name(n), offset(o), line(L), col(c) {}
    StaticLoc(StaticLoc const &obj)
      : DMEMB(name), DMEMB(offset), DMEMB(line), DMEMB(col) {}
    ~StaticLoc();
  };

private:     // data
  // list of files; it would be possible to use a data structure
  // that is faster to search, but the cache ought to exploit
  // query locality to the extent that it doesn't matter
  ObjList<File> files;

  // most-recently accessed File; this is a cache
  File *recent;                      // (serf)

  // list of StaticLocs; any SourceLoc less than 0 is interpreted
  // as an index into this list
  ObjList<StaticLoc> statics;

  // next source location to assign
  SourceLoc nextLoc;

  // next static (negative) location
  SourceLoc nextStaticLoc;

public:      // data
  // number of static locations at which we print a warning message;
  // defaults to 100
  int maxStaticLocs;

  // when true, we automatically consult the #line maps when decoding;
  // defaults to true; NOTE: when this is true, encode and decode are
  // not necessarily inverses of each other
  bool useHashLines;

  // count the # of times we had to truncate a char offset because
  // the #line map pointed at a line shorter than the column number
  // we expected to use; this is initially 0; calling code can use
  // this to tell if the offset information across a given call or
  // sequence of calls is perfect or truncated
  static int shortLineCount;

private:     // funcs
  // let File know about these functions
  friend class SourceLocManager::File;

  static SourceLoc toLoc(int L) {
    SourceLoc ret = (SourceLoc)L;

    // in debug mode, we verify that SourceLoc is wide enough
    // to encode this integer
    xassertdb(toInt(ret) == L);

    return ret;
  }
  static int toInt(SourceLoc loc) { return (int)loc; }

  File *findFile(char const *name);
  File *getFile(char const *name);

  public:                       // dsw: I need this so I have an object to annotate with a VoidVoidDict
  File *findFileWithLoc(SourceLoc loc);
  private:
  StaticLoc const *getStatic(SourceLoc loc);

public:      // funcs
  SourceLocManager();
  ~SourceLocManager();

  // origins:
  //   character offsets start at 0
  //   lines start at 1
  //   columns start at 1

  // encode from scratch
  SourceLoc encodeOffset(char const *filename, int charOffset);
  SourceLoc encodeBegin(char const *filename)
    { return encodeOffset(filename, 0 /*offset*/); }
  SourceLoc encodeLineCol(char const *filename, int line, int col);

  // some care is required with 'encodeStatic', since each call makes
  // a new location with a new entry in the static array to back it
  // up, so the caller should ensure a given static location is not
  // encoded more than once, if possible
  SourceLoc encodeStatic(StaticLoc const &obj);
  SourceLoc encodeStatic(char const *fname, int offset, int line, int col)
    { return encodeStatic(StaticLoc(fname, offset, line, col)); }
  static bool isStatic(SourceLoc loc) { return toInt(loc) <= 0; }

  // encode incremental; these are the methods we expect are called
  // the most frequently; this interface is supposed to allow an
  // implementation which uses explicit line/col, even though that
  // is not what is used here
  static SourceLoc advCol(SourceLoc base, int colOffset)
    { xassert(!isStatic(base)); return toLoc(toInt(base) + colOffset); }
  static SourceLoc advLine(SourceLoc base)     // from end of line to beginning of next
    { xassert(!isStatic(base)); return toLoc(toInt(base) + 1); }
  static SourceLoc advText(SourceLoc base, char const * /*text*/, int textLen)
    { xassert(!isStatic(base)); return toLoc(toInt(base) + textLen); }

  // decode
  void decodeOffset(SourceLoc loc, char const *&filename, int &charOffset);
  void decodeLineCol(SourceLoc loc, char const *&filename, int &line, int &col);

  // more specialized decode
  char const *getFile(SourceLoc loc);
  int getOffset(SourceLoc loc);
  int getLine(SourceLoc loc);
  int getCol(SourceLoc loc);

  // get access to the File itself, for adding #line directives
  File *getInternalFile(char const *fname)
    { return getFile(fname); }

  // render as sm_string in "file:line:col" format
  sm_string getString(SourceLoc loc);

  // "line:col" format
  sm_string getLCString(SourceLoc loc);
};


// singleton pointer, set automatically by the constructor
extern SourceLocManager *sourceLocManager;

// dsw: So that gdb can find it please DO NOT inline this; also the
// unique public name is intensional: I don't want gdb doing
// overloading and sometimes getting it wrong, which it does
FLX_RTL_EXTERN sm_string locToStr(SourceLoc sl);

inline sm_string toString(SourceLoc sl)
  { return locToStr(sl); }

inline sm_stringBuilder& operator<< (sm_stringBuilder &sb, SourceLoc sl)
  { return sb << toString(sl); }

inline sm_string toLCString(SourceLoc sl)
  { return sourceLocManager->getLCString(sl); }


// macro for obtaining a source location that points at the
// point in the source code where this macro is invoked
#define HERE_SOURCELOC \
  (sourceLocManager->encodeStatic(__FILE__, 0, __LINE__, 1))


// it's silly to demand mention of 'SourceLocManager' just to update
// the locations, esp. since SourceLoc is its own type and therefore
// overloading will avoid any possible collisions
inline SourceLoc advCol(SourceLoc base, int colOffset)
  { return SourceLocManager::advCol(base, colOffset); }
inline SourceLoc advLine(SourceLoc base)
  { return SourceLocManager::advLine(base); }
inline SourceLoc advText(SourceLoc base, char const *text, int textLen)
  { return SourceLocManager::advText(base, text, textLen); }


#endif // SRCLOC_H

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml