Let’s write some code

Here’s the significant part of some example code to implement this index plan.

# Indexes every row produced by parse_csv_file(data_path) into the Xapian
# database at db_path, creating the database if it does not exist yet.
#
# Each row becomes one document, keyed by a "Q"-prefixed term built from the
# row's 'id_NUMBER' field, so re-running the indexer replaces documents
# rather than duplicating them.
#
# @param data_path [String] path handed to parse_csv_file
# @param db_path [String] path of the Xapian database to create or open
# @return [Object] whatever `parse_csv_file(data_path).each` returns
def index_csv(data_path, db_path)
  db = Xapian::WritableDatabase.new(db_path, Xapian::DB_CREATE_OR_OPEN)
  begin
    # One generator is reused for all rows; only its target document changes.
    term_generator = Xapian::TermGenerator.new
    term_generator.stemmer = Xapian::Stem.new('en')

    parse_csv_file(data_path).each do |row|
      doc = Xapian::Document.new
      term_generator.document = doc

      # Index each field under its own prefix so it can be searched separately.
      term_generator.index_text(row['TITLE'].to_s, 1, 'S')
      term_generator.index_text(row['DESCRIPTION'].to_s, 1, 'XD')

      # Index the same fields again without a prefix for general search.
      # The term position is bumped between the two fields so that the end of
      # the title and the start of the description are not adjacent positions.
      term_generator.index_text(row['TITLE'].to_s)
      term_generator.increase_termpos
      term_generator.index_text(row['DESCRIPTION'].to_s)

      # Keep every field of the row, serialised as JSON, for display purposes.
      doc.data = row.to_h.to_json

      # Replacing by the identifier term keeps exactly one document per id.
      idterm = "Q#{row['id_NUMBER']}"
      doc.add_boolean_term(idterm)
      db.replace_document(idterm, doc)
    end
  ensure
    # Close explicitly so pending changes are flushed and the write lock is
    # released now, rather than whenever the object is garbage collected.
    db.close
  end
end

A full copy of this code is available in code/ruby/index1.rb.

You can run this code to index a sample data file (held in data/100-objects-v1.csv) to a database at path db as follows:

$ ruby code/ruby/index1.rb data/100-objects-v1.csv db