jekyll-sqlite
Advanced tools
| # frozen_string_literal: true | ||
| require_relative "lib/jekyll-sqlite/version" | ||
| Gem::Specification.new do |spec| | ||
| spec.name = "jekyll-sqlite" | ||
| spec.license = "MIT" | ||
| spec.version = Jekyll::Sqlite::VERSION | ||
| spec.authors = ["Nemo"] | ||
| spec.email = ["jekyll-sqlite@captnemo.in"] | ||
| spec.summary = "A Jekyll plugin to use SQLite databases as a data source." | ||
| spec.homepage = "https://github.com/captn3m0/jekyll-sqlite" | ||
| spec.required_ruby_version = ">= 3.0.0" | ||
| spec.metadata["homepage_uri"] = spec.homepage | ||
| spec.metadata["source_code_uri"] = spec.homepage | ||
| spec.metadata["changelog_uri"] = "https://github.com/captn3m0/jekyll-sqlite/blob/master/CHANGELOG.md" | ||
| # Specify which files should be added to the gem when it is released. | ||
| # The `git ls-files -z` loads the files in the RubyGem that have been added into git. | ||
| spec.files = Dir.chdir(__dir__) do | ||
| `git ls-files -z`.split("\x0").reject do |f| | ||
| (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)}) | ||
| end | ||
| end | ||
| spec.bindir = "exe" | ||
| spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) } | ||
| spec.require_paths = ["lib"] | ||
| spec.add_dependency "sqlite3", "~> 1.6" | ||
| spec.metadata["rubygems_mfa_required"] = "true" | ||
| end |
+7
-2
@@ -5,3 +5,8 @@ require: rubocop-rake | ||
| NewCops: enable | ||
| Exclude: | ||
| - "node_modules/**/*" | ||
| - "tmp/**/*" | ||
| - "vendor/**/*" | ||
| - ".git/**/*" | ||
| - "test/_plugins/jekyll_sqlite_generator.rb" | ||
| Style/StringLiterals: | ||
@@ -19,2 +24,2 @@ Enabled: true | ||
| Metrics/AbcSize: | ||
| Max: 20 | ||
| Max: 15 |
+6
-0
| ## [Unreleased] | ||
| ## [0.1.4] - 2024-07-17 | ||
| - Per-page queries are now supported via a `sqlite` config block in the front matter. | ||
| - Documents support for existing site data being used within queries. | ||
| - Code cleanup and refactoring. | ||
| - Don't disable journaling on the database. | ||
| ## [0.1.3] - 2024-07-02 | ||
@@ -4,0 +10,0 @@ - First functional version |
+6
-1
@@ -9,5 +9,10 @@ # frozen_string_literal: true | ||
| # These are development dependencies | ||
| gem "jekyll", "~> 4.0" | ||
| gem "rake", "~> 13.0" | ||
| gem "jekyll", "~> 4.0" | ||
| gem "rubocop", "~> 1.21" | ||
| gem "rubocop-rake", "~> 0.6.0" | ||
| # Ruby 3.4 preparedness | ||
| gem "base64", "~> 0.2.0" | ||
| gem "bigdecimal", "~> 3.1" | ||
| gem "csv", "~> 3.3" |
| # frozen_string_literal: true | ||
| require "sqlite3" | ||
@@ -7,12 +8,13 @@ | ||
| class Generator < Jekyll::Generator | ||
| safe true | ||
| # Set to high to be higher than the Jekyll Datapages Plugin | ||
| priority :high | ||
| # Default pragma | ||
| def fast_setup(db) | ||
| db.execute("PRAGMA synchronous = OFF") | ||
| db.execute("PRAGMA journal_mode = OFF") | ||
| db.execute("PRAGMA query_only = ON") | ||
| ## | ||
| # Split the given key using dots and return the last part | ||
| # customers.order -> order | ||
| def get_tip(name) | ||
| name.split(".")[-1] | ||
| end | ||
| ## | ||
| # Get the root of where we are generating the data | ||
@@ -22,2 +24,4 @@ def get_root(root, db_name) | ||
| root = root[p] | ||
| rescue KeyError | ||
| raise "Jekyll SQLite: Invalid root. #{p} not found while iterating to #{db_name}" | ||
| end | ||
@@ -27,51 +31,45 @@ root | ||
| def gen_hash_data(root, db, db_name, query) | ||
| root ||= {} | ||
| root[db_name] = db.execute(query) | ||
| root[db_name].size | ||
| ## | ||
| # Prepare the query by binding the parameters | ||
| # Since we don't know if the query needs them | ||
| # we ignore all errors about "no such bind parameter" | ||
| def _prepare_query(stmt, params) | ||
| params.each do |key, value| | ||
| stmt.bind_param key, value | ||
| rescue StandardError => e | ||
| raise e unless e.message.include? "no such bind parameter" | ||
| end | ||
| end | ||
| def gen_nested_data(item, db, query, db_name) | ||
| item[db_name] = [] | ||
| ## | ||
| # Internal function to generate data given | ||
| # root: a Hash-Like root object (site.data, site.data.*, page.data) | ||
| # key: string as the key to use to attach the data to the root | ||
| # db: SQLite3 Database object to execute the query on | ||
| # query: string containing the query to execute | ||
| # Sets root[db_name] = ResultSet of the query, as an array | ||
| # Returns the count of the result set | ||
| def _gen_data(root, key, db, query) | ||
| db.prepare(query) do |stmt| | ||
| # We bind params, ignoring any errors | ||
| # Since there's no way to get required params | ||
| # From a statement | ||
| item.each do |key, value| | ||
| stmt.bind_param key, value | ||
| rescue StandardError # rubocop:disable Lint/SuppressedException | ||
| end | ||
| stmt.execute.each { |d| item[db_name] << d } | ||
| _prepare_query stmt, get_bind_params(root) | ||
| root[key] = stmt.execute.to_a | ||
| end | ||
| item[db_name].size | ||
| root[key].count | ||
| end | ||
| def array_gen(root, config, db_name, db) | ||
| count = 0 | ||
| root.each do |item| | ||
| # TODO: Add support for binding Arrays as well. | ||
| if item.is_a? Hash | ||
| count += gen_nested_data(item, db, config["query"], db_name) | ||
| else | ||
| Jekyll.logger.info "Jekyll SQLite:", "Item is not a hash for #{db_name}. Unsupported configuration" | ||
| end | ||
| ## | ||
| # Calls _gen_data for the given root | ||
| # iterates through the array if root is an array | ||
| def gen_data(root, ...) | ||
| if root.is_a? Array | ||
| # call gen_data for each item in the array | ||
| # and return the sum of all the counts | ||
| root.map { |item| gen_data(item, ...) }.sum | ||
| else | ||
| _gen_data(root, ...) | ||
| end | ||
| count | ||
| end | ||
| def gen_data(root, config, db_name, db) | ||
| count = 0 | ||
| if root.nil? || (root.is_a? Hash) | ||
| count = gen_hash_data(root, db, db_name, config["query"]) | ||
| elsif root.is_a? Array | ||
| count = array_gen(root, config, db_name, db) | ||
| end | ||
| count | ||
| end | ||
| def get_tip(name) | ||
| name.split(".")[-1] | ||
| end | ||
| ## | ||
| # Validate given configuration object | ||
| def validate_config(config) | ||
@@ -82,8 +80,50 @@ return false unless config.is_a? Hash | ||
| return false unless config.key?("data") | ||
| true | ||
| end | ||
| def generate(site) | ||
| gem_config = site.config['sqlite'] || [] | ||
| gem_config.each do |config| | ||
| ## pick bindable parameters | ||
| # from the root | ||
| # All primitive values are bound to the query | ||
| # Arrays and Hashes are ignored | ||
| def get_bind_params(dict) | ||
| dict.select { |_key, value| !value.is_a?(Array) && !value.is_a?(Hash) } | ||
| end | ||
| ## | ||
| # Given a configuration, generate the data | ||
| # and attach it to the given data_root | ||
| def generate_data_from_config(root, config) | ||
| key = config["data"] | ||
| query = config["query"] | ||
| file = config["file"] | ||
| SQLite3::Database.new file, readonly: true do |db| | ||
| db.results_as_hash = config.fetch("results_as_hash", true) | ||
| branch = get_root(root, key) | ||
| tip = get_tip(config["data"]) | ||
| count = gen_data(branch, tip, db, query) | ||
| Jekyll.logger.info "Jekyll SQLite:", "Loaded #{key}. Count=#{count}" | ||
| end | ||
| end | ||
| ## | ||
| # Iterate through all the pages in the site | ||
| # and generate the data from the configuration | ||
| def gen_pages(site) | ||
| site.pages.each do |page| | ||
| gen(page.data, page) | ||
| end | ||
| end | ||
| ## | ||
| # Generate the data from the configuration | ||
| # Takes as input the root where the data will be attached | ||
| # and a configuration holder, where the sqlite key can be found | ||
| # Root is either site.data or page.data | ||
| # and config_holder is either site.config or page itself. | ||
| def gen(root, config_holder) | ||
| sqlite_configs = config_holder["sqlite"] || [] | ||
| sqlite_configs.each do |config| | ||
| unless validate_config(config) | ||
@@ -93,13 +133,13 @@ Jekyll.logger.error "Jekyll SQLite:", "Invalid Configuration. Skipping" | ||
| end | ||
| d_name = config["data"] | ||
| SQLite3::Database.new config["file"], readonly: true do |db| | ||
| fast_setup db | ||
| db.results_as_hash = config.fetch("results_as_hash", true) | ||
| root = get_root(site.data, d_name) | ||
| count = gen_data(root, config, get_tip(d_name), db) | ||
| Jekyll.logger.info "Jekyll SQLite:", "Loaded #{d_name}. Count=#{count}" | ||
| end | ||
| generate_data_from_config(root, config) | ||
| end | ||
| end | ||
| ## | ||
| # Entrpoint to the generator, called by Jekyll | ||
| def generate(site) | ||
| gen(site.data, site.config) | ||
| gen_pages(site) | ||
| end | ||
| end | ||
| end |
@@ -5,4 +5,4 @@ # frozen_string_literal: true | ||
| module Sqlite | ||
| VERSION = "0.1.3" | ||
| VERSION = "0.1.4" | ||
| end | ||
| end |
+60
-0
@@ -5,5 +5,65 @@ # frozen_string_literal: true | ||
| require "rubocop/rake_task" | ||
| require "jekyll" | ||
| require "sqlite3" | ||
| RuboCop::RakeTask.new | ||
| def assert(cond, msg = "Assertion Failed") | ||
| raise msg unless cond | ||
| end | ||
| def query_db(query) | ||
| db = SQLite3::Database.new "_db/northwind.db" | ||
| db.results_as_hash = true | ||
| results = db.execute query | ||
| results[0] | ||
| end | ||
| # rubocop:disable Metrics/AbcSize | ||
| # rubocop:disable Metrics/MethodLength | ||
| def validate_json | ||
| file = "_site/data.json" | ||
| data = JSON.parse(File.read(file)) | ||
| assert data["orders"].size == 53, "Expected 53 orders, got #{data["orders"].size}" | ||
| assert data["customers"].size == 93, "Expected 93 customers, got #{data["customers"].size}" | ||
| assert data["categories"].size == 8, "Expected 93 categories, got #{data["categories"].size}" | ||
| assert data["orders"][0] == query_db("SELECT * FROM Orders LIMIT 1"), "Order Fetch Failed" | ||
| assert data["customers"][0] == query_db("SELECT * FROM Customers LIMIT 1"), "Customer Fetch Failed" | ||
| assert data["customers"][0] == query_db("SELECT * FROM Customers LIMIT 1"), "Customer Fetch Failed" | ||
| assert data["categories"][0]["products"].size == 12, "Products don't match" | ||
| data["categories"][0]["products"].each do |p| | ||
| assert p["CategoryID"] == 1, "CategoryID doesn't match" | ||
| end | ||
| assert data["delayedOrders"].size == 17 | ||
| assert data["delayedOrders"][0]["OrderID"] == 10_249 | ||
| end | ||
| def validate_page_json | ||
| file = "_site/suppliers.json" | ||
| read_data = JSON.parse(File.read(file)) | ||
| data = read_data["allSuppliers"] | ||
| assert data.size == 29, "Expected 29 suppliers, got #{data.size}" | ||
| r = query_db("SELECT CompanyName, SupplierID FROM Suppliers ORDER BY SupplierID LIMIT 1") | ||
| assert r["CompanyName"] == data[0]["CompanyName"], "Company Name doesn't match" | ||
| assert r["SupplierID"] == data[0]["SupplierID"], "Supplier ID doesn't match" | ||
| assert data[0]["products"].size == 3, "Products don't match" | ||
| assert data[0]["products"][0]["ProductName"] == "Chai" | ||
| assert data[0]["products"][1]["ProductName"] == "Chang" | ||
| assert data[0]["products"][2]["ProductName"] == "Aniseed Syrup" | ||
| # Focus Supplier - this uses the data from the page front-matter to prepare a query | ||
| fs = query_db("SELECT * FROM Suppliers WHERE SupplierID = 6") | ||
| assert read_data["focusSupplier"][0] == fs, "Focus Supplier doesn't match" | ||
| end | ||
| # rubocop:enable Metrics/AbcSize | ||
| # rubocop:enable Metrics/MethodLength | ||
| task default: :rubocop | ||
| desc "Build Test Site" | ||
| task :test do | ||
| Dir.chdir("test") | ||
| Jekyll::Site.new(Jekyll.configuration).process | ||
| validate_json | ||
| validate_page_json | ||
| end |
+62
-57
@@ -5,3 +5,4 @@ # Jekyll SQLite plugin | ||
| It additionally supports nested queries, so that you can use the rows of `site.data.items` as bind_params for your nested query. | ||
| It supports site-level queries, per-page queries, and prepared queries that can | ||
| use existing data (possibly generated via more queries) as parameters. | ||
@@ -35,16 +36,5 @@ [](https://github.com/captn3m0/jekyll-sqlite/actions/workflows/main.yml) [](https://badge.fury.io/rb/jekyll-sqlite) | ||
| sqlite: | ||
| - data: orders | ||
| file: &db "_db/northwind.db" | ||
| query: SELECT * from Orders | ||
| - data: customers | ||
| file: *db | ||
| query: SELECT * from Customers | ||
| - data: categories | ||
| file: *db | ||
| query: SELECT CategoryID, CategoryName, Description FROM Categories | ||
| # Note that the CategoryID parameter in the query is coming from site.data.categories[].CategoryID | ||
| # which was picked up in the previous query | ||
| - data: categories.products | ||
| file: *db | ||
| query: SELECT ProductID, ProductName FROM Products WHERE Products.CategoryID=:CategoryID | ||
| ``` | ||
@@ -55,43 +45,18 @@ | ||
| ```liquid | ||
| {{ site.data.categories | jsonify }} | ||
| {{ site.data.customers | jsonify }} | ||
| ``` | ||
| ## Generating Pages | ||
| ## Prepared Queries | ||
| It works well with the `datapage_gen` plugin: | ||
| This plugin supports prepared queries with parameter binding. This lets you | ||
| use existing data from a previous query, or some other source (such as | ||
| `site.data.*` or `page.*`) as a parameter in your query. | ||
| See the [datapage_gen](https://github.com/avillafiorita/jekyll-datapage_gen) docs for more details. | ||
| Say you have a YAML file defining your items (`data/books.yaml`): | ||
| Here's a sample configuration: | ||
| ```yaml | ||
| # This will automatically generate a file for each restaurant | ||
| # restaurants/#{id}.html file | ||
| # with the layout `_layouts/restaurant.html` | ||
| # and page.id, page.name, page.active set | ||
| # and page.title set to restaurant name | ||
| sqlite: | ||
| restaurants: | ||
| file: _db/reviews.db | ||
| sql: SELECT id, name, last_review_date > 1672531200 as active, address FROM restaurants; | ||
| page_gen: | ||
| - data: restaurants | ||
| template: restaurant | ||
| name: id | ||
| title: name | ||
| filter: active | ||
| ``` | ||
| ## Nested Queries | ||
| You can use the rows of `site.data.items` as bind_params for your nested query. For this to work against | ||
| data generated by the plugin, the configuration order must be correct, so you need `site.data.items` above `site.data.items.nested` in your configuration. | ||
| Say you have a YAML file defining your items (`data/items.yaml`): | ||
| ```yaml | ||
| - id: 31323952-2708-42dc-a995-6006a23cbf00 | ||
| name: Item 1 | ||
| name: Time Travel with a Rubber Band | ||
| - id: 5c8e67a0-d490-4743-b5b8-8e67bd1f95a2 | ||
| name: Item 2 | ||
| name: The Art of Cache Invalidation | ||
| ``` | ||
@@ -102,4 +67,5 @@ and the prices for the items in your SQLite database, the following configuration will enrich the `items` array with the price: | ||
| sql: | ||
| - data: site.data.items.meta | ||
| query: SELECT price,author FROM pricing WHERE id =:id | ||
| - data: items.books | ||
| query: SELECT price, author FROM pricing WHERE id =:id | ||
| db: books.db | ||
| ``` | ||
@@ -114,19 +80,58 @@ This would allow the following Liquid loop to be written: | ||
| This works well with `results_as_configuration` as well. | ||
| ## Per Page Queries | ||
| The exact same syntax can be used on a per-page basis to generate data within | ||
| each page. This is helpful for keeping page-specific queries within the page | ||
| itself. Here's an example: | ||
| ```yaml | ||
| sql: | ||
| - data: site.data.items.meta | ||
| query: SELECT price,author FROM pricing WHERE id =:id | ||
| results_as_hash: false | ||
| --- | ||
| FeaturedSupplierID: 2 | ||
| sqlite: | ||
| - data: suppliers | ||
| file: "_db/northwind.db" | ||
| query: "SELECT CompanyName, SupplierID FROM suppliers ORDER BY SupplierID" | ||
| - data: suppliers.products | ||
| # This is a prepared query, where SupplierID is coming from the previous query. | ||
| file: "_db/northwind.db" | ||
| query: "SELECT ProductName, CategoryID,UnitPrice FROM products WHERE SupplierID = :SupplierID" | ||
| # :FeaturedSupplierID is picked up automatically from the page frontmatter. | ||
| - data: FeaturedSupplier | ||
| file: "_db/northwind.db" | ||
| query: "SELECT * SupplierID = :FeaturedSupplierID" | ||
| --- | ||
| {{page.suppliers|jsonify}} | ||
| ``` | ||
| The following also renders the price and author: | ||
| This will generate a `page.suppliers` array with all the suppliers, and a `page.FeaturedSupplier` object with the details of the featured supplier. | ||
| ```liquid | ||
| {% for item in site.data.items %} | ||
| {{item.meta[0]}}, {{item.meta[1]}} | ||
| {% endfor %} | ||
| Each supplier will have a `products` array with all the products for that supplier. | ||
| ## Generating Pages | ||
| It works well with the `datapage_gen` plugin: | ||
| See the [datapage_gen](https://github.com/avillafiorita/jekyll-datapage_gen) docs for more details. | ||
| Here's a sample configuration: | ||
| ```yaml | ||
| sqlite: | ||
| restaurants: | ||
| file: _db/reviews.db | ||
| sql: SELECT id, name, last_review_date > 1672531200 as active, address FROM restaurants; | ||
| page_gen: | ||
| - data: restaurants | ||
| template: restaurant | ||
| name: id | ||
| title: name | ||
| filter: active | ||
| ``` | ||
| This will automatically generate a file for each restaurant | ||
| restaurants/#{id}.html file with the layout `_layouts/restaurant.html` and page.id, page.name, page.active set and page.title set to restaurant name | ||
| Note that the `datapage_gen` plugin will run _after_ the `jekyll-sqlite` plugin, | ||
| if you generate any pages with per-page queries, these queries will not execute. | ||
| ## Development | ||
@@ -133,0 +138,0 @@ |