# SQL

Below the SQL dialect supported by MamBase is described. For SQL support development, see SQL Roadmap. All the examples in this document use the database described in Demo database.

# CREATE TABLE

Syntax

CREATE TABLE [IF NOT EXISTS] tbl_name 
    ( create_definition [, create_definition] ... )
 
create_definition:
  col_name data_type 
    [NOT NULL | NULL] [DEFAULT literal]
    [PRIMARY KEY]
  | [CONSTRAINT [symbol]] FOREIGN KEY
    [index_name] (col_name) REFERENCES tbl_name

Example

CREATE TABLE person (
  p_id varchar PRIMARY KEY,
  name varchar, 
  lastname varchar, 
  age bigint, 
  city varchar, 
  income int, 
  dbl double, 
  dtime datetime
);

If the table name starts with the '#' sign, then a temporary table is created

Example

CREATE TABLE #tempt1 (
  p_id varchar PRIMARY KEY,
  name varchar, 
  lastname varchar, 
  age bigint, 
  city varchar, 
  income int, 
  dbl double, 
  dtime datetime
);

Current version limitations:

primary key cannot be compound;

# CREATE TABLE AS EDGE

A special table for graph edge representation will be created. Such a table could be considered as an intermediate many-to-many table with two standard fields: fromid and toid. These fields will be added automatically.

Syntax

CREATE TABLE link_name AS EDGE from_tablename to_tablename;

Example

CREATE TABLE owner AS EDGE car person;

Query example

SELECT p.id, name, lastname, age, c.id, c.model 
FROM person p 
  JOIN owner o ON p.id = o.toid 
  JOIN car c ON c.id = o.fromid 
WHERE age > 20 AND model = 'Toyota'

The following query gives the same result (see also the JOIN expression description):

SELECT p.id, name, lastname, age, c.id, c.model 
FROM person p 
  JOIN car c ON p.id = c.owner 
WHERE age > 20 AND model = 'Toyota'

# ALTER TABLE

Syntax

ALTER TABLE tbl_name
  [alter_specification [, alter_specification] ...]

alter_specification:
  | ADD [COLUMN] column_definition,...
  | ADD [COLUMN] (column_definition,...)
  | {ALTER|MODIFY} [COLUMN] column_definition,...
  | DROP [COLUMN [if exists]] col_name

column_definition:
  col_name data_type 
    [NOT NULL | NULL] [DEFAULT literal]
  | [CONSTRAINT [symbol]] FOREIGN KEY
    [index_name] (col_name) REFERENCES tbl_name

Example

ALTER TABLE person ADD birthday DATE;

# CREATE INDEX

Syntax

CREATE [UNIQUE] INDEX index_name ON tbl_name (col_name,...)

Example

CREATE INDEX p_ndx_age ON person (age);

# DROP TABLE

Syntax

DROP TABLE [IF EXISTS] tbl_name

tbl_name - is a table name or edge table name

Example

DROP TABLE person;

# DROP INDEX

Syntax

DROP INDEX index_name [ON tbl_name]

Example

DROP INDEX p_ndx_age;

# INSERT

Syntax

INSERT INTO tbl_name
  [(col_name,...)]
  VALUES (value,...)

Example

INSERT INTO person
  (id, name, lastname, age, city, dbl, dtime)
VALUES
  ('person5000', 'John', 'Tester', 30, 
  'Lisbon', 1.11, '2018-07-03 01:52:02.65');

# INSERT INTO ... SELECT ...

Syntax

INSERT INTO tbl_name
  [(col_name,...)]
  SELECT ...

Example

INSERT INTO table2
  (id, name, lastname, age, city, dbl, dtime)
SELECT 
  id, name, lastname, age, city, dbl, dtime 
FROM person
WHERE age < 30

# BULK INSERT

Syntax

BULK INSERT table_name (col_name,...) 
  FROM 'file_path' 
  [WITH ([FIRSTROW = number,] [FIELDTERMINATOR = 'character'])]

Example

BULK INSERT person (id, name, lastname, age, city, income, dbl, dtime) 
FROM 'sntest1/person.csv' 
WITH (FIRSTROW = 2, FIELDTERMINATOR = ',');

Note

The FIRSTROW parameter specifies the line number from which the import should be started. For example, one should write FIRSTROW = 2 in order to skip headers.

Filepath is absolute or relative with respect to the directory where the running server file is located. For example, if mbserver.exe file is located in the c:/mambase/bin directory, then the ../data/csv/person.csv path will be expanded as c:/mambase/data/csv/person.csv.

# UPDATE

Syntax

UPDATE table_reference
  SET col_name = value,...
  [WHERE where_condition]

UPDATE table_name_or_alias
  SET col_name = value,...
  FROM tablename [alias] | join_expression
  [WHERE where_condition]

Example

UPDATE person SET name = 'LAMAR' WHERE name = 'Lamar'

Note

Syntax of the FROM and WHERE expressions is the same as in the SELECT query.

# DELETE

Syntax

DELETE FROM tbl_name
  [WHERE where_condition]
 
DELETE table_name_or_alias FROM join_expression
  [WHERE where_condition]

Example

DELETE FROM person WHERE name = 'Livia'

delete with join expression

DELETE FROM p 
FROM person p join car c ON p.id = c.owner 
WHERE c.model = 'Toyota';

Note

Syntax of the FROM and WHERE expressions is the same as in the SELECT query.

When deleting a record all the references to it will be deleted automatically, and all the records from edge-table, where the deleting record was mentioned in fromid or toid field, will be deleted.

# TRUNCATE TABLE

Syntax

TRUNCATE TABLE tbl_name

Example

TRUNCATE TABLE #temptable1;

# BACKUP

BACKUP DATABASE TO path

Example

BACKUP DATABASE TO 'c:/data/sntest1/backup'

# SET DATEFORMAT

Syntax

SET DATEFORMAT mdy | dmy | ymd | ydm | myd | dym

Example

SET DATEFORMAT ymd

# SELECT

Syntax

SELECT
  [ DISTINCT ]
  select_expression [, select_expression ...]
  FROM tablename | join_expression
  [WHERE where_condition]
  [WHERE INDEX where_condition]
  [GROUP BY column_definition1 [, column_definition1 ...]]
  [HAVING where_condition]
  [ORDER BY column_definition2 [ASC | DESC] [, column_definition2 …] ]
  [LIMIT row_count] 
  [OFFSET offset]

column_definition1:
  column_name | expression

column_definition2:
  column_name | column_number | expression

# SELECT expression

May contain the following:

List of fields:
```
SELECT name, lastname FROM person
```

All fields:

SELECT * FROM person
SELECT p.*, c.model FROM person p JOIN car c ON p.id = c.owner

AS:

SELECT count(*) AS countofpersons FROM person

Simple functions and expressions:

SELECT UPPER(name) AS uname, age*2 AS dbl FROM person

Aggregate functions:

SELECT COUNT(age), COUNT_BIG(age), MIN(age), MAX(age), AVG(age) FROM person;

DISTINCT:

SELECT distinct name, lastname FROM person

nested SELECT:

SELECT 
  name, 
  (SELECT model FROM car WHERE p.id = owner) 
    AS modelname 
FROM person p;

CASE...WHEN:

SELECT name, 
  CASE name 
    WHEN 'Lamar' THEN 'L' 
    WHEN 'Mercedez' THEN 'M' 
    ELSE 'U' END AS n
FROM person;

SELECT id, 
  CASE WHEN age < 30 THEN 1 
  ELSE 2 END as age_category, 
  name, lastname, income
from person

# WHERE clause

Comparison and logical operators:

SELECT * FROM person WHERE age > 20 AND name = 'Lamar'

LIKE operator:

SELECT * FROM person WHERE name LIKE '%amar%'
SELECT * FROM person WHERE name LIKE 'L[ai]%'

IN or NOT IN operator:

SELECT name, age FROM person 
WHERE age IN (20, 22, 24) AND name IN ('Lamar', 'Susan')

SELECT * FROM customers 
WHERE ctype IN ( 
    SELECT distinct otype 
    FROM orders 
    WHERE odate >= '2020-03-03' 
  );

NULL check:

SELECT * FROM person WHERE age IS NULL
SELECT * FROM person WHERE age IS NOT NULL

Subqueries with EXISTS or NOT EXISTS

SELECT * FROM person WHERE age > 40 
  AND EXISTS( SELECT * FROM car WHERE car.owner = person.id );

Other operators and functions (see below for a list of operators and functions):

SELECT * FROM person WHERE age*2 - income/20 > 0 AND UPPER(name) = 'LAMAR'

# FROM and JOIN clauses

Single table query:

SELECT * FROM person

JOIN operations through FOREIGN-KEY field or EDGE intermediate table are quick. We recommend heavily using such simple JOIN operations when developing the database.

MamBase significantly speeds up the processing of such JOIN operations, as it is multi-model and uses graph subsystem features for storing links.

In the relational representation, relationships between objects are represented as links from FOREIGN KEY fields to the id field of a record in the same or different table. The ON condition can only contain the operator =.

JOIN between two tables via foreign key:

SELECT name, model FROM person p JOIN car c ON p.id = c.owner

JOIN between two tables through an intermediate table (in this example through the table owner having type EDGE intended to describe the graph edges):


SELECT p.id, name, lastname, age, c.id, c.model
FROM person p 
  JOIN owner o ON p.id = o.toid 
  JOIN car c ON c.id = o.fromid
WHERE age > 20 AND model = 'Toyota';

JOIN queries through simple non-FOREIGN-KEY fields are slow. We recommend rebuilding the database structure to use FOREIGN-KEY or at least create index (indexes) for the fields used in JOIN expression.

JOIN through simple non-FOREIGN-KEY field

SELECT p1.id, p2.id 
FROM person p1 
  JOIN person2 p2 
  ON p1.name = p2.name

an index is required to complete such query

CREATE INDEX p_name ON person (name)

and/or

CREATE INDEX p2_name ON person2 (name)

JOIN through several fields with and condition

SELECT p1.id, p2.id 
FROM person p1 
  JOIN person2 p2 
  ON p1.name = p2.name AND p1.lastname = p2.lastname

To perform this query it is desirable to create an index on two fields

CREATE INDEX p_name_lastname ON person (name, lastname)

or (for person2 table)

CREATE INDEX p2_name_lastname ON person2 (name, lastname)

But it is enough to have one simple index on the field name (or lastname) for the table person (or person2).

CREATE INDEX p_name ON person (name)  
CREATE INDEX p_name ON person (lastname)  
CREATE INDEX p_name ON person2 (name)  
CREATE INDEX p_name ON person2 (lastname)

OUTER JOIN
In current version, OUTER JOIN works only for the links through FOREIGN-KEY fields.

SELECT p.*, c.model 
FROM person p 
LEFT OUTER JOIN car c 
ON p.id = c.owner AND c.model = 'Honda'

SELECT p.*, c.model 
FROM person p 
  RIGHT OUTER JOIN car c 
  ON p.id = c.owner AND p.age = 45

JOIN with the nested query SELECT

SELECT p.age, p2.age 
FROM person p 
  JOIN ( SELECT name, lastname, age FROM person ) p2 
  ON p.name = p2.name and p.lastname = p2.lastname

# GROUP BY clause

Grouping by single field:

SELECT COUNT(dbl), AVG(dbl), MIN(dbl), MAX(dbl) 
FROM person 
GROUP BY city;

Grouping by multiple fields:

SELECT COUNT(dbl), AVG(dbl), MIN(dbl), MAX(dbl) 
FROM person 
GROUP BY city, name;

Filtering aggregates with HAVING:

SELECT city, AVG(dbl) AS avg_dbl
FROM person
GROUP BY city
HAVING (avg_dbl <= 0.5);

It is possible to use simple functions and DISTINCT in aggregate functions:

SELECT AVG(income*dbl) AS avg_expr FROM person

SELECT AVG( 
  CASE WHEN name = 'Lamar' THEN 1 
       WHEN name = 'Mercedez' THEN 2 
       ELSE 0 end
  ) AS avg_expr  
FROM person'

SELECT city, COUNT(DISTINCT id) FROM person GROUP BY city;

SELECT AVG(income*dbl) AS avg_expr FROM person

SELECT AVG( CASE WHEN name = 'Lamar' THEN 1 WHEN name = 'Mercedez' THEN 2 ELSE 0 END) AS avg_name  
FROM person

SELECT city, COUNT(DISTINCT id) FROM person GROUP BY city

If GROUP BY is omitted, but aggregate functions are present, then the whole result is considered as a single group.

# ORDER BY clause

Simple sorting by single field:

SELECT name, lastname, age
FROM person 
ORDER BY name

Sorting by multiple fields with order specified:

SELECT name, lastname, age
FROM person
ORDER BY name, lastname ASC, age DESC

Sorting by column number
(column number in select expression starts from 1):

SELECT name, lastname, age
FROM person
ORDER BY 1, lastname ASC, 3 DESC;

Sorting by expression:

SELECT name, lastname, age
FROM person
ORDER income/age, name, lastname;

# LIMIT and OFFSET clauses

LIMIT and OFFSET are used to specify the number of results:

First 100 results:
```
SELECT * FROM person LIMIT 100
```
10 results starting FROM 100th (i. e., skipping first 100 results, as numbering starts FROM zero):
```
SELECT * FROM person LIMIT 10 OFFSET 100
```

# Operators

comparison operators:

>, >=, <, <=, =, <>, !=, IS NULL, IS NOT NULL, IN
```
SELECT * FROM person WHERE age IS NULL
```
LIKE, which matches strings against simple patterns (similar to LIKE in MS SQL Server).
In addition to regular characters, a pattern may contain wildcards. % denotes any number of subsequent characters; _ denotes any single character; [] is for any single character within a range ([a-z]) or set ([abc]); [^] is for any single character not within a range ([^a-z]) or set ([^abc]). In order to escape special characters, enclose them into [].

logical operators:

and (&&), or (||), not (!)

SELECT * FROM person
WHERE age > 50 AND (name = 'Lamar' OR lastname = 'Wurdeman')

arithmetic operators:

+, -, *, /

SELECT age*2 FROM person WHERE age*100/income > 3

# UNION

Syntax

SELECT ...
UNION [ALL] 
SELECT ...
[UNION [ALL] SELECT ...]

Example

SELECT id, name, lastname, city FROM person WHERE id = 'person22' 
UNION 
SELECT id, name, lastname, city FROM person WHERE id = 'person33' 
UNION 
SELECT id, name, lastname, city FROM person WHERE id = 'person55

# FUNCTIONS

Name	Description
`ABS`	Returns the absolute value of a number
`ACOS`	Returns the arccosine of a number
`ASCII`	Returns the ASCII value for the specific character
`ASIN`	Returns the arc sine of a number
`ATAN`	Returns the angle, in radians, whose tangent is a specified float expression
`CAST`	Convert a value of any type into a value with a specified type
`CEIL`, `CEILING`	Returns the smallest integer value that is larger than or equal to a number
`CHR`	Accepts an ASCII code and returns the corresponding character
`CONCAT`	Adds two or more expressions together
`COS`	Returns the cosine of a number
`COT`	Returns the cotangent of a number
`DATEPART`	Returns the integer value of the specified date part
`DATETIME2FROMPARTS`	Returns datetime2, made with arguments
`DIV`	Used for integer division (x is divided by y). An integer value is returned
`EXP`	Returns the exponential value of the specified float expression.
`ISNULL`	Checks expression for NULL and returns the specified value if NULL otherwise returns the value of the expression
`LEFT`, `STRLEFT`	Returns the left part of a character string with the specified number of characters
`LEN`, `LENGTH`	Returns the length of a string (in bytes)
`LN`, `LOG`	Returns the natural logarithm of a number
`LOG10`	Returns the base-10 logarithm of the specified float expression
`LOG2`	Returns the base-2 logarithm of a specified value
`LOWER`	Converts a string to lower-case
`LPAD`	Left-pads a string with another string, to a certain length
`MOD`	Function is used to get the remainder from a division
`MONTH`	Return the month part of a date
`PI`	Returns the constant value of Pi
`POWER`	Returns the value of the specified expression to the specified power
`POS`, `POSITION`	Returns the position of the first occurrence of a substring in a string
`RAND`	Returns a pseudo-random float value from 0 through 1, exclusive.
`REPEAT`	Repeats a string as many times as specified
`REPLACE`	Replaces all occurrences of a substring within a string
`REVERSE`	Reverses a string and returns the result
`ROUND`	Returns a numeric value, rounded to the specified length or precision
`YEAR`	Function returns the year part for a specified date
`STRLEFT`	Extracts a number of characters from a string (starting from left)
`STRRIGHT`	Extracts a number of characters from a string (starting from right)
`RPAD`	Right-pads a string with another string, to a certain length
`REGEX`	Match regex pattern
`RIGHT`, `STRRIGHT`	Returns the right part of a character string with the specified number of characters
`SIGN`	Returns the sign of a number: -1 for negatives, 0 for 0, 1 for positives
`SIN`	Returns the sine of a number
`SQRT`	Returns the square root of a number
`SUBSTR`, `SUBSTRING`	Extracts a substring from a string (starting at any position)
`TAN`	Returns the tangent of a number
`TRIM`	Removes leading and trailing spaces from a string
`TRUNCATE`	Truncates a number to the specified number of decimal places
`UPPER`	Converts a string to upper-case

# DATA TYPES

Type	Pseudonyms	Description
`BIT`	`BOOL`	0 or 1, a boolean data type
`INT`	`INTEGER`, `TINYINT`	4-byte integer: from `-2 147 483 648` to `2 147 483 647`
`BIGINT`	—	8-byte integer: from `-9 223 372 036 854 775 808` to `9 223 372 036 854 775 807`
`REAL(s,d)`	`DOUBLE(size,d)`, `FLOAT(size,d)`	8-byte floating-point number with scale `1.7E+308` and precision up to `1.7E-308`. Values in round brackets are ignored
`DECIMAL(s,d)`	`NUMERIC(size,d)`	8-byte floating-point number. In this version it is identical to DOUBLE, in the next versions it will be implemented using decimal arithmetic.
`DATE`	—	Dates from `0001-01-01` (January 1st, 0001) to `9999-12-31` (December 31st, 9999). Supported string representations: `YYYY-MM-DD`: `2017-07-31` `YYYY/MM/DD`: `2017/07/31` `YYYY.MM.DD`: `2017.07.31`
`DATETIME`	—	Date and time, where date is in the same interval as in `DATE`. Supported string representations: `YYYY-MM-DD hh:mm:ss:nnnnnnn`: `2017-07-31 01:21:34.1234567` `YYYY/MM/DD hh:mm:ss:nnnnnnn`: `2017/07/31 01:21:34.1234567` `YYYY.MM.DD hh:mm:ss:nnnnnnn`: `2017.07.12 01:21:34.1234567`
`DATETIME2 (prec)`	—	Almost identical to `DATETIME` except for specifying the number of characters in fractional seconds
`CHAR(size)`	`VARCHAR(size)`,`NVARCHAR(size)`, `TEXT`	String. In this version, values in parentheses are ignored. In the next version, the string length will be limited to the specified value
`VARBINARY (size)`	—	Binary data. Example of string represantation: `0x000000A4` (in this version, values in parentheses are ignored. In the next version, the data length will be limited to the specified value)

# PSEUDO TABLES

# EDGE-TABLES

Table creation:

CREATE TABLE friends AS EDGE person person;

Filling in the table:

INSERT INTO friends VALUES ( 'person22', 'person1022' );
INSERT INTO friends VALUES ( 'person22', 'person1023' );

Data retrieval:

SELECT p1.id, p2.id
  FROM person p1
  JOIN friends f ON p1.id = f.fromid
  JOIN person p2 ON f.toid = p2.id
WHERE p1.id = 'person22';

Updating data:

UPDATE friends SET fromid = 'person7', toid = 'person67' 
WHERE fromid = 'person7' AND toid = 'person1007'

UPDATE car SET owner = 'person23' WHERE id = 'car4022',

Deleting data:

DELETE FROM friends WHERE fromid = 'person22' AND toid = 'person1022';

DELETE FROM owner WHERE toid = 'person22',

DELETE FROM owner WHERE fromid = 'car4021',

# Obtaining the list of tables

SELECT * FROM INFORMATION_SCHEMA.TABLES

The result contains two fields:

name — table name;
type — table type (2 — regular table; 3 — link table, see CREATE TABLE AS EDGE).

Note. C++ API has special function mb_get_tableslist() for getting list of tables.

# Obtaining the list of table fields

SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME='car'

A result contains five fields:

name — field name;
type — field type:
1 — string,
2 — int,
3 — bigint,
4 — double,
5 — datetime,
6 — bool,
7 — date,
8 — if field is a primary key or a foreign key (and is a string thereafter);
subtype — specifies the previous field:
0 — regular field,
1 — primary key,
2 — foreign key;
linktable — referenced table name for foreign key;
nullable — always TRUE in the current version.

Note. C++ API has special function mb_get_tableschema() for getting list of fields.

# USING INDEXES

Index creating

CREATE INDEX p_age ON person (age)  
CREATE INDEX p_name ON person (name)

Index deleting

DROP INDEX index_name [ON tbl_name]

Using index in simple queries

SELECT * FROM person WHERE age = 22

This query uses the p_age index

SELECT * FROM person WHERE age = 22 and name = 'Lamar'

In this case, two indexes p_age and p_name are used.

Queries on the range

SELECT * FROM person WHERE age >= 22 and age < 24

Multiple Field Indexes

CREATE INDEX p_name_lastname ON person (name, lastname)  
SELECT * FROM person WHERE name = 'Lamar' and lastname = 'Tanon'

When processing the simple queries the indexes are used when the condition using index selects less than 6% of records.

For simple comparisons it is possible to explicitly require the use of a pre-created index. To do that type INDEX keyword after WHERE keyword:

SELECT name, lastname, city, model 
FROM person p JOIN car c ON c.owner = p.id 
WHERE INDEX name = 'Lamar';

Indexes are also used when processing queries containing a JOIN expression (see section "FROM and JOIN clauses")

SELECT name, lastname, city, model 
FROM person p JOIN car c ON c.owner = p.id 
WHERE INDEX name = 'Lamar';

← Installation and administration Graph-SQL →