I did some digging the other day to try to figure out how to use the excellent Python datetime library Arrow with the workhorse psycopg2 Python-PostgreSQL database adapter (plus the nifty Peewee ORM on top of psycopg2). I was pleasantly surprised how easy and painless it was to implement, with help from a blog post by Omar Rayward, and the psycopg2 docs (and source code) as a guide.
There are 5 core PostgreSQL date/time types that Arrow can handle, which psycopg2 maps to the 3 core Python date/time classes — by default through 4 core psycopg2 datatypes:
PostgreSQL Type | Example Output | Psycopg2 Type | Python Type |
---|---|---|---|
timestamp [without time zone] | 2001-02-03 04:05:06 | PYDATETIME | datetime |
timestamp with time zone | 2001-02-03 04:05:06-07 | PYDATETIMETZ | datetime |
date | 2001-02-03 | PYDATE | date |
time [without time zone] | 04:05:06 | PYTIME | time |
time with time zone | 04:05:06-07 | PYTIME | time |
Arrow can be used to handle each of these 5 types, via its single Arrow class. Here's how you set up the mappings:
import arrow
import psycopg2.extensions
def adapt_arrow_to_psql(value):
"""Formats an Arrow object as a quoted string for use in a SQL statement."""
# assume Arrow object is being used for TIME datatype if date is 1900 or earlier
if value.year <= 1900:
value = value.format("HH:mm:ss.SZ")
elif value == arrow.Arrow.max:
value = "infinity"
elif value == arrow.Arrow.min:
value = "-infinity"
return psycopg2.extensions.AsIs("'{}'".format(value))
# register adapter to format Arrow objects when passed as parameters to SQL statements
psycopg2.extensions.register_adapter(arrow.Arrow, adapt_arrow_to_psql)
def cast_psql_date_to_arrow(value, conn):
"""Parses a SQL timestamp or date string to an Arrow object."""
# handle NULL and special "infinity"/"-infinity" values
if not value:
return None
elif value == "infinity":
return arrow.Arrow.max
elif value == "-infinity":
return arrow.Arrow.min
return arrow.get(value)
def cast_psql_time_to_arrow(value, conn):
"""Parses a SQL time string to an Arrow object."""
# handle NULL
if not value:
return None
# handle TIME, TIME with fractional seconds (.S), and TIME WITH TIME ZONE (Z)
return arrow.get(value, ["HH:mm:ss", "HH:mm:ss.S", "HH:mm:ssZ", "HH:mm:ss.SZ"])
# override default timestamp/date converters
# to convert from SQL timestamp/date results to Arrow objects
psycopg2.extensions.register_type(psycopg2.extensions.new_type(
(
psycopg2.extensions.PYDATETIME.values +
psycopg2.extensions.PYDATETIMETZ.values +
psycopg2.extensions.PYDATE.values
),
"ARROW",
cast_psql_date_to_arrow,
))
# override default time converter to convert from SQL time results to Arrow objects
psycopg2.extensions.register_type(psycopg2.extensions.new_type(
psycopg2.extensions.PYTIME.values, "ARROW_TIME", cast_psql_time_to_arrow
))
The 3 slightly tricky bits are:
- Deciding whether to format an Arrow object as a date or a time (in adapt_arrow_to_psql()) — you may want to handle it differently, but since Arrow will parse times without dates as occurring on "0001-01-01", the simplest thing to do is assume a date with an early year (like 1900 or earlier) represents a time instead of a date (which allows round-tripping of times from PostgreSQL to Arrow and back).
- Handling PostgreSQL's special "-infinity" and "infinity" values when converting between PostgreSQL and Arrow dates (in adapt_arrow_to_psql() and cast_psql_date_to_arrow()) — Arrow.min and Arrow.max are the closest equivalents.
- Handling the 4 different time variants that PostgreSQL emits (in cast_psql_time_to_arrow()):
- "12:34:56" (no fractional seconds or time zone)
- "12:34:56.123456" (fractional seconds but no time zone)
- "12:34:56-07" (no fractional seconds but time zone)
- "12:34:56.123456-07" (fractional seconds and time zone)
With those mappings in place, you can now use Arrow objects natively with psycopg2:
import arrow
import psycopg2
def test_datetimes():
conn = psycopg2.connect(dbname="mydbname", user="myuser")
try:
cur = conn.cursor()
cur.execute("""
CREATE TABLE foo (
id SERIAL PRIMARY KEY,
dt TIMESTAMP,
dtz TIMESTAMP WITH TIME ZONE,
d DATE,
t TIME,
twtz TIME WITH TIME ZONE
)
""")
cur.execute(
"INSERT INTO foo (dt, dtz, d, t, twtz) VALUES (%s, %s, %s, %s, %s)",
(
arrow.get("2001-02-03 04:05:06"),
arrow.get("2001-02-03 04:05:06-07"),
arrow.get("2001-02-03"),
arrow.get("04:05:06", "HH:mm:ss"),
arrow.get("04:05:06-07", "HH:mm:ssZ"),
),
)
cur.execute("SELECT * FROM foo")
result = cur.fetchone()
assert result[1] == arrow.get("2001-02-03 04:05:06")
assert result[2] == arrow.get("2001-02-03 04:05:06-07")
assert result[3] == arrow.get("2001-02-03")
assert result[4] == arrow.get("04:05:06", "HH:mm:ss")
assert result[5] == arrow.get("04:05:06-07", "HH:mm:ssZ")
finally:
conn.rollback()
Or with the Peewee ORM, you can use Peewee's built-in date/time fields, and pass and receive Arrow objects to/from those fields:
import arrow
import peewee
import playhouse.postgres_ext
db = playhouse.postgres_ext.PostgresqlExtDatabase("mydbname", user="myuser")
class Foo(peewee.Model):
dt = peewee.DateTimeField(
default=arrow.utcnow,
constraints=[peewee.SQL("DEFAULT (CURRENT_TIMESTAMP AT TIME ZONE 'UTC')")],
)
dtz = playhouse.postgres_ext.DateTimeTZField(
default=arrow.utcnow,
constraints=[peewee.SQL("DEFAULT (CURRENT_TIMESTAMP AT TIME ZONE 'UTC')")],
)
d = peewee.DateField(
default=arrow.utcnow,
constraints=[peewee.SQL("DEFAULT (CURRENT_DATE AT TIME ZONE 'UTC')")],
)
t = peewee.TimeField(
default=lambda: arrow.utcnow().time(),
constraints=[peewee.SQL("DEFAULT (CURRENT_TIME AT TIME ZONE 'UTC')")],
)
class Meta:
database = db
def test_datetimes():
with db.transaction() as tx:
try:
Foo.create_table()
result = Foo.get_by_id(
Foo.create(
dt=arrow.get("2001-02-03 04:05:06"),
dtz=arrow.get("2001-02-03 04:05:06-07"),
d=arrow.get("2001-02-03"),
t=arrow.get("04:05:06", "HH:mm:ss"),
).id
)
assert result.dt == arrow.get("2001-02-03 04:05:06")
assert result.dtz == arrow.get("2001-02-03 04:05:06-07")
assert result.d == arrow.get("2001-02-03")
assert result.t == arrow.get("04:05:06", "HH:mm:ss")
finally:
tx.rollback()
No comments:
Post a Comment