tools for processing fdx reports

This commit is contained in:
2021-10-20 20:13:40 -06:00
parent c48dc65f3c
commit 6b11ef79ea
+132
View File
@@ -0,0 +1,132 @@
#!/bin/python
"""
batch process dsw xls files into existing sqlite3 database
usage sql-dsw INFILE DATABASE
"""
import sys
import xlrd
import argparse
import sqlite3
from sqlite3 import Error
def openDatabase(database):
    """
    open a sqlite3 connection to database

    returns the connection on success; if sqlite3 raises an Error the
    error is printed and None is returned instead
    """
    try:
        return sqlite3.connect(database)
    except Error as err:
        # report the failure and let the caller decide what to do with None
        print(err)
        return None
def appendLedger(conn, table, values):
    """
    insert one row into table and commit it

    conn   -- open sqlite3 connection
    table  -- name of the table to insert into (placed directly in the SQL)
    values -- sequence of column values, bound through '?' placeholders
    """
    # one '?' placeholder per value, comma separated
    placeholders = ','.join(['?'] * len(values))
    statement = 'INSERT INTO {0} VALUES ({1})'.format(table, placeholders)
    cursor = conn.cursor()
    cursor.execute(statement, values)
    conn.commit()
# worksheet columns copied, in this order, into a 'totals' row after the
# label and the report date
_TOTALS_COLUMNS = (6, 7, 8, 10, 11, 12, 13, 14, 20, 18, 32, 33, 34)

# worksheet columns copied, in this order, to the end of a 'dsw' row
_ROUTE_TAIL_COLUMNS = (9, 10, 11, 12, 2, 24, 13, 19, 17)


def _cellContains(srcSheet, row, col, text):
    """return True when the value of cell (row, col) contains text"""
    value = srcSheet.cell(row, col).value
    try:
        return text in value
    except TypeError:
        # non-text values (e.g. numbers) do not support 'in'; no match
        return False


def _totalsRow(srcSheet, row, label, reportDate):
    """build the tuple stored in the 'totals' table for one worksheet row"""
    return (label, reportDate) + tuple(
        srcSheet.cell(row, col).value for col in _TOTALS_COLUMNS)


def _routeRow(srcSheet, row, reportDate):
    """build the tuple stored in the 'dsw' table for one worksheet row"""
    if srcSheet.cell(row, 26).value:
        tod = srcSheet.cell(row, 26).value
        nod = srcSheet.cell(row, 28).value
    else:
        # column 26 is empty: take the two values from the following row
        # NOTE(review): the original comment cited cells i+1,32 and i+1,34
        # while the code reads columns 31 and 33 -- confirm which is right
        tod = srcSheet.cell(row + 1, 31).value
        nod = srcSheet.cell(row + 1, 33).value
    head = (srcSheet.cell(row, 0).value, reportDate,
            srcSheet.cell(row, 1).value, srcSheet.cell(row, 3).value,
            tod, nod)
    return head + tuple(
        srcSheet.cell(row, col).value for col in _ROUTE_TAIL_COLUMNS)


def extractValues(srcSheet, conn, reportDate):
    """
    get dsw data from srcSheet and call appendLedger with values

    srcSheet   -- worksheet object providing nrows and cell(row, col).value
    conn       -- open sqlite3 connection handed on to appendLedger
    reportDate -- date string stored as the second column of every row

    rows are scanned top to bottom:
      * nothing is extracted until a row whose column 0 contains 'Svc'
        has been passed
      * after that, each row with non-empty columns 1 and 3 is appended
        to the 'dsw' table
      * the first row whose column 0 contains 'Total' ends the scan; that
        row is stored in 'totals' labelled 'contract' and the row after
        it is stored labelled 'terminal'

    raises IndexError (from the sheet) if a referenced row or column lies
    outside the worksheet
    """
    # we dont extract data until we find a particular spot in the worksheet
    extract = False
    for i in range(srcSheet.nrows):
        # exit test comes first
        if _cellContains(srcSheet, i, 0, 'Total'):
            appendLedger(conn, 'totals',
                         _totalsRow(srcSheet, i, 'contract', reportDate))
            appendLedger(conn, 'totals',
                         _totalsRow(srcSheet, i + 1, 'terminal', reportDate))
            # we're done so exit
            break
        # get the individual route totals
        if extract and srcSheet.cell(i, 3).value and srcSheet.cell(i, 1).value:
            appendLedger(conn, 'dsw', _routeRow(srcSheet, i, reportDate))
        # checked last so the 'Svc' row itself is never extracted by the
        # flag it sets
        if _cellContains(srcSheet, i, 0, 'Svc'):
            extract = True
def checkDate(conn, thisDate):
    """
    check the date for this report against processed reports

    conn     -- open sqlite3 connection to a database with a 'totals' table
    thisDate -- date string to look for in the 'date' column of 'totals'

    returns True if at least one row of 'totals' has that date, False if not
    """
    # todo add a cli flag to disable date check for when they break the report format
    # the date is bound as a parameter rather than formatted into the SQL:
    # a double-quoted value is treated by SQLite as an identifier first, so
    # a value naming a column (or containing quotes) changed the query
    query = 'SELECT EXISTS(SELECT 1 FROM totals WHERE date = ?);'
    c = conn.cursor()
    c.execute(query, (thisDate,))
    return bool(c.fetchone()[0])
def getDate(srcSheet):
    """
    read the report date from the top left cell of srcSheet

    the eighth whitespace separated word of cell (0,0) is taken as the
    date; its characters 0-1, 3-4 and 6-9 are used as month, day and year
    (i.e. a mm?dd?yyyy layout) and returned as 'yyyy-mm-dd'
    """
    header = srcSheet.cell(0, 0).value
    stamp = str.split(header)[7]
    month, day, year = stamp[0:2], stamp[3:5], stamp[6:10]
    return '-'.join((year, month, day))
def main():
    """
    command line entry point: load one xls report into the database

    reads the positional arguments inFile and database, takes the report
    date from the first worksheet of inFile and, unless that date is
    already present in the database, extracts the worksheet rows into it

    exits with an error message if the database cannot be opened
    """
    # get filename and database from cmdline
    parser = argparse.ArgumentParser()
    parser.add_argument("inFile")
    parser.add_argument("database")
    args = parser.parse_args()

    # open the excel sheet for reading
    srcBook = xlrd.open_workbook(args.inFile)
    srcSheet = srcBook.sheet_by_index(0)
    reportDate = getDate(srcSheet)

    # open the database
    conn = openDatabase(args.database)
    if conn is None:
        sys.exit('database could not be opened, does it exist')

    try:
        # check if records already exist and then add records if not
        if checkDate(conn, reportDate):
            print("we have already processed " + reportDate)
        else:
            extractValues(srcSheet, conn, reportDate)
    finally:
        # release the connection even when processing fails part way
        conn.close()


if __name__ == '__main__':
    main()