装修公司网站该怎么做,做自媒体你不得不知道的视频网站,顺德做网站shundeit,系统开发过程中最重要最关键的环节是1.问题背景描述
python项目中的时序数据都存放在TD数据库中，数据是秒级存入的，当查询一周数据时将超过50w数据量，这时一次性获取全量数据到python程序很慢，全流程10秒以上，希望进行优化加速。
2.排查
首先，…
1.问题背景描述
python项目中的时序数据都存放在TD数据库中，数据是秒级存入的，当查询一周数据时将超过50w数据量，这时一次性获取全量数据到python程序很慢，全流程10秒以上，希望进行优化加速。
2.排查
首先，分步排查：从td取数超过7秒，在python程序中处理格式超过3秒；其次，业务逻辑处理步骤中，时间大量消耗的逻辑是“时间对象转成字符串”；再次，td取数步骤中，时间大量消耗的逻辑是“获取到数据后时间戳转为时间对象”；最后，思路确定为：从td获取的ts字段直接按bigint返回，交由业务逻辑处理，直接从bigint转成字符串。
3.查源码尝试修改
TD取的ts字段是用bigint存的C_TIMESTAMP类型，排查源码发现，公共包中会将C_TIMESTAMP类型的字段都转成datetime对象返回，而转化方法_convert_millisecond_to_datetime就是慢的根源。继续查相关源码，若想用bigint返回，发现CONVERT_FUNC_BLOCK这个函数工厂的key是每个字段的类型fields[i]["type"]，这个字段类型是在taos_fetch_fields中获取的（相关源码如下）。
# Entry point: TDHelper().db_query(sql)
class TDHelper:
    """Project-specific adapter around the TDengine Python connector."""

    def db_query(self, sql, return_timestamp=False):
        """Run *sql* on a fresh cursor and return all rows.

        Args:
            sql: The query text to execute.
            return_timestamp: When true, the first column of every row is
                converted from a datetime object to a POSIX timestamp.

        Returns:
            A list of row tuples; an empty list when the table does not exist.
        """
        with self.cursor() as c:
            return self._query_handler(c, sql, return_timestamp)

    def _query_handler(self, cursor, sql, return_timestamp=False):
        """Execute *sql* on *cursor* and post-process the fetched rows.

        Raises:
            ProgrammingError: Re-raised for every error other than
                "table does not exist".
        """
        try:
            # execute() populates cursor._fields, which later decides how
            # each column is deserialized by fetchall().
            cursor.execute(sql)
            # Manual experiment used while investigating (5 is presumably
            # FieldType.C_BIGINT -- confirm against the connector):
            # cursor._fields[0]._type = 5
            result = cursor.fetchall()
            if not return_timestamp:
                return result
            else:
                ret_result = []
                for one in result:
                    ret_result.append((one[0].timestamp(), *one[1:]))
                return ret_result
        except ProgrammingError as e:
            if e.msg == "Fail to get table info, error: Table does not exist":
                # Only log the SQL; the exception details are not needed.
                logger.warning("Table does not exist, sql [{}]".format(sql))
                return []
            raise e


# Everything below is source code quoted from the TDengine connector package.


def fetchall(self):
    """Fetch every remaining row of the current result (a cursor method).

    Shown here outside of its class; ``self`` is the connector's cursor.

    Returns:
        A list of row tuples, assembled from the column-oriented blocks.

    Raises:
        OperationalError: If there is no active result.
        ProgrammingError: If the client library reports an error.
    """
    if self._result is None:
        raise OperationalError("Invalid use of fetchall")
    fields = self._fields if self._fields is not None else taos_fetch_fields(self._result)
    # One accumulator list per column.
    buffer = [[] for i in range(len(fields))]
    self._rowcount = 0
    while True:
        # Key logic: every block is converted column by column.
        block, num_of_rows = taos_fetch_block(self._result, self._fields)
        errno = taos_errno(self._result)
        if errno != 0:
            raise ProgrammingError(taos_errstr(self._result), errno)
        if num_of_rows == 0:
            break
        self._rowcount += num_of_rows
        for i in range(len(self._fields)):
            buffer[i].extend(block[i])
    # Transpose the columns into row tuples.
    return list(map(tuple, zip(*buffer)))


def taos_fetch_block(result, fields=None, field_count=None):
    """Fetch one block from *result* and convert each column to Python values.

    Returns:
        A ``(blocks, num_of_rows)`` pair, where ``blocks[i]`` is the converted
        list for column ``i``; ``(None, 0)`` when no rows are left.

    Raises:
        DatabaseError: If a column type has no registered converter.
    """
    if fields is None:
        fields = taos_fetch_fields(result)
    if field_count is None:
        field_count = taos_field_count(result)
    pblock = ctypes.c_void_p(0)
    num_of_rows = _libtaos.taos_fetch_block(result, ctypes.byref(pblock))
    if num_of_rows == 0:
        return None, 0
    precision = taos_result_precision(result)
    blocks = [None] * field_count
    for i in range(len(fields)):
        data = ctypes.cast(pblock, ctypes.POINTER(ctypes.c_void_p))[i]
        if fields[i]["type"] not in CONVERT_FUNC_BLOCK_v3 and fields[i]["type"] not in CONVERT_FUNC_BLOCK:
            raise DatabaseError("Invalid data type returned from database")
        offsets = []
        is_null = []
        if fields[i]["type"] in (FieldType.C_VARCHAR, FieldType.C_NCHAR, FieldType.C_JSON):
            offsets = taos_get_column_data_offset(result, i, num_of_rows)
            blocks[i] = CONVERT_FUNC_BLOCK_v3[fields[i]["type"]](data, is_null, num_of_rows, offsets, precision)
        else:
            is_null = [taos_is_null(result, j, i) for j in range(num_of_rows)]
            # Key logic: the converter is looked up by the column's type, so
            # changing the field type changes how the column is decoded.
            blocks[i] = CONVERT_FUNC_BLOCK[fields[i]["type"]](data, is_null, num_of_rows, offsets, precision)
    return blocks, abs(num_of_rows)


# Converter table keyed by column type.
CONVERT_FUNC_BLOCK = {
    FieldType.C_BOOL: _crow_bool_to_python,
    FieldType.C_TINYINT: _crow_tinyint_to_python,
    FieldType.C_SMALLINT: _crow_smallint_to_python,
    FieldType.C_INT: _crow_int_to_python,
    FieldType.C_BIGINT: _crow_bigint_to_python,
    FieldType.C_FLOAT: _crow_float_to_python,
    FieldType.C_DOUBLE: _crow_double_to_python,
    FieldType.C_BINARY: _crow_binary_to_python_block,
    FieldType.C_TIMESTAMP: _crow_timestamp_to_python,  # key logic
    FieldType.C_NCHAR: _crow_nchar_to_python_block,
    FieldType.C_TINYINT_UNSIGNED: _crow_tinyint_unsigned_to_python,
    FieldType.C_SMALLINT_UNSIGNED: _crow_smallint_unsigned_to_python,
    FieldType.C_INT_UNSIGNED: _crow_int_unsigned_to_python,
    FieldType.C_BIGINT_UNSIGNED: _crow_bigint_unsigned_to_python,
    FieldType.C_JSON: _crow_nchar_to_python_block,
}


def _crow_timestamp_to_python(data, is_null, num_of_rows, nbytes=None, precision=FieldType.C_TIMESTAMP_UNKNOWN):
    """Convert a C timestamp column to a list of Python datetime objects.

    Null cells become ``None``; every other cell goes through the converter
    selected by *precision*.

    Raises:
        DatabaseError: If *precision* is not milli-, micro- or nanosecond.
    """
    _timestamp_converter = _convert_millisecond_to_datetime  # key logic
    if precision == FieldType.C_TIMESTAMP_MILLI:
        _timestamp_converter = _convert_millisecond_to_datetime
    elif precision == FieldType.C_TIMESTAMP_MICRO:
        _timestamp_converter = _convert_microsecond_to_datetime
    elif precision == FieldType.C_TIMESTAMP_NANO:
        _timestamp_converter = _convert_nanosecond_to_datetime
    else:
        raise DatabaseError("Unknown precision returned from database")
    return [
        None if is_null[i] else _timestamp_converter(ele)
        for i, ele in enumerate(ctypes.cast(data, ctypes.POINTER(ctypes.c_int64))[: abs(num_of_rows)])
    ]


def _convert_millisecond_to_datetime(milli):
    """Convert a millisecond epoch value to a datetime object.

    Returns ``None`` (after printing a warning) when the value overflows.
    """
    try:
        if _priv_tz is None:
            return _datetime_epoch + timedelta(seconds=milli / 1000.0)
        # The root of the slowdown: executed once per row.
        return (_utc_datetime_epoch + timedelta(seconds=milli / 1000.0)).astimezone(_priv_tz)
    except OverflowError:
        # Overflow is reported but deliberately not raised.
        print("WARN: datetime overflow!")
        return None
4. 最终修改和效果
# Final change: switch the type of the ts field from C_TIMESTAMP to C_BIGINT
# before fetching. The relevant logic is shown below.
# Result reported by the author: 500k rows went from about 10 s to about 3 s.
def test(cursor, sql):
    """Execute *sql* and fetch all rows with the ts column left as a bigint.

    Args:
        cursor: A cursor obtained from ``TDHelper().cursor()``.
        sql: The query text, e.g.
            ``'select ts,val from table_1 ORDER BY ts desc limit 1'``.

    Returns:
        The rows from ``cursor.fetchall()``; an empty list when the table
        does not exist.

    Raises:
        ProgrammingError: Re-raised for every error other than
            "table does not exist".
    """
    try:
        cursor.execute(sql)
        # Re-type the ts field as bigint so the per-row datetime conversion
        # is skipped by fetchall().
        if cursor._fields[0]._type == FieldType.C_TIMESTAMP:
            cursor._fields[0]._type = FieldType.C_BIGINT
        return cursor.fetchall()
    except ProgrammingError as e:
        if e.msg == "Fail to get table info, error: Table does not exist":
            # Only log the SQL; the exception details are not needed.
            logger.warning("Table does not exist, sql [{}]".format(sql))
            return []
        raise e