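asp网站后台无法编辑,东方财富网官方网站首页,wordpress页面自定义页面跳转,网站建设商标在哪个类别

算法描述：

(1). 先做自变量 x 的范围检查，不能出现负数和 0。自己使用时，如果能通过其它途径保证自变量为正，那么可以省略这两个判断，提高速度。

(2). 根据 IEEE 754 浮点数的格式，$x = 2^k \cdot m$（其中 $1 \le m < 2$），则 $\ln(x) = k\ln(2) + \ln(m)$，可以通过位运算方便快速地获取 k 和 m。

(3). 把 $\ln(1+x)$ 和 $\ln(1-x)$ 在 $x=0$ 处的泰勒级数相减：

$$\ln\frac{1+x}{1-x} = 2\left(x + \frac{x^3}{3} + \frac{x^5}{5} + \frac{x^7}{7} + \cdots\right)$$

因为 m 的范围是 [1, 2)，不够接近 1，如果直接令 $m = (1+x)/(1-x)$，即 $x = (m-1)/(m+1)$，那么 x 不够接近 0，代入上面的泰勒级数则精度不够高，所以要对 m 进行变换。常见的做法是乘上 $\sqrt{2}/2$，即

$$m' = \frac{\sqrt{2}}{2}\,m \in \left[\frac{\sqrt{2}}{2},\ \sqrt{2}\right),\qquad x = \frac{m'-1}{m'+1} \in \left[-(3-2\sqrt{2}),\ 3-2\sqrt{2}\right),\qquad \ln(2^k m) = \left(k+\tfrac{1}{2}\right)\ln 2 + \ln m'$$

如果改为乘以 2/3，则

$$m' = \frac{2}{3}\,m \in \left[\frac{2}{3},\ \frac{4}{3}\right),\qquad x = \frac{m'-1}{m'+1} \in \left[-\frac{1}{5},\ \frac{1}{7}\right),\qquad \ln(2^k m) = k\ln 2 + \ln\frac{3}{2} + \ln m'$$

这个区间长度为 12/35 = 0.34285714，比区间 $[-(3-2\sqrt{2}),\ 3-2\sqrt{2})$ 的长度 0.34314575 更短，代入泰勒级数后的精度更高一些。

多项式求值采用秦九韶算法，同时还使用 fmadd 指令加速运算（融合乘加，Intel `_mm_fmadd_sd`）。《计算机如何计算对数函数_【数值计算】求对数函数值，输入实数 x>0，输出 x 对应的对数函数值 ln(x)（使用双精度 dou…》（CSDN 博客）更详细地解释了如何利用 IEEE 754 浮点数的格式获取 k 和 m。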
标准库的算法可参考：glibc/sysdeps/ieee754/dbl-64/s_log1p.c at master · bminor/glibc · GitHub。
最终的效果是：精度与标准库互有胜负（以 Windows 的计算器以及 Wolfram|Alpha: Computational Intelligence 作为参考值）；如果不对自变量为 NAN 的情况进行处理，速度稍快于标准库。C 代码如下：
#include <immintrin.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

#define FMADD
/*
 * Fast natural logarithm for IEEE 754 binary64 values, followed by a small
 * accuracy and speed comparison against the library log().
 *
 * Method:
 *   x = 2^k * m with 1 <= m < 2, so ln(x) = k*ln(2) + ln(m).
 *   m is scaled by 2/3 so that m' = 2m/3 lies in [2/3, 4/3), and
 *   t = (m' - 1) / (m' + 1) lies in [-1/5, 1/7).  Then
 *     ln(m') = ln((1 + t) / (1 - t)) = 2*(t + t^3/3 + t^5/5 + ... ),
 *   truncated after the t^17 term and evaluated with Horner's scheme in t^2.
 *   Finally ln(x) = k*ln(2) + ln(3/2) + ln(m').
 *
 * An alternative reduction multiplies m by sqrt(2)/2 (0.7071067811865475)
 * instead of 2/3; the result is then (k + 0.5)*ln(2) + ln(m').
 */

static const double LN2 = 0.6931471805599453;    /* ln(2) */
static const double LN3_2 = 0.40546510810816438; /* ln(3/2) */
static const double TWO_THIRDS = 2.0 / 3.0;      /* range-reduction factor */

static const uint64_t MANTISSA_MASK = 0x000FFFFFFFFFFFFFULL; /* low 52 bits */
static const uint64_t EXPONENT_ONE = 0x3FF0000000000000ULL;  /* bits of 1.0 */

/*
 * Use the FMA intrinsics only when FMADD is requested AND the compiler can
 * actually emit them: GCC/Clang need -mfma (which defines __FMA__), while
 * MSVC accepts the intrinsic without an architecture switch.  Otherwise fall
 * back to the plain Horner evaluation so the file still builds.
 */
#if defined(FMADD) && \
    (defined(__FMA__) || (defined(_MSC_VER) && !defined(__clang__)))
#define MYLN_USE_FMA 1
#else
#define MYLN_USE_FMA 0
#endif

/**
 * Natural logarithm of a double.
 *
 * @param x  argument.
 * @return   ln(x) for finite x > 0; NAN for x < 0; -INFINITY for x == 0
 *           (either sign of zero); x itself for +infinity and NaN.
 *
 * The first omitted series term is 2*t^19/19, which is about 5.5e-15 at the
 * edge t = -1/5 (i.e. when the mantissa is exactly 1), so the result is not
 * correctly rounded near powers of two.
 */
static inline double myln(double x)
{
    if (x < 0) {
        return NAN;
    }
    if (x == 0) {
        return -INFINITY;
    }

    /* memcpy is the aliasing-safe way to view the bits of a double. */
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);

    int biased = (int)((bits >> 52) & 0x7FF);
    int k = 0;

    /* One test catches both special exponents: 0 (subnormal) and 0x7FF. */
    if ((unsigned)(biased - 1) >= 0x7FEu) {
        if (biased != 0) {
            return x; /* +infinity stays +infinity, NaN stays NaN */
        }
        /* Subnormal: there is no implicit leading 1, so renormalise by
         * multiplying with 2^52 and compensate in the exponent. */
        x *= 4503599627370496.0; /* 2^52, exact */
        memcpy(&bits, &x, sizeof bits);
        biased = (int)((bits >> 52) & 0x7FF);
        k = -52;
    }
    k += biased - 1023; /* x = 2^k * m */

    /* Rebuild the mantissa as a double in [1, 2). */
    const uint64_t mantissa_bits = (bits & MANTISSA_MASK) | EXPONENT_ONE;
    double m;
    memcpy(&m, &mantissa_bits, sizeof m);

    m *= TWO_THIRDS;
    const double t = (m - 1.0) / (m + 1.0);
    const double t2 = t * t;

    double series;
#if MYLN_USE_FMA
    const __m128d v2 = _mm_set_sd(t2);
    __m128d acc = _mm_set_sd(2.0 / 17.0);
    acc = _mm_fmadd_sd(acc, v2, _mm_set_sd(2.0 / 15.0));
    acc = _mm_fmadd_sd(acc, v2, _mm_set_sd(2.0 / 13.0));
    acc = _mm_fmadd_sd(acc, v2, _mm_set_sd(2.0 / 11.0));
    acc = _mm_fmadd_sd(acc, v2, _mm_set_sd(2.0 / 9.0));
    acc = _mm_fmadd_sd(acc, v2, _mm_set_sd(2.0 / 7.0));
    acc = _mm_fmadd_sd(acc, v2, _mm_set_sd(2.0 / 5.0));
    acc = _mm_fmadd_sd(acc, v2, _mm_set_sd(2.0 / 3.0));
    acc = _mm_fmadd_sd(acc, v2, _mm_set_sd(2.0));
    acc = _mm_mul_sd(acc, _mm_set_sd(t));
    series = _mm_cvtsd_f64(acc);
#else
    series = 2.0 / 17.0;
    series = series * t2 + 2.0 / 15.0;
    series = series * t2 + 2.0 / 13.0;
    series = series * t2 + 2.0 / 11.0;
    series = series * t2 + 2.0 / 9.0;
    series = series * t2 + 2.0 / 7.0;
    series = series * t2 + 2.0 / 5.0;
    series = series * t2 + 2.0 / 3.0;
    series = series * t2 + 2.0;
    series *= t;
#endif

    return k * LN2 + LN3_2 + series;
}

/*
 * Prints myln() next to log() for x = 0.1, 0.2, ... (x is accumulated by
 * repeated addition, so the values carry rounding error), then times both
 * functions while summing ln(x)/x over [0.01, 1000) in steps of 1e-6.
 */
int main(void)
{
    printf("double, 精度测试\n");
    for (double x = 0.1; x < 3; x += 0.1) {
        printf("myln(%2.1f)=%18.16lf\n  ln(%2.1f)=%18.16lf\n-------\n",
               x, myln(x), x, log(x));
    }

    printf("速度测试，编译器优化设为/O2，CPU：Core i7-11800H \n");

    const double x1 = 0.01, x2 = 1000, dx = 1e-6;

    clock_t start = clock();
    double sum = 0;
    for (double x = x1; x < x2; x += dx) {
        sum += myln(x) / x;
    }
    printf("sum=%lf, myln_Time: %fs\n", sum,
           (double)(clock() - start) / CLOCKS_PER_SEC);

    start = clock();
    sum = 0;
    for (double x = x1; x < x2; x += dx) {
        sum += log(x) / x;
    }
    printf("sum=%lf, ln_Time: %fs\n", sum,
           (double)(clock() - start) / CLOCKS_PER_SEC);

    return 0;
}
/* Output of one run follows (运行结果如下): */
double, 精度测试
myln(0.1)=-2.3025850929940459
  ln(0.1)=-2.3025850929940455
-------
myln(0.2)=-1.6094379124341003
  ln(0.2)=-1.6094379124341003
-------
myln(0.3)=-1.2039728043259359
  ln(0.3)=-1.2039728043259359
-------
myln(0.4)=-0.9162907318741550
  ln(0.4)=-0.9162907318741550
-------
myln(0.5)=-0.6931471805599396
  ln(0.5)=-0.6931471805599453
-------
myln(0.6)=-0.5108256237659907
  ln(0.6)=-0.5108256237659907
-------
myln(0.7)=-0.3566749439387324
  ln(0.7)=-0.3566749439387324
-------
myln(0.8)=-0.2231435513142100
  ln(0.8)=-0.2231435513142098
-------
myln(0.9)=-0.1053605156578265
  ln(0.9)=-0.1053605156578264
-------
myln(1.0)=-0.0000000000000002
  ln(1.0)=-0.0000000000000001
-------
myln(1.1)=0.0953101798043247
  ln(1.1)=0.0953101798043247
-------
myln(1.2)=0.1823215567939545
  ln(1.2)=0.1823215567939546
-------
myln(1.3)=0.2623642644674911
  ln(1.3)=0.2623642644674911
-------
myln(1.4)=0.3364722366212130
  ln(1.4)=0.3364722366212130
-------
myln(1.5)=0.4054651081081644
  ln(1.5)=0.4054651081081646
-------
myln(1.6)=0.4700036292457357
  ln(1.6)=0.4700036292457357
-------
myln(1.7)=0.5306282510621706
  ln(1.7)=0.5306282510621706
-------
myln(1.8)=0.5877866649021192
  ln(1.8)=0.5877866649021193
-------
myln(1.9)=0.6418538861723950
  ln(1.9)=0.6418538861723950
-------
myln(2.0)=0.6931471805599509
  ln(2.0)=0.6931471805599455
-------
myln(2.1)=0.7419373447293780
  ln(2.1)=0.7419373447293776
-------
myln(2.2)=0.7884573603642703
  ln(2.2)=0.7884573603642705
-------
myln(2.3)=0.8329091229351041
  ln(2.3)=0.8329091229351043
-------
myln(2.4)=0.8754687373539001
  ln(2.4)=0.8754687373539003
-------
myln(2.5)=0.9162907318741552
  ln(2.5)=0.9162907318741554
-------
myln(2.6)=0.9555114450274365
  ln(2.6)=0.9555114450274368
-------
myln(2.7)=0.9932517730102837
  ln(2.7)=0.9932517730102838
-------
myln(2.8)=1.0296194171811583
  ln(2.8)=1.0296194171811586
-------
myln(2.9)=1.0647107369924285
  ln(2.9)=1.0647107369924287
-------
速度测试，编译器优化设为/O2，CPU：Core i7-11800H
sum=13254515.057331, myln_Time: 3.817000s
sum=13254515.057331, ln_Time: 3.838000s