---
jupyter:
  jupytext:
    text_representation:
      extension: .md
      format_name: markdown
      format_version: '1.2'
      jupytext_version: 1.4.1
  kernelspec:
    display_name: Python 3
    language: python
    name: python3
---
import time

# Measure the wall-clock cost of building a list with two time.time() calls.
start = time.time()
much_job = [x ** 2 for x in range(1, 1000000, 3)]
elapsed = time.time() - start
print('used {:.5}s'.format(elapsed))
% % time
much_job = [x ** 2 for x in range (1 ,1000000 ,3 )]
from timeit import timeit


def g(x):
    """Return x squared plus one."""
    return x ** 2 + 1


def main():
    """Return g(2) raised to the 120th power."""
    return g(2) ** 120


# Alternative: timeit('main()', setup='from __main__ import main', number=10)
# Passing globals= avoids the setup-string import entirely.
timeit('main()', globals={'main': main}, number=10)
% % timeit - n 10
g = lambda x :x ** 2 + 1
def main ():
return (g (2 )** 120 )
main ()
import cProfile


def relu(x):
    """Rectified linear unit: return x when positive, otherwise 0."""
    return x if x > 0 else 0


def main():
    """Apply relu across [-100000, 100000) and return the list of results."""
    result = [relu(x) for x in range(-100000, 100000, 1)]
    return result


# FIX 1: use cProfile instead of the pure-Python `profile` module -- same API,
# far less measurement overhead (profile is only needed when extending the
# profiler itself).
# FIX 2: profile.run('main()') evaluates the statement in __main__'s namespace,
# which breaks when this code is imported as a module; runctx pins the
# namespace explicitly so 'main()' always resolves.
cProfile.runctx('main()', {'main': main}, {})
!pip install line_profiler
% load_ext line_profiler
def relu (x ):
return (x if x > 0 else 0 )
def main ():
result = [relu (x ) for x in range (- 100000 ,100000 )]
return result
from line_profiler import LineProfiler
lprofile = LineProfiler (main ,relu )
lprofile .run ('main()' )
lprofile .print_stats ()
% lprun - f main - f relu main ()
# BUG FIX: `data` is a one-shot generator. The original called list(data) and
# then set(data); list() exhausted the generator, so set(data) silently built
# an EMPTY set and the membership-speed comparison below tested nothing.
# Build the set from the materialized list instead.
data = (i ** 2 + 1 for i in range(1000000))
list_data = list(data)
set_data = set(list_data)
% % time
1098987 in list_data
% % time
1098987 in set_data
# Parallel lookup tables: list_a holds the odd numbers -1, 1, 3, ...,
# list_b their squares, and dict_ab maps each number directly to its square.
list_a = [2 * n - 1 for n in range(1000000)]
list_b = [value ** 2 for value in list_a]
dict_ab = {key: square for key, square in zip(list_a, list_b)}
% % time
print (list_b [list_a .index (876567 )])
% % time
print (dict_ab .get (876567 ,None ))
% % time
s ,i = 0 ,0
while i < 10000 :
i = i + 1
s = s + i
print (s )
% % time
s = 0
for i in range (1 ,10001 ):
s = s + i
print (s )
# Sample data: n^2 + 1 for the first 2000 non-negative integers.
a = [n * n + 1 for n in range(2000)]
% % time
b = [i / sum (a ) for i in a ]
% % time
sum_a = sum (a )
b = [i / sum_a for i in a ]
% % time
def fib (n ):
return (1 if n in (1 ,2 ) else fib (n - 1 )+ fib (n - 2 ))
print (fib (30 ))
% % time
from functools import lru_cache
@lru_cache (100 )
def fib (n ):
return (1 if n in (1 ,2 ) else fib (n - 1 )+ fib (n - 2 ))
print (fib (30 ))
% % time
def fib (n ):
return (1 if n in (1 ,2 ) else fib (n - 1 )+ fib (n - 2 ))
print (fib (30 ))
% % time
def fib (n ):
if n in (1 ,2 ):
return (1 )
a ,b = 1 ,1
for i in range (2 ,n ):
a ,b = b ,a + b
return (b )
print (fib (30 ))
% % time
def my_power (x ):
return (x ** 2 )
def my_power_sum (n ):
s = 0
for i in range (1 ,n + 1 ):
s = s + my_power (i )
return (s )
print (my_power_sum (1000000 ))
% % time
from numba import jit
@jit
def my_power (x ):
return (x ** 2 )
@jit
def my_power_sum (n ):
s = 0
for i in range (1 ,n + 1 ):
s = s + my_power (i )
return (s )
print (my_power_sum (1000000 ))
12,使用collections.Counter类加速计数
data = [x ** 2 % 1989 for x in range (2000000 )]
% % time
values_count = {}
for i in data :
i_cnt = values_count .get (i ,0 )
values_count [i ] = i_cnt + 1
print (values_count .get (4 ,0 ))
% % time
from collections import Counter
values_count = Counter (data )
print (values_count .get (4 ,0 ))
13, 使用collections.ChainMap加速字典合并
# Four overlapping lookup tables keyed on different arithmetic progressions.
dic_a = {key: key + 1 for key in range(1, 1000000, 2)}
dic_b = {key: 2 * key + 1 for key in range(1, 1000000, 3)}
dic_c = {key: 3 * key + 1 for key in range(1, 1000000, 5)}
dic_d = {key: 4 * key + 1 for key in range(1, 1000000, 7)}
% % time
result = dic_a .copy ()
result .update (dic_b )
result .update (dic_c )
result .update (dic_d )
print (result .get (9999 ,0 ))
% % time
from collections import ChainMap
chain = ChainMap (dic_a ,dic_b ,dic_c ,dic_d )
print (chain .get (9999 ,0 ))
% % time
a = range (1 ,1000000 ,3 )
b = range (1000000 ,1 ,- 3 )
c = [3 * a [i ]- 2 * b [i ] for i in range (0 ,len (a ))]
% % time
import numpy as np
array_a = np .arange (1 ,1000000 ,3 )
array_b = np .arange (1000000 ,1 ,- 3 )
array_c = 3 * array_a - 2 * array_b
% % time
import math
a = range (1 ,1000000 ,3 )
b = [math .log (x ) for x in a ]
% % time
import numpy as np
array_a = np .arange (1 ,1000000 ,3 )
array_b = np .log (array_a )
import numpy as np
array_a = np .arange (- 100000 ,1000000 )
% % time
# np.vectorize可以将普通函数转换成支持向量化的函数
relu = np .vectorize (lambda x : x if x > 0 else 0 )
array_b = relu (array_a )
% % time
relu = lambda x :np .where (x > 0 ,x ,0 )
array_b = relu (array_a )
import numpy as np
import pandas as pd
df = pd .DataFrame (np .random .randint (- 10 ,11 ,size = (100000 ,26 )),
columns = list ('abcdefghijklmnopqrstuvwxyz' ))
% time dfresult = df .applymap (lambda x :np .sin (x )+ np .cos (x ))
% % time
dfresult = np .sin (df ) + np .cos (df )
% % time
import pandas as pd
import numpy as np
df = pd .DataFrame (columns = list ('abcdefghijklmnopqrstuvwxyz' ) )
for i in range (10000 ):
df .loc [i ,:] = range (i ,i + 26 )
% % time
import pandas as pd
import numpy as np
df = pd .DataFrame (np .zeros ((10000 ,26 )),
columns = list ('abcdefghijklmnopqrstuvwxyz' ))
for i in range (10000 ):
df .loc [i ,:] = range (i ,i + 26 )
import numpy as np
import pandas as pd
df = pd .DataFrame (np .random .randint (- 10 ,11 ,size = (10000 ,5 )),
columns = list ('abced' ))
% % time
df .to_excel ('data.xlsx' )
% % time
df .to_csv ('data.csv' )
20,使用pandas多进程工具pandarallel
import pandas as pd
import numpy as np
df = pd .DataFrame (np .random .randint (- 10 ,11 ,size = (10000 ,26 )),
columns = list ('abcdefghijklmnopqrstuvwxyz' ))
% % time
result = df .apply (np .sum ,axis = 1 )
% % time
from pandarallel import pandarallel
pandarallel .initialize (nb_workers = 4 )
result = df .parallel_apply (np .sum ,axis = 1 )
import numpy as np
import pandas as pd
df = pd .DataFrame (np .random .randint (0 ,6 ,size = (100000000 ,5 )),
columns = list ('abcde' ))
% time df .groupby ('a' ).mean ()
import dask .dataframe as dd
df_dask = dd .from_pandas (df ,npartitions = 40 )
% time df_dask .groupby ('a' ).mean ().compute ()
import time
def muchjob (x ):
time .sleep (5 )
return (x ** 2 )
% % time
result = [muchjob (i ) for i in range (5 )]
result
% % time
from dask import delayed ,compute
from dask import threaded ,multiprocessing
values = [delayed (muchjob )(i ) for i in range (5 )]
result = compute (* values ,scheduler = 'multiprocessing' )
% % time
def writefile (i ):
with open (str (i )+ '.txt' ,'w' ) as f :
s = ('hello %d' % i )* 10000000
f .write (s )
# 串行任务
for i in range (30 ):
writefile (i )
% % time
import threading
def writefile (i ):
with open (str (i )+ '.txt' ,'w' ) as f :
s = ('hello %d' % i )* 10000000
f .write (s )
# 多线程任务
thread_list = []
for i in range (30 ):
t = threading .Thread (target = writefile ,args = (i ,))
t .setDaemon (True ) #设置为守护线程
thread_list .append (t )
for t in thread_list :
t .start () #启动线程
for t in thread_list :
t .join () #等待子线程结束
% % time
import time
def muchjob (x ):
time .sleep (5 )
return (x ** 2 )
#串行任务
ans = [muchjob (i ) for i in range (8 )]
print (ans )
% % time
import time
import multiprocessing
def muchjob (x ):
time .sleep (5 )
return (x ** 2 )
#多进程任务
pool = multiprocessing .Pool (processes = 4 )
result = []
for i in range (8 ):
result .append (pool .apply_async (muchjob , (i ,)))
pool .close ()
pool .join ()
ans = [res .get () for res in result ]
print (ans )