PK!keras_nalu/__init__.pyPK!Fwxxkeras_nalu/nalu.py"""Keras NALU module""" from keras import backend as K from keras import constraints from keras import initializers from keras import regularizers from keras.engine import InputSpec from keras.layers import Layer from keras.utils.generic_utils import get_custom_objects class NALU(Layer): """Keras NALU layer""" def __init__( self, units, G_constraint=None, G_initializer='glorot_uniform', G_regularizer=None, M_hat_constraint=None, M_hat_initializer='glorot_uniform', M_hat_regularizer=None, W_hat_constraint=None, W_hat_initializer='glorot_uniform', W_hat_regularizer=None, cell=None, e=1e-28, **kwargs, ): assert cell in ['a', 'm', None] super(NALU, self).__init__(**kwargs) self.cell = cell self.G = None self.G_constraint = constraints.get(G_constraint) self.G_initializer = initializers.get(G_initializer) self.G_regularizer = regularizers.get(G_regularizer) self.M_hat = None self.M_hat_constraint = constraints.get(M_hat_constraint) self.M_hat_initializer = initializers.get(M_hat_initializer) self.M_hat_regularizer = regularizers.get(M_hat_regularizer) self.W_hat = None self.W_hat_constraint = constraints.get(W_hat_constraint) self.W_hat_initializer = initializers.get(W_hat_initializer) self.W_hat_regularizer = regularizers.get(W_hat_regularizer) self.e = e self.supports_masking = True self.units = units def build(self, input_shape): input_dim = input_shape[-1] if self.cell is None: self.G = self.add_weight( constraint=self.G_constraint, initializer=self.G_initializer, name='G', regularizer=self.G_regularizer, shape=(input_dim, self.units), ) self.M_hat = self.add_weight( constraint=self.M_hat_constraint, initializer=self.M_hat_initializer, name='M_hat', regularizer=self.M_hat_regularizer, shape=(input_dim, self.units), ) self.W_hat = self.add_weight( constraint=self.W_hat_constraint, initializer=self.W_hat_initializer, name='W_hat', regularizer=self.W_hat_regularizer, shape=(input_dim, self.units), ) self.built = True self.input_spec = InputSpec(axes={-1: input_dim}, min_ndim=2) def call(self, inputs, **kwargs): W = K.tanh(self.W_hat) * K.sigmoid(self.M_hat) a = K.dot(inputs, W) m = K.exp(K.dot(K.log(K.abs(inputs) + self.e), W)) if self.cell == 'a': y = a elif self.cell == 'm': y = m else: g = K.sigmoid(K.dot(inputs, self.G)) y = (g * a) + ((1 - g) * m) return y def compute_output_shape(self, input_shape): output_shape = list(input_shape) output_shape[-1] = self.units output_shape = tuple(output_shape) return output_shape def get_config(self): base_config = super(NALU, self).get_config() config = { 'G_constraint': constraints.serialize(self.G_constraint), 'G_initializer': initializers.serialize(self.G_initializer), 'G_regularizer': regularizers.serialize(self.G_regularizer), 'M_hat_constraint': constraints.serialize(self.M_hat_constraint), 'M_hat_initializer': initializers.serialize(self.M_hat_initializer), 'M_hat_regularizer': regularizers.serialize(self.M_hat_regularizer), 'W_hat_constraint': constraints.serialize(self.W_hat_constraint), 'W_hat_initializer': initializers.serialize(self.W_hat_initializer), 'W_hat_regularizer': regularizers.serialize(self.W_hat_regularizer), 'cell': self.cell, 'e': self.e, 'units': self.units, } return {**base_config, **config} get_custom_objects().update({'NALU': NALU}) PK!!keras_nalu/pretrained/__init__.pyPK!~Ӟ(r(rkeras_nalu/pretrained/model.h5HDF  (r` TREEHEAPX0model_weightsoptimizer_weights( Hkeras_version @backend  H model_config ChTREEHHEAPX input_1nalu_1nalu_28` P layer_namesinput_1nalu_1nalu_2 @backend HhGCOL2.2.4 tensorflow {"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "input_1", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 32], "dtype": "float32", "sparse": false, "name": "input_1"}, "inbound_nodes": []}, {"name": "nalu_1", "class_name": "NALU", "config": {"name": "nalu_1", "trainable": true, "G_constraint": null, "G_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "G_regularizer": null, "M_hat_constraint": null, "M_hat_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "M_hat_regularizer": null, "W_hat_constraint": null, "W_hat_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "W_hat_regularizer": null, "cell": "m", "e": 1e-28, "units": 16}, "inbound_nodes": [[["input_1", 0, 0, {}]]]}, {"name": "nalu_2", "class_name": "NALU", "config": {"name": "nalu_2", "trainable": true, "G_constraint": null, "G_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "G_regularizer": null, "M_hat_constraint": null, "M_hat_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "M_hat_regularizer": null, "W_hat_constraint": null, "W_hat_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "W_hat_regularizer": null, "cell": "a", "e": 1e-28, "units": 1}, "inbound_nodes": [[["nalu_1", 0, 0, {}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["nalu_2", 0, 0]]}} tensorflow2.2.4{"optimizer_config": {"class_name": "RMSprop", "config": {"lr": 9.99999883788405e-07, "rho": 0.8999999761581421, "decay": 0.0, "epsilon": 1e-07}}, "loss": "mae", "metrics": [], "sample_weight_mode": null, "loss_weights": null}hSNOD8`x7@D`F Hkeras_versionTREEHEAPXPSNOD8X 003 H weight_names ?@4 4 TREE0%HEAPXx nalu_1H8X X weight_namesnalu_1/M_hat:0nalu_1/W_hat:0(    '}\h"$TREEx&HEAPX$M_hat:0W_hat:0@SNODp""$SNOD`!/2_ Q_zzAd_-2_f^L_.q_,H_5__,a_@_I-_.__ _N__*___N {Ac_G_-_ D__=_"_j`_ ,_VF_l^_}^b{A4_B`UN_^q_D_Q_MC`_(__*__*_~c_'p_^9_^Ä{A^?_2_ <_._ A_v_2_?_l_l{A_E$_I_ __A_^E_o_<5_^*<_ʣzAO_eX_am_N___~_aH_$_^)_a_&_)H_n_4_o6__RO_S_-*_B^ _^E9_ _%zAE=__/_H__*^(_E=_^^;#_6_V_b_^u_)$_zAJ_"5__^&_^<_^"_t^_____ +_j_Z^*){AX_N%_^H_k1_^]Q__@_$W_D_%&_P__9{A_M_&zA_+_^^"_F_ _^!_H_H!_;^n_#_ _n_^t^6_2l_AX_5_#zAX%_*_+_^_}_=_H _H_(_h+_^P_^vB_^_zA__0_^^_2_^"`_t_H_ _~^^0_*C_آ_n{A^_^?_K__+{A^r_^R;__/_;___y^6_[__c_^_)__^^zAj_^_^2__^/_ߖ_O2_{AH_%___^_T___ _/_-_-E_ _C_i._o_0_^k_zAtU_g_Y#_1_ )_!____>_J_m^ơ__+zA^_ _,_3;_z<_?_1E`^B^L_ _^o___)0_+_vU_9{A%\_3_F_W_^ڼ_ _+_<_ _ {A-_ _fV_W_@_ _~J_^_O_I_'{AS_!_>_$C_5{`_'M_^._I_=_c_IA`6_-__#Q_9^P_(_3_d3_C_9D_I_M_MU{AF_3^\9_' `_2__^A_6_Z_|__y__!_%_lzA _q_|@_ _^N_F _w^,_y__D_<&_9_e_:_^={A'_;`(^._s_^_mf__X^ _^y_#_Is{A*_._{A_3_^c^_5_|^^i"_?P_2<_^^ _^3_e#_>_N_7_v_'Y_ʣzAA_^D_p?_T_B^_ _H6_ !_&_^H_V_)_M_k _9{A_H4__X^_^g^^ _?"__^{_^^J^_r_{A^,^6_ ^__^tzAj^?%_^1_];_^I_^5_7w_^L_N_(_'.__|4_^^^u{A_^_^^+6_^;_(    7}\h3TREE8@HEAPX83nalu_2H03 X weight_namesnalu_2/M_hat:0nalu_2/W_hat:0(  G@}\hX5?TREEAFûa;@p9*Jero@/ٻ;e.=V;Hi:ФyFh!ƻ6:;x亓LJBPATUZ|/ydXBz:7ʺh=AT濻ba>}5:` ;':{HP4ֻ:O #;:О8ixAP1aΐmO; ;,sm-IwdU;i$:RA|ݹ<;6ҶTK;= VP(7*}D:qe;3m;9=Xlӻnۻ90_1 ;z;A"[|;4º̞:' `kX;6 b: \붺_2;TA_:pOL1H`:'˻UY:U H,:@B`&9ؕ(WL;p-\|h9;A?A2%:`;^<7YX6 cA|hE;2ujCջ$;VҹG';/l:?Ax p:G˺Tm0::q-mvجMo<kѻ;综;%;sʻһp14:;=;0\A:9QQ߻e; ;ZJ;288g9Q;L#;Eͻ!:^AϡԻV8F \&䍻cu;!;0(;}3PA4t@:s:H>ADP2;BWѻ9ܻ,%;k;:<5ֻŻ"\@@9 A";3;`=l 9F &\.;,尺` s";8):k&+9Zk;u9$AZ2pqXƻ%;ѷz:jAVGϻC;ûn5Ai{`d8\sf;%1;s:bm P: ST:hp4)A`@;\ĺ;TQɺ A3};X9V"5B`;ƽ?; <Ķ:UL:$жh?5ڻbA։(ݻt;Ko60:HEAPX?M_hat:0W_hat:0@SNOD05X5?SNOD 4B(  H@}\h Htraining_configTREESHEAPXFtraining@@D`F  weight_namestraining/RMSprop/Variable:0training/RMSprop/Variable_1:0training/RMSprop/Variable_2:0training/RMSprop/Variable_3:0{ALzA&zA#zA%zAzA%N{AYt{Ar{A{Aʴ{AzAzA%N{Ab{A(b{A AA!AjeAo^AU@AwAqA;pAAAAAqAA(    (Z}\hPSTREEWHEAPX8SRMSpropHSNODPPSU WTREEX8HEAPXH@WVariable:0Variable_1:0Variable_2:0Variable_3:0SNODTU WSNODO(b(8c8Hd3FQ2/Wx{7 \ EBaNX7" )`g,g|4P  P}3Vx`<խq~8Cgkd-w.3D̹ 4Sc'2޹6=$*  EzÂׂ%Xpi}  $[B}iCrozĖQgk~Q%VC6x[0TMyPV+eERmv2WRt,'lٵj#^#jZˏI,en@Vk8ߎE-V"&!7(NV dZ& h3n-T19A|=6@g]5GmW#vhryϷ.(ڢ+82K>4  F߉Au\7 e /^M†I,q V Tm5/oS=rͩba4vFG*ETǬ`l^C{z'RMaRI۝ afšV h\S 9 _OZSk#Kdտ!pME8]p gNvn,nQMtbfE)z"@[pR(@py-RyP&`{dR?zrt_G5Z j0pG9"x(Jya^*oFla;G/I 9 oq 6܁x=(X(9>: Ľb9Im><hE\d~524` y U(X\><ʯ>)jXC~CO I,WKz]jNJ .OsnT~C'41׳$c :v$'\ )W9Ml&Y$`[I Et} b.FQm T=eozM1iĚ#cpԂE&l{aY*a<F i.w's+`0X#GҹIœޤ |`[V6_ؽQJ$Ծ_aF` DsbRyG҂ I"]M5f/JUw09u+Bz6p+Clw$DdwD^/%;c)mx79ŀ5=aDQLO+:D3u 9 }wJ (    (j}\h(  @H@}\h(  H@}\h$ ͙$+*?%%>#%~}%6%g%HA% %^$Ҳ%.2%r8%eC0%}5A%)%j%K<%$:`%j6%W$ %E`$)N"%P%`%$4.%=4%VB$$c %$16%!%_ %fv%$%%$$%@H%1PN%$<%a%(+%}F$%w%u%'%+5%%/%%ji%GN %&$Ƞ$˃ %NS$!&%4$%L$[$ $w,%υ%\<6% ~% $.%]$$z$>@$VD%cL$0$‡$,#%:$o[%w.%Ӻ7%u %r$W$  %0$_%% %ۮ$$;@$]$Lm %%E%,|B% %$9l%W%Ԥ%BL%V%|$P5%%E?%<%ԇ%% !%*%J6%DU%-+%:$xC!%N%$%P%%B%l"%3%U%q%"j$$k(%.$&%I%Ӄ$%%9%r$$<;%$%m%Tq%~&A%$6%%Ql%_%)%Ll%վ2%Qh%8%a%C=G%«%t%S:%>F%%$%Q%T$e%$$ #Z%]%ޝ`%&`%q%E$Z$G$%$L.%qq%z$$w$a%sg%RK%$`.%x %2%%Q@%%$"g% $:%N%[E%D%W%o%+$%%$]$b%n%w$H%r8%%?%$> $$Y%Wq%~$K%$%%x~% %~%}'%t%D,%lS%bj#%M%h%_C%vv@%?%u$%D%' %^m%x+%3K%*%\8%J[%+R%;B%hy/%sk% .%MI%M $$2%=%0% %1p%y2%%%R3%^%=l(%*R%O%eJ$b %'%b>%7$m!%Q%u+%M8%4%%!2%%5%kT%o$ %3%=$0%jo$pC%$e$5%,%Qx%%$A3%%Pj %UN$lk$%.%%%M$ #%]%Q% %o %$$>x%%X>%n1%L%e%ڐ%5%k%1%xz%%h%mo%Dk%@$5%$R%X %ϳL%aN %, '%C$ -%$Jj%T%Q$%W$/$'v%($CD %%%_%:%$$%8$HZ$~X%t&%4 %e$%#%~$M%$O$ %hc1%H%)%$%$$$@#%%x$$#$(%.%S$(%: %=$U%4%k%#%%f%w$$pH%$l{%6I%7$%% %`*%2q%$<%̸$$A~$!8%!$X %$A %$%$E%% %[$@%I%Zl%0%zc%Ҕ%yX%dK%%KF%ry2%0V%]%X%4%'0%7$}W$N_$$ $^%Q%%$<_%#%$%;%%4%;e1%Y$y18%}%"N%fR%%`=%H%Y%έ%H#%,%%j%O6% K$`"$$Ƶ$e$J@ %ng%R_$8 %j$$5%N$0%`$PK!keras_nalu/pretrained/model.py"""Keras NALU pretrained model""" from os import path from keras.models import load_model import numpy as np def get_B(base, precision, size): """Get B tensor of X coefficients""" exps = range(precision[0], precision[1], -1) pows = [base ** exp for exp in exps] B = np.tile(pows, (size, 1)) return B def get_model(): """Get the NALU pretrained model""" return load_model( path.join(path.dirname(__file__), 'model.h5') ) PK!8ڍkeras_nalu/pretrained/train.py"""Pretrain Keras NALU model on counting task""" from os import path from keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN from keras.layers import Input from keras.models import Model from keras.optimizers import RMSprop import numpy as np from keras_nalu.nalu import NALU def generate_dataset(batch_size, number_width, multiplier): """Generate dataset for a task""" X = np.zeros((batch_size, 2 * number_width)) Y = np.zeros((batch_size)) for i in range(batch_size): a = multiplier * np.random.rand(number_width) b = multiplier * np.random.rand(number_width) X[i] = np.concatenate([a, b]) Y[i] = np.sum(a * b) return X, Y def train(): """Train Keras NALU model on counting task""" model_dir = path.dirname(__file__) number_width = 16 X_train, Y_train = generate_dataset( batch_size=2**18, multiplier=1, number_width=number_width, ) X_validation, Y_validation = generate_dataset( batch_size=2**9, multiplier=9999, number_width=number_width, ) X_test, Y_test = generate_dataset( batch_size=2**9, multiplier=9999, number_width=number_width, ) inputs = Input(shape=(2 * number_width,)) hidden = NALU(units=number_width, cell='m')(inputs) outputs = NALU(units=1, cell='a')(hidden) callbacks = [ TerminateOnNaN(), ReduceLROnPlateau( factor=0.1, min_lr=1e-16, patience=50, verbose=1, ), EarlyStopping( patience=200, restore_best_weights=True, verbose=1, ), ] model = Model(inputs=inputs, outputs=outputs) model.summary() model.compile(loss='mae', optimizer=RMSprop(lr=0.01)) model.fit( batch_size=256, callbacks=callbacks, epochs=1000, validation_data=(X_validation, Y_validation), verbose=2, x=X_train, y=Y_train, ) model.evaluate( batch_size=256, verbose=1, x=X_test, y=Y_test, ) model.save(path.join(model_dir, 'model.h5')) if __name__ == '__main__': train() PK!HڽTU keras_nalu-1.3.0.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!HKO #keras_nalu-1.3.0.dist-info/METADATAVmo6_qC>DlN0vARA˰lF:Kl)#);wD?swϽQ>E O4Vf{ɔg ˖iɳd.ںf3klwg'W  {jk^9mgiZ W7I-o\:w"rl,O/{ܬ)lOh6pAKXs!gP|vA<? ㆠY?z;eu(b) `6pQ+,8mL>U^ע)7eK{|zT|dOxq_#a]_f^g{;f3n|u~2s f =BxUhVƗMvxR/Z7#':p D'U#ۧCarG[*y#S}O "EMbW63[ Eˮ,ďJv[J)S],d2٧'7q)C0B5*opeiRoҨ%^#uCu wS<^hy$_ր,:ܡuA$je@rRh |{ _0Kq U-i.ja:LDnI8I&>c\bN,"GǬE|[cGN3պ>@?׳0A{ͺYwv/h{]Bb$nmqL#I=M,;*%lo(Bm6t'w)5ĝmFCah& >PQ:zHZ"8d8nZ@R2vHOp Zz\)S] / SͼK vkխtBKi!V1HZG0pu%jkAQrC7`I6ZMp8U)[#͕O(]lcJ"`.{o,D5Ƌ.aj+$DM%]Ԑsc#Um)?.Sxe)w?PK!HP͋!keras_nalu-1.3.0.dist-info/RECORDλr@JP*c$. A$3+ET9/q I5I R I4 )6?ĭnEzSNi^YӼp'h~PKm'r@\Cp\znc WXB7ssGХ>݌Xoou baNX^|QSXziPN\d!:Rt6BQ.96J܀Ab^^/-97Ӑݘx.)XcePtAkf<p$)ܢZSwU5f+/sII+r,EuBQ]Yʊ%iFBV>@N\$)PK!keras_nalu/__init__.pyPK!Fwxx4keras_nalu/nalu.pyPK!!keras_nalu/pretrained/__init__.pyPK!~Ӟ(r(rkeras_nalu/pretrained/model.h5PK!keras_nalu/pretrained/model.pyPK!8ڍkeras_nalu/pretrained/train.pyPK!HڽTU xkeras_nalu-1.3.0.dist-info/WHEELPK!HKO # keras_nalu-1.3.0.dist-info/METADATAPK!HP͋!ekeras_nalu-1.3.0.dist-info/RECORDPK /