3
b                 @   sp  d dl mZ d dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
mZ d dl
mZ d dl
mZ d dl
mZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) dd Z*ej+j,dddddgdd Z-ej+j,ddd dgd!d" Z.ej+j,ddddgd#d$ Z/ej+j,dddddgd%d& Z0d'd( Z1d)d* Z2d+d, Z3d-d. Z4ej+j,dddgej+j,d/de5e6gd0d1 Z7ej+j,dddgej+j,d2d3d4gd5d6 Z8ej+j,dddgej+j,d/e6ej9d7ej9d8gd9d: Z:d;d< Z;ej+j,d=dej<d>d?d gd@dA Z=ej+j,d/e5dBgdCdD Z>ej+j,dEddGej<fgdHdI Z?dJdK Z@ej+j,dLejAejBgdMdN ZCej+j,d=dej<d>d?d gdOdP ZDej+j,d/e5dBgdQdR ZEej+j,dSdFgdTggdFgej<gggdUdV ZFdWdX ZGdYdZ ZHd[d\ ZId]d^ ZJd_d` ZKej+j,dadbdcdddedfgdgdh ZLej+j,dide e e e gdjdk ZMdldm ZNdndo ZOdpdq ZPej+j,ddddgdrds ZQdtdu ZRdvdw ZSdxdy ZTej+j,dzd{d|gd}d~ ZUdd ZVej+j,dddeWdfdFdeWdfgdd ZXdd ZYdd ZZej+j,dd dej[d gd{ dgd{ gfddej[ej\ gd{ ej\gd{ gfej\ ej\ej[ej\ gd{ ej\gd{ gfdd|dgdddgej[dd|dgdddggfdej\ dgddej\gej[dej\ dgddej\ggfgdddddgddd Z]ej+j,ddej\ej\ dfdd|gddd gdfgdd Z^ej+j,dddgej\ ej\gfddgdgd dgd gfgddgddd Z_ej+j,dddgdd Z`ej+j,dddFejajbdFdgej+j,dddFejajbdFdgdd Zcej+j,dej[ddFgdFdTggej[ddFgdFdggddddfej[ddFgdFdTggej[d dFgdFdTggdbdddfej[ddFgdFdTggej[ddFgdFdTggddbddfej[ddgddgge6dej[ddgddgge6di dfgdd Zdej+j,dej<ejeej[fd ejfej[fdejfej[fej<ejeejgfdejfejgfej<ejeejAfdejfejAfej<ejeejhfdejfejhfej<ejeejifdejfejifej<ejeejjfdejfejjfgej+j,ddd{ej[d dFdTgfdd{ej[d dFdTgfgdd Zkej+j,dejgejAejhejiejjgdd Zlej+j,ddddgej+j,dej<ej[fd ej[fej<ejgfej<ejAfej<ejhfej<ejifgddƄ ZmddȄ Znej+j,dej[ddgddgge5ddej[ddddgddddgge5dfej[ej<dGgdGej<ggej<ej[dGdGddgdGdGddggfej[ej<dgdej<gge5dej<ej[ddddgddddgge5dfej[ddgddgge5ddej[ddddgddddgge5dfgdd˄ Zoej+j,deegej+j,ddej<dfd
gddӄ ZpddՄ Zqddׄ Zrej+j,deegddل Zsej+j,dejgejAejhejiejjgddۄ Ztej+j,dܐddgdd߄ Zuej+j,dddd{ddTd dFgfdedFd dTdd{gfgdd Zvej+j,dݐdej<gdd Zwej+j,dݐdej<gdd Zxej+j,dddddge5ddTfddddge5ddFfdddge5ddTfddddge5ddTfddFdTd{geyddTfdFdFdFdTgeyddFfddddFgeyddTfdFdFdFdgeyddTfgdd ZzdS (      )divisionN)sparse)kstest)assert_allclose)assert_allclose_dense_sparse)assert_array_equal)assert_array_almost_equal)enable_iterative_imputer)load_diabetes)MissingIndicator)SimpleImputerIterativeImputer)DummyRegressor)BayesianRidgeARDRegressionRidgeCV)Pipeline)
make_union)GridSearchCV)tree)_sparse_random_matrix)ConvergenceWarning)_most_frequentc       	      C   s   d||f }t }| jjdks(|jjdkr,t}t||d}|j| j| j }||j||j	dd ||||j	dd t||d}|jt
j|  |jt
j| j }t
j|r|j }||j||j	dd ||||j	dd dS )zUtility function for testing imputation for a given strategy.

    Test with dense and sparse arrays

    Check that:
        - the statistics (mean, median, mode) are correct
        - the missing values are imputed correctlyz<Parameters: strategy = %s, missing_values = %s, sparse = {0}f)missing_valuesstrategyF)err_msgTN)r   dtypekindr   r   fit	transformcopyZstatistics_formatr   
csc_matrixissparsetoarray)	XX_truer   Z
statisticsr   r   Z	assert_aeimputerX_trans r*   H/tmp/pip-build-yq1vfp91/scikit-learn/sklearn/impute/tests/test_impute.py_check_statistics!   s$    


r,   r   meanmedianmost_frequentconstantc             C   s   t jjdd}t j|d d d< t| d}|jtj|}|jdksFt	|j|}|jdks^t	t
| d}|j|}|jdkst	d S )N
      )r   )initial_strategy)r1   r2   )r1   r2   )r1   r2   )nprandomrandnnanr   fit_transformr   
csr_matrixshapeAssertionErrorr   )r   r&   r(   	X_imputedZiterative_imputerr*   r*   r+   test_imputation_shapeG   s    



r=   conste   c          
   C   sJ   t jd}t j|d< tjtt| d t| d}|j| W d Q R X d S )N      r   )match)r   )r@   rA   )r   r   )	r4   onesr7   pytestraises
ValueErrorstrr   r8   )r   r&   r(   r*   r*   r+   &test_imputation_error_invalid_strategyY   s
    


rH   c             C   sP   t jd}t j|d d df< tjtdd t| dd}|j| W d Q R X d S )	Nr@   rA   r   ZDeleting)rB   T)r   verbose)r@   rA   )r4   rC   r7   rD   warnsUserWarningr   r8   )r   r&   r(   r*   r*   r+    test_imputation_deletion_warningc   s
    
rL   c             C   s   t jd}d|d< tj|}t| dd}tjtdd |j| W d Q R X |j|j	  tjtdd |j
| W d Q R X d S )Nr@   rA   r   )r   r   zProvide a dense array)rB   )r@   rA   )r4   rC   r   r#   r   rD   rE   rF   r   r%   r    )r   r&   r(   r*   r*   r+   test_imputation_error_sparse_0m   s    

rM   c             O   s8   t | dr| jnt| }|dkr&tjS tj| f||S )Nsizer   )hasattrrN   lenr4   r7   r.   )arrargskwargslengthr*   r*   r+   safe_median~   s    rU   c             O   s8   t | dr| jnt| }|dkr&tjS tj| f||S )NrN   r   )rO   rN   rP   r4   r7   r-   )rQ   rR   rS   rT   r*   r*   r+   	safe_mean   s    rV   c              C   s  t jjd} d}d}|| || f}t j|d }t jd|d d }|dd d  |dd d< dt jdd fd	t jd
d fg}x|D ]\}}}	t j|}
t j|}t j|d }xlt|d D ]Z}|| d dk|| d  || d  }t|d ||  || ||   d}|d | | }|d | }t j	||}|| j
t|d |  }|	|||||< t j|||f|
d d |f< d|krt j|t j	|| || f|d d |f< n(t j||t j	|| |f|d d |f< t jj|j|
d d |f  t jj|j|d d |f  qW |d	krFt j|jdd }nt j|jdd }|d d |f }t|
|||| qW d S )Nr   r1         r2   r-   c             S   s   t tj| |fS )N)rV   r4   hstack)zvpr*   r*   r+   <lambda>   s    z-test_imputation_mean_median.<locals>.<lambda>r.   c             S   s   t tj| |fS )N)rU   r4   rY   )rZ   r[   r\   r*   r*   r+   r]      s    )Zaxis)r4   r5   RandomStatezerosaranger7   emptyrangemaxrepeatZpermutationrP   rY   shuffleisnananyallr,   )rngZdimdecr:   r_   valuestestsr   Ztest_missing_valuesZtrue_value_funr&   r'   Ztrue_statisticsjZnb_zerosZnb_missing_valuesZ	nb_valuesrZ   r\   r[   Zcols_to_keepr*   r*   r+   test_imputation_mean_median   sR    

($

"
rn   c              C   s   t jdt jt jgdt jt jgddt jgd
dt jgddt jgddt jgddt jgddt jggj } t jdddgdddgdddgdddgdddgdddgdddgdddggj }ddddddddg}t| |d	|t j d S )Nr   rA   rX   rW   r2   g      @g      @g      ?r.   ro   ro   g      rp   ro   g      rq   g      g      )r4   arrayr7   Z	transposer,   )r&   ZX_imputed_medianZstatistics_medianr*   r*   r+   $test_imputation_median_special_cases   s*    




rs   r   c          
   C   s\   t jdddgdddgddd	gg|d
}d}tjt|d t| d}|j| W d Q R X d S )Nabr@   rX   e   gh	   )r   z6non-numeric data:
could not convert string to float: ')rB   )r   )r4   rr   rD   rE   rF   r   r8   )r   r   r&   msgr(   r*   r*   r+   .test_imputation_mean_median_error_invalid_type   s    
r|   typelist	dataframec          
   C   sn   dddgdddgddd	gg}|d
kr8t jd}|j|}d}t jt|d t| d}|j| W d Q R X d S )Nrt   ru   r@   rX   rv   rw   rx   ry   rz   r   pandasz6non-numeric data:
could not convert string to float: ')rB   )r   )rD   importorskipZ	DataFramerE   rF   r   r8   )r   r}   r&   pdr{   r(   r*   r*   r+   :test_imputation_mean_median_error_invalid_type_list_pandas   s    


r   USc          
   C   s   t jt jt jddgt jdt jdgt jddt jgt jdddgg|d}d}tjt|d	  t| d
}|j|j| W d Q R X d S )Nrt   r   cdru   ry   )r   z#SimpleImputer does not support data)rB   )r   )	r4   rr   r7   rD   rE   rF   r   r   r    )r   r   r&   r   r(   r*   r*   r+   /test_imputation_const_mostf_error_invalid_types  s    
r   c              C   sz   t jdd	ddgd
dddgddddgddddgg} t jdddgdddgdddgdddgg}t| |dt jdddgd d S )NrW   r   rA   r2   r@      r/   rq   rq   rq   rq   rq   rq   rq   rq   )r4   rr   r,   r7   )r&   r'   r*   r*   r+   test_imputation_most_frequent  s    


r   markerZNAN c             C   s   t j| | ddg| d| dg| dd| g| dddggtd}t jdddgdddgdddgdddggtd}t| dd	}|j|j|}t|| d S )
Nrt   r   r   r   ru   ry   )r   r/   )r   r   )r4   rr   objectr   r   r    r   )r   r&   r'   r(   r)   r*   r*   r+   %test_imputation_most_frequent_objects1  s     



r   categoryc             C   sr   t jd}tjd}|j|| d}tjdddgdddgdddgd	ddggtd}td
d}|j	|}t
|| d S )Nr   z,Cat1,Cat2,Cat3,Cat4
,i,x,
a,,y,
a,j,,
b,j,x,)r   rt   ixrm   yru   r/   )r   )rD   r   ioStringIOread_csvr4   rr   r   r   r8   r   )r   r   r   dfr'   r(   r)   r*   r*   r+   $test_imputation_most_frequent_pandasI  s    




r   zX_data, missing_valuerW   g      ?c             C   sN   t jd
| td}||d< tjtdd t|ddd	}|j| W d Q R X d S )Nr@   rA   )r   r   zimputing numerical)rB   r0   r   )r   r   
fill_value)r@   rA   )r   r   )r4   fullfloatrD   rE   rF   r   r8   )ZX_datamissing_valuer&   r(   r*   r*   r+   +test_imputation_constant_error_invalid_typec  s    r   c              C   s   t jddddgddddgddddgdd	d
dgg} t jd
ddd
gdd
dd
gddd
d
gdd	d
d
gg}tddd
d}|j| }t|| d S )NrW   r2   r@   rX   rA   rw   r      rz   r   r0   )r   r   r   rq   rq   rq   rq   rq   rq   rq   rq   )r4   rr   r   r8   r   )r&   r'   r(   r)   r*   r*   r+    test_imputation_constant_integerp  s    






r   array_constructorc             C   s   t jt jddt jgdt jdt jgddt jt jgdddt jgg}t jd
dddgddddgddddgddddgg}| |}| |}tddd	}|j|}t|| d S )Ng?r   g333333?g?gffffff?g      ?rW   r0   )r   r   rq   rq   rq   rq   rq   rq   rq   rq   )r4   rr   r7   r   r8   r   )r   r&   r'   r(   r)   r*   r*   r+   test_imputation_constant_float  s    



r   c             C   s   t j| dd| gd| d| gdd| | gddd	| ggtd
}t jddddgddddgddddgddd	dggtd
}t| ddd}|j|}t|| d S )Nrt   ru   r   r   rv   r   rx   ry   r   )r   missingr0   )r   r   r   )r4   rr   r   r   r8   r   )r   r&   r'   r(   r)   r*   r*   r+   test_imputation_constant_object  s     






r   c             C   sz   t jd}tjd}|j|| d}tjddddgddddgdd	ddgd
d	ddggtd}tdd}|j	|}t
|| d S )Nr   z,Cat1,Cat2,Cat3,Cat4
,i,x,
a,,y,
a,j,,
b,j,x,)r   r   r   r   rt   r   rm   ru   r0   )r   )rD   r   r   r   r   r4   rr   r   r   r8   r   )r   r   r   r   r'   r(   r)   r*   r*   r+   test_imputation_constant_pandas  s    






r   r&   r2   c             C   sf   t  j| }|jdkstt  }|jdgdgg |jdks@t|jdgtjgg |jdksbtd S )Nr   rW   r2   )r   r   n_iter_r;   r4   r7   )r&   r(   r*   r*   r+   "test_iterative_imputer_one_feature  s    r   c              C   st   t dddd} | jd }tdt|dfdtjddfg}d	d
ddgi}t ddddj }t||}|j| | d S )Nd   g?)densityr   r(   )r   r   )random_stateZimputer__strategyr-   r.   r/   rW   )	r   datar   r   r   ZDecisionTreeRegressorr%   r   r   )r&   r   Zpipeline
parametersYZgsr*   r*   r+   $test_imputation_pipeline_grid_search  s    


r   c              C   s|  t ddddd} | j j }tdddd}|j|j|}d
|d< tj||k sVt| j }t|j	d ddd}|j|j|}d|j	d< tj|j	|j	k st| j j }tddd	d}|j|j|}d|d< t
|| | j j }t|j	d dd	d}|j|j|}d|j	d< t
|j	|j	 | j }t|j	d dd	d}|j|j|}d|j	d< tj|j	|j	k sxtd S )NrA   g      ?r   )r   r   r-   T)r   r   r!   rW   Frq   )r   r   rq   rq   )r   r   rq   rq   )r   r!   r%   r   r   r    r4   rh   r;   r   r   Ztocsc)ZX_origr&   r(   Xtr*   r*   r+   test_imputation_copy  s:    



r   c              C   s   t jjd} d}d}t||d| dj }|dk}t j||< tdd}|j|}t||j	j
| tddj|}t j|j
||j	j
|k std|_t|j
||j	j
| d S )Nr   r   r1   g?)r   r   )max_iterrA   )r4   r5   r^   r   r%   r7   r   r8   r   initial_imputer_r    r   rh   r;   r   )ri   nr   r&   Zmissing_flagr(   r<   r*   r*   r+   !test_iterative_imputer_zero_iters!  s    



r   c              C   sp   t jjd} d}d}t||d| dj }tdddd}|j| |j| tdddd}|j| |j| d S )	Nr   r   r@   g?)r   r   rW   )r   r   rI   r2   )r4   r5   r^   r   r%   r   r   r    )ri   r   r   r&   r(   r*   r*   r+   test_iterative_imputer_verbose;  s    


r   c              C   sB   d} d}t j| |f}tddd}|j|}t||jj| d S )Nr   r@   r   rW   )r   r   )r4   r_   r   r8   r   r   r    )r   r   r&   r(   r<   r*   r*   r+   "test_iterative_imputer_all_missingI  s    
r   imputation_orderr5   roman	ascending
descendingarabicc       
      C   sR  t jjd}d}d}d}t||d|dj }d|d d df< td|dd	d
ddd| |d
}|j| dd |jD }t||j	 |j
kst| dkrt j|d |d  t jd|kstn| dkrt j|d |d  t j|d ddkstn^| dkr*|d |d  }||d d  }	||	ksNtn$d| krNt|||d  ksNtd S )Nr   r   r1   r2   g?)r   r   rW   rA   FT)
r   r   n_nearest_featuressample_posteriorskip_complete	min_value	max_valuerI   r   r   c             S   s   g | ]
}|j qS r*   )feat_idx).0r   r*   r*   r+   
<listcomp>i  s    z;test_iterative_imputer_imputation_order.<locals>.<listcomp>r   r   r5   Zendingrq   )r4   r5   r^   r   r%   r   r8   imputation_sequence_rP   r   Zn_features_with_missing_r;   rh   r`   )
r   ri   r   r   r   r&   r(   Zordered_idxZordered_idx_round_1Zordered_idx_round_2r*   r*   r+   'test_iterative_imputer_imputation_orderR  s<    
(.

r   	estimatorc       	      C   s   t jjd}d}d}t||d|dj }tdd| |d}|j| g }xH|jD ]>}| d k	rbt| ntt	 }t
|j|s|t|jt|j qNW tt|t|kstd S )Nr   r   r1   g?)r   r   rW   )r   r   r   r   )r4   r5   r^   r   r%   r   r8   r   r}   r   
isinstancer   r;   appendidrP   set)	r   ri   r   r   r&   r(   hashesZtripletZexpected_typer*   r*   r+   !test_iterative_imputer_estimatorsz  s     

r   c              C   s   t jjd} d}d}t||d| dj }tdddd| d}|j|}tt j||dk d tt j	||dk d t||dk ||dk  d S )	Nr   r   r1   g?)r   r   rW   g?)r   r   r   r   r   )
r4   r5   r^   r   r%   r   r8   r   minrc   )ri   r   r   r&   r(   r   r*   r*   r+   test_iterative_imputer_clip  s    
r   c              C   s   t jjd} d}d}t||d| dj }d|d d df< tdddd	dd
dd| d	}|j|}tt j||dk d tt j	||dk d
 t||dk ||dk  d S )Nr   r   r1   g?)r   r   rW   r2   rA   Tg?r5   )	r   r   r   r   r   r   rI   r   r   )
r4   r5   r^   r   r%   r   r8   r   r   rc   )ri   r   r   r&   r(   r   r*   r*   r+   %test_iterative_imputer_clip_truncnorm  s$    
r   c                 s   t jjd} | jdd t j d d< tddd| dj  t j fdd	td
D }t	|dksnt
t	|dks~t
|j |j  }}t|| | d\}}|dkr|d7 }t|| | d\}}|dk s|dkst
dd S )N*   rA   )rN   r   g      ?T)r   r   r   r   c                s   g | ]}j  d  d  qS )r   )r    )r   _)r&   r(   r*   r+   r     s    zEtest_iterative_imputer_truncated_normal_posterior.<locals>.<listcomp>r   Znormg-q=g?g?z&The posterior does appear to be normal)rA   rA   )r4   r5   r^   Znormalr7   r   r8   rr   rb   rh   r;   r-   Zstdr   )ri   ZimputationsmusigmaZks_statisticZp_valuer*   )r&   r(   r+   1test_iterative_imputer_truncated_normal_posterior  s$    
r   c             C   s   t jjd}d}d}|jdd||fd}|jdd||fd}d|d d df< d|d	< tdd| |dj|}td| dj|}t|j|d d df |j|d d df  d S )
Nr   r   r1   r@   )lowhighrN   rW   )r   r   r3   r   )r   r   )r   r   )	r4   r5   r^   randintr   r   r   r   r    )r   ri   r   r   X_trainX_testr(   Zinitial_imputerr*   r*   r+   +test_iterative_imputer_missing_at_transform  s    r   c              C   s   t jjd} t jjd}d}d}t||d| dj }tddd| d}|j| |j|}|j|}t j|t	j
t j|ksttddd	d d
| d}tddd	d d
|d}	|j| |	j| |j|}
|j|}|	j|}t|
| t|
| d S )Nr   rW   r   r1   g?)r   r   T)r   r   r   r   Fr   )r   r   r   r   r   r   )r4   r5   r^   r   r%   r   r   r    r-   rD   Zapproxr;   r   )Zrng1Zrng2r   r   r&   r(   Z
X_fitted_1Z
X_fitted_2imputer1imputer2ZX_fitted_1aZX_fitted_1br*   r*   r+   .test_iterative_imputer_transform_stochasticity  sB    








r   c              C   s   t jjd} | jdd}t j|d d df< td| d}td| d}|j|j|}|j|}t	|d d dd f | t	|| d S )Nr   r   r1   )r   r   rW   )
r4   r5   r^   randr7   r   r   r    r8   r   )ri   r&   m1m2Zpred1Zpred2r*   r*   r+   !test_iterative_imputer_no_missing.  s    
r   c        	      C   s   t jjd} d}| j|d}| jd|}t j||}| j||dk }|j }t j||< tdd| d}|j|}t	||dd d S )	Nr   2   rW   g      ?rA   )r   rI   r   g{Gz?)atol)
r4   r5   r^   r   dotr!   r7   r   r8   r   )	ri   r   ABr&   nan_mask	X_missingr(   X_filledr*   r*   r+   test_iterative_imputer_rank_one<  s    

r   rankr@   rA   c             C   s   t jjd}d}d}|j|| }|j| |}t j||}|j||dk }|j }t j||< |d }|d | }	||d  }
||d  }tddd|dj|	}|j	|}t
|
|d	d
 d S )Nr   F   g      ?r2   rA   r   rW   )r   r   rI   r   g?)r   )r4   r5   r^   r   r   r!   r7   r   r   r    r   )r   ri   r   r   r   r   r   r   r   r   X_test_filledr   r(   
X_test_estr*   r*   r+   )test_iterative_imputer_transform_recoveryM  s&    

r   c           	   C   s  t jjd} d}d}| j||}| j||}t j|j}xbt|D ]V}xPt|D ]D}|d d || | f  |d d |f |d d |f  d 7  < qPW qBW | j||dk }|j }	t j	|	|< |d }|	d | }
||d  }|	|d  }t
dd| dj|
}|j|}t||dd	d
 d S )Nr   r   r1   r2   g      ?rW   )r   rI   r   gMbP?g{Gz?)rtolr   )r4   r5   r^   r6   r_   r:   rb   r   r!   r7   r   r   r    r   )ri   r   r   r   r   r   r   rm   r   r   r   r   r   r(   r   r*   r*   r+   &test_iterative_imputer_additive_matrixj  s*    H

r   z"max_iter, tol, error_type, warninggMbP?zshould be a positive integerzshould be a non-negative floatc          	   C   s>   t jd}t| |d}tj||d |j| W d Q R X d S )Nr   r2   )r   tol)rB   )r   r2   )r4   r_   r   rD   rE   r8   )r   r   Z
error_typewarningr&   r(   r*   r*   r+   "test_iterative_imputer_error_param  s    
r   c              C   s   t jjd} d}d}| j|d}| jd|}t j||}| j||dk }|j }t j||< tdddd| d	}|j|}	t	|j
||j kstt|jdd| d
}|j|}
t|	|
dd tdddd| d	}|j| |j|jkstd S )Nr   r   rA   rW   g      ?r   g{Gz?F)r   r   r   rI   r   )r   r   rI   r   gHz>)r   )r4   r5   r^   r   r   r!   r7   r   r8   rP   r   r   r;   r   r   r   )ri   r   r   r   r   r&   r   r   r(   ZX_filled_100ZX_filled_earlyr*   r*   r+   %test_iterative_imputer_early_stopping  s:    



r   c           
   C   s   t dd\} }| j\}}d| d d df< tjjd}d}x<t|D ]0}|jtj|t|| dd}tj	| ||f< qBW t
d	dd
}tjd }	|j| |}
W d Q R X |	j sttjtj|
 std S )NT)Z
return_X_yrW   r@   r   g333333?F)rN   replacerA   )r   r   )r
   r:   r4   r5   r^   rb   choicer`   intr7   r   rD   rJ   r8   r~   r;   rg   rf   )r&   r   Z	n_samples
n_featuresri   Zmissing_rateZfeatZ
sample_idxr(   recordZX_fillr*   r*   r+   $test_iterative_imputer_catch_warning  s    
r   z$min_value, max_value, correct_outputr   r1      i,  ZscalarszNone-defaultinflistszlists-with-inf)idsc             C   s   t jjdjdd}t| |d}|j| t|jt jrFt|j	t jsJt
|jjd |jd krv|j	jd |jd kszt
t|dd d f |j t|dd d f |j	 d S )Nr   r1   r@   )r   r   rW   )r4   r5   r^   r6   r   r   r   Z
_min_valuendarrayZ
_max_valuer;   r:   r   )r   r   Zcorrect_outputr&   r(   r*   r*   r+   )test_iterative_imputer_min_max_array_like  s    
r   zmin_value, max_value, err_msgmin_value >= max_value.z_value' should be of shapec          	   C   s@   t jjd}t| |d}tjt|d |j| W d Q R X d S )Nr1   r@   )r   r   )rB   )r1   r@   )r4   r5   r   rD   rE   rF   r   )r   r   r   r&   r(   r*   r*   r+   *test_iterative_imputer_catch_min_max_error  s    r  zmin_max_1, min_max_2rX   zNone-vs-infzScalar-vs-vectorc             C   s   t jt jdddgdt jt jdgddt jdgt jddt jgg}t jt jdt jdgddt jt jgt jdddgg}t| d | d dd	}t|d |d dd	}|j|j|}|j|j|}t|d d df |d d df  d S )
Nr2   rW   r1   r   r@   rX   rA   r   )r   r   r   )r4   rr   r7   r   r   r    r   )Z	min_max_1Z	min_max_2r   r   r   r   ZX_test_imputed1ZX_test_imputed2r*   r*   r+   4test_iterative_imputer_min_max_array_like_imputation  s$    r  r   TFc             C   s   t jjd}t jddddgddddgddddgdd	ddgg}t jt jdd	dgt jd	ddgt jdddgg}td
| |d}|j|j|}| rt|d d df t j	|d d df  n t|d d df dddgdd d S )Nr   rA   r2   rW   r1   r   r@   r   rX   r-   )r3   r   r         g-C6?)r   )
r4   r5   r^   rr   r7   r   r   r    r   r-   )r   ri   r   r   r(   r   r*   r*   r+   'test_iterative_imputer_skip_non_missing  s     


*r  
rs_imputer)seedrs_estimatorc             C   sH   G dd d}||d}t | d}tjd}|j| |j|ksDtd S )Nc               @   s$   e Zd Zdd Zdd Zdd ZdS )zCtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimatorc             S   s
   || _ d S )N)r   )selfr   r*   r*   r+   __init__:  s    zLtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.__init__c             _   s   | S )Nr*   )r	  rR   Zkgardsr*   r*   r+   r   =  s    zGtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.fitc             S   s   t j|jd S )Nr   )r4   r_   r:   )r	  r&   r*   r*   r+   predict@  s    zKtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.predictN)__name__
__module____qualname__r
  r   r  r*   r*   r*   r+   ZeroEstimator9  s   r  )r   r1   r@   )r1   r@   )r   r4   r_   r   r   r;   )r  r  r  r   r(   r   r*   r*   r+   ,test_iterative_imputer_dont_set_random_state0  s    	




r  zX_fit, X_trans, params, msg_errzmissing-onlyauto)featuresr   zBhave missing values in transform but have no missing values in fitz3'features' has to be either 'missing-only' or 'all'rh   z&'sparse' has to be a boolean or 'auto'rt   ru   r   )r   z1MissingIndicator does not support data with dtypec          	   C   sD   t dd}|jf | tjt|d |j| j| W d Q R X d S )NrW   )r   )rB   rq   )r   
set_paramsrD   rE   rF   r   r    )X_fitr)   paramsZmsg_err	indicatorr*   r*   r+   test_missing_indicator_errorJ  s    
r  zmissing_values, dtype, arr_typez,param_features, n_features, features_indicesc             C   s  t j| | dgdd| gg}t j| | dgdddgg}t jdddgdddgg}t jdddgdddgg}	||j|}||j|}|j|}|	j|}	t| |dd}
|
j|}|
j|}|jd |kst|jd |kstt|
j	| t
||d d |f  t
||	d d |f  |jtks&t|jtks6tt|t jsHtt|t jsZt|
jd	d
 |
j|}|
j|}|jtkst|jtkst|jdkst|jdkstt
|j | t
|j | d S )NrW   rX   r2   r  r1   r   F)r   r  r   T)r   csc)r4   rr   astyper   r8   r    r:   r;   r   Z	features_r   r   boolr   r   r  r"   r%   )r   arr_typer   Zparam_featuresr   Zfeatures_indicesr  r)   ZX_fit_expectedZX_trans_expectedr  
X_fit_maskX_trans_maskZX_fit_mask_sparseZX_trans_mask_sparser*   r*   r+   test_missing_indicator_new`  sB    





r  r  c             C   s   d}t j||dgd|dgg}t j||dgdddgg}| |}| |}t|d}tjtdd	 |j| W d Q R X |j| tjtdd	 |j| W d Q R X d S )
Nr   rW   rX   r2   r  r1   )r   z"Sparse input with missing_values=0)rB   )r4   rr   r   rD   rE   rF   r8   r    )r  r   r  r)   ZX_fit_sparseZX_trans_sparser  r*   r*   r+   5test_missing_indicator_raise_on_sparse_with_missing_0  s    

r  param_sparsezmissing_values, arr_typec             C   sL  t j||dgd|dgg}t j||dgdddgg}| |jt j}| |jt j}t||d}|j|}|j|}|dkr|jdkst|jdkstn|d	kr|d
krt	|t j
stt	|t j
stn||dkrt	|t j
stt	|t j
stnRtj|r$|jdkst|jdksHtn$t	|t j
s6tt	|t j
sHtd S )NrW   rX   r2   r  r1   )r   r   Tr  r  r   F)r4   rr   r  float64r   r8   r    r"   r;   r   r   r   r$   )r  r   r   r  r)   r  r  r  r*   r*   r+   #test_missing_indicator_sparse_param  s0    

r"  c              C   sX   t jdddgdddggtd} tddd}|j| }t|t jdddgdddgg d S )	Nrt   ru   r   )r   rh   )r   r  TF)r4   rr   r   r   r8   r   )r&   r  r)   r*   r*   r+   test_missing_indicator_string  s
    
r#  zX, missing_values, X_trans_expc             C   s0   t t|ddt|d}|j| }t|| d S )Nr/   )r   r   )r   )r   r   r   r8   r   )r&   r   ZX_trans_expZtransr)   r*   r*   r+   #test_missing_indicator_with_imputer  s
    

r$  imputer_constructorz.imputer_missing_values, missing_value, err_msgNaNzInput contains NaN-1(types are expected to be both numerical.c          	   C   sR   t jjd}|jdd}||d< | |d}tjt|d |j| W d Q R X d S )Nr   r1   r   )r   )rB   )r   r   )r4   r5   r^   r6   rD   rE   rF   r8   )r%  Zimputer_missing_valuesr   r   ri   r&   r(   r*   r*   r+   (test_inconsistent_dtype_X_missing_values  s    
r)  c              C   sB   t jddgddgg} tddd}|j| }|jd dks>td S )NrW   zmissing-only)r  r   r   rq   )r4   rr   r   r8   r:   r;   )r&   mir   r*   r*   r+   !test_missing_indicator_no_missing  s
    

r+  c              C   sP   t jdddgdddgdddgg} tddd}|j| }|j |j ksLtd S )Nr   rW   r2   rh   )r  r   )r   r9   r   r8   Zgetnnzsumr;   )r&   r*  r   r*   r*   r+   /test_missing_indicator_sparse_no_explicit_zeros'  s    
r-  c             C   s8   t jddgddgg}|  }|j| |jd ks4td S )NrW   )r4   rr   r   Z
indicator_r;   )r%  r&   r(   r*   r*   r+   test_imputer_without_indicator4  s
    

r.  c          
   C   s   | t jddgdt jdgddt jgdddgg}t jddd	dd
d
gdddd
dd
gddd	d
d
dgdddd
d
d
gg}tt jdd}|j|}tj|st|j|jkstt	|j
 | d S )NrW   rA   r2   rw   r@   rz   g      @g      ?g      @g        g       @g      @g      "@T)r   add_indicator)r4   r7   rr   r   r8   r   r$   r;   r:   r   r%   )r  ZX_sparser'   r(   r)   r*   r*   r+   2test_simple_imputation_add_indicator_sparse_matrix?  s    



r0  zstrategy, expectedr   c             C   sN   ddgdt jgg}t jddgd|ggtd}t| d}|j|}t|| d S )Nrt   ru   r   )r   )r   )r4   r7   rr   r   r   r8   r   )r   expectedr&   r'   r(   r)   r*   r*   r+   "test_simple_imputation_string_list\  s    

r2  zorder, idx_orderc             C   s   t jjd}|jdd}t j|d ddf< t j|d ddf< t j|d dd	f< t j|d d
df< tjt6 td| ddj	|}dd |j
D }||kstW d Q R X d S )Nr   r   rA   r   rW      r      r2   r1   rX   )r   r   r   c             S   s   g | ]
}|j qS r*   )r   )r   r   r*   r*   r+   r     s    z)test_imputation_order.<locals>.<listcomp>)r4   r5   r^   r   r7   rD   rJ   r   r   r   r   r;   )orderZ	idx_orderri   r&   Ztrsidxr*   r*   r+   test_imputation_ordero  s    	r7  c             C   sH  t jd| ddgddddgdd| dgddd	| gg}t jddd
dgd
d| dgd| ddgddd
| gg}t jd| ddg| d| | gd
| d| g| d| dgg}t jddddg| d
| dgd
dddg| d| d
gg}t| ddd}|j|}|j|}|j|}|j|}	t|| t|	| x0||gD ]$}
|j|
}|j|}t||
 qW d S )Nrz   r@   rW   rX   rA   rw   r   r   r   r2   r-   T)r   r   r/  rq   rq   rq   )r4   rr   r   r8   inverse_transformr    r   )r   X_1ZX_2ZX_3ZX_4r(   	X_1_transZX_1_inv_transZ	X_2_transZX_2_inv_transr&   r)   ZX_inv_transr*   r*   r+   (test_simple_imputation_inverse_transform  s@    



















r;  c          	   C   sz   t jd| ddgddddgdd| dgddd	| gg}t| d
d}|j|}tjtd|j dd |j| W d Q R X d S )Nrz   r@   rW   rX   rA   rw   r   r   r   r-   )r   r   zGot 'add_indicator=')rB   rq   rq   rq   )	r4   rr   r   r8   rD   rE   rF   r/  r8  )r   r9  r(   r:  r*   r*   r+   3test_simple_imputation_inverse_transform_exceptions  s    



r=  z)expected,array,dtype,extra_value,n_repeatextra_valueZmost_frequent_valuevaluer   Zmin_valuevaluerZ   r4  c             C   s"   | t tj||d||kstd S )N)r   )r   r4   rr   r;   )r1  rr   r   r>  Zn_repeatr*   r*   r+   test_most_frequent  s    r@  )rW   r   rq   gMbPro   ro   ro   ro   )r   r   r   ro   iirq   rq   rq   rq   rq   rq   rq   rq   rq   rq   rq   rq   rq   rq   )r'  rq   r(  )r/   ru   )r0   r   rq   rq   ){
__future__r   rD   Znumpyr4   Zscipyr   Zscipy.statsr   r   Zsklearn.utils._testingr   r   r   r   Zsklearn.experimentalr	   Zsklearn.datasetsr
   Zsklearn.imputer   r   r   Zsklearn.dummyr   Zsklearn.linear_modelr   r   r   Zsklearn.pipeliner   r   Zsklearn.model_selectionr   Zsklearnr   Zsklearn.random_projectionr   Zsklearn.exceptionsr   Zsklearn.impute._baser   r,   markZparametrizer=   rH   rL   rM   rU   rV   rn   rs   r   rG   r|   r   r   r   r   r7   r   r   r   r   r9   Zasarrayr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rF   r   r   r   rr   r   r   r  r  r  r5   r^   r  r  r!  Zint32r#   Z
coo_matrixZ
lil_matrixZ
bsr_matrixr  r  r"  r#  r$  r)  r+  r-  r.  r0  r2  r7  r;  r=  r   r@  r*   r*   r*   r+   <module>   s  &

F*" ",.	& #0
	#$**" 
 (*,	$.



# 

1