3
bC                 @   sD  d dl Zd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ ejjdd	d
gejjdedddd Zejjdejd2gdd Zejjdejd3gdd Zejjdejd4gdd Zejjdejd5gdd Zejjdejd6gdd Zejjdejd7gdd Zejjdejd8gdd Zejjdejd9gdd  Zd!d" Zejjd#dd gejjdd:ejgejjd$d%d& Zejjdd;ejgejjdd	d
gd'd( Zejjdd<ejgd)d* Zejjd#dd gejjdd=ejgd+d, Z ejjd-d?ejd/fgd0d1 Z!dS )@    N)config_context)
KNNImputer)nan_euclidean_distances)pairwise_distances)KNeighborsRegressor)assert_allcloseweightsuniformdistancen_neighbors      c             C   sL   d}d}t jj||}t j|d< t|| d}|j|}|j||fksHtd S )N
      r   )r   r   )r   r   )nprandomZrandnanr   fit_transformshapeAssertionError)r   r   Zn_rowsZn_colsXimputer	X_imputed r   E/tmp/pip-build-yq1vfp91/scikit-learn/sklearn/impute/tests/test_knn.pytest_knn_imputer_shape   s    

r   nac          %   C   sL  t jt jddd| gdddddgdddddg| ddddg| d	dd	dgddddd	gg}tjtd
d t| dj| W d Q R X t jt jddd| gdddddgdddddg| ddddg| d	dd	dgddddd	gg}t jdddd| gdddddgdddddg| ddddg| d	dd	dgddddd	gg}t| dj|}tjtd
d |j| W d Q R X tjtdd t| ddj| W d Q R X tdddd}t jt j	ddddgt j	ddt j	dgt j	ddddgt j	ddddgg}d}tjt|d |j| W d Q R X t jddgt j	dgg}tdd}d}tjt|d |j| W d Q R X d S )Nr   r         r   r            zInput contains (infinity|NaN))match)missing_valueszExpected n_neighbors > 0)r#   r   r	   )r#   r   r   zHInput contains NaN, infinity or a value too large for dtype\('float64'\)	euclidean)metricz/The selected metric does not support NaN values)
r   arrayinfpytestZraises
ValueErrorr   fit	transformr   )r   r   ZX_fitr   msgZbad_metric_msgr   r   r   +test_knn_imputer_default_with_invalid_input   sX    
r-   c          
   C   s   t jdd| dddgdd| dddgdd| dd| gdd| | ddgg}t| ddj|}|j|}t j|j  srt|jdkstt j	d	d
j
dd}|j|}t|d d d	ddddgf | d S )Nr   g      ?r   r      r   )r#   r   r   r      )r.   r   )r   r&   r   r*   r+   isnananyr   r   ZarangeZreshaper   )r   r   knnZX_transformZX_testr   r   r   (test_knn_imputer_removes_all_na_features^   s    

r3   c          	   C   s   t jdddddgdddddgdddddgdddddgg}t jd| dddgdddddgdddd| gdd| ddgg}t jdddddgdddddgdddddgdddddgg}tddd	d
}t| dd	d
}t|j|| t|j||j| d S )Nr   r   g      ?r   r   r   g      @g      ?r	   )r#   r   r   )r   r&   r   r   r   )r   ZX_zeroZX_nanr   Zimputer_zeroZimputer_nanr   r   r   *test_knn_imputer_zero_nan_imputes_the_sameq   s,    
r4   c             C   s   t jddddgddd| gddd| g| dddgd| ddgd	d	d	d	gd
dddgg}t jddddgdddd	gdddd	gddddgddddgd	d	d	d	gd
dddgg}t| d}t|j|| t jddd| gddd| gddd| gddd| gddd| gd	d	d	| gddddgddddgg}d}t jddd|gddd|gddd|gddd|gddd|gd	d	d	|gddddgddddgg}t| d}t|j|| t jddg| dgddgddgddgdd	gdd
gg}t jddgddgd| gg}d}t jddgddgd|gg}t| d}t|j|j|| d S )Nr   r   r   r   r.   r   r   r!   r               )r#         	      *   g      5@r   r;   r5      g333333@)r   r&   r   r   r   r*   r+   )r   r   r   r   ZX_impute_valueX1ZX_2_1
X1_imputedr   r   r   test_knn_imputer_verify   st    




























rA   c          	   C   s   t jddg| dgddgd| gddg| dgdd	gg}t jddgddgddgddgddgddgdd	gg}td
| d}t|j|| d S )Nr   r   r.   r   r   r!   r      r    r   )r   r#   )r   r&   r   r   r   )r   r   r   r   r   r   r    test_knn_imputer_one_n_neighbors   s$    rC   c          	   C   s   t jddg| dgddgd| gddg| dgdd	gg}t jddgd
dgddgddgddgd
dgdd	gg}|jd d }t|| d}t|j|| |jd }t|| d}t|j|| d S )Nr   r   r.   r   r   r!   r   rB   r    r   g      @r   )r   r#   )r   r&   r   r   r   r   )r   r   r   r   r   Zimputer_plus1r   r   r   *test_knn_imputer_all_samples_are_neighbors  s,    
rD   c          	   C   s   t jddg| dgddgddgddgdd	gd
dgg}t jddgddgddgddgddgdd	gd
dgg}td| d}t|j|| dd }t|| d}t|j|| dd }t|| d}t|j|| d S )Nr   r   r.   r   r   r   r!   r;   r   r<   r   r	   )r   r#   c             S   s   d S )Nr   )distr   r   r   	no_weight>  s    z2test_knn_imputer_weight_uniform.<locals>.no_weightc             S   s
   t j| S )N)r   Z	ones_like)rE   r   r   r   uniform_weightE  s    z7test_knn_imputer_weight_uniform.<locals>.uniform_weight)r   r&   r   r   r   )r   r   ZX_imputed_uniformr   rF   rG   r   r   r   test_knn_imputer_weight_uniform"  s0    rH   c       $      C   sB  t jddg| dgddgddgddgdd	gd
dgg}tddd}ddddddg}|j||dd f ||df  |j|dddd f d }dddddg}t|ddd d f || d}d|d d |f j  }t j||df |d}t jddg|dgddgddgddgdd	gd
dgg}	t jddg|dgddgddgddgdd	gd
dgg}
td| d}t	|j
||	 t	|j
||
 t j| ddgdddgdddgdddgg}t jd}t jd&}t jddgd| d| gd}t j|ddgdddgdddgdddgg}tdd| d}t	|j
|| t jddddgd| d| gddd| gddddgddddgddddgddddgg}t|| d}|ddddddgf }|ddddddgf }d| }d| }|ddddddgf }d| }t jj|dddddgdf j }t jj|dddddgdf j }t jj||d}t jj||d}t jj||d}t jddddgd|d|gddd|gddddgddddgddddgddddgg}td| d}t	|j
|| t jddd| gddd| gdd| dgddddgddddgddddgddddg| dddgg}t|dd| d}d|ddd'f  }d|ddd(f  }d|d*  }d|dddf  }t j|dd+d,f |d}t j|dd-d.f |d} t j|d0 |d}!t j|dddf |d}"t jddd|gddd| gdd|!dgddddgddddgddddgddddg|"dddgg}t| dd}#t	|#j
|| d S )1Nr   r   r.   r   r   r   r!   r;   r   r<   r   r$   r
   )r%   r   r   )r#   )r   )r   r#   )r   r   r#   nan_euclideanF)r%   squaredr#   g      ?)r#   r   g      ?r   r   r   r.   r   g      @g      ?r   r.   r   r;   r    g     3@rK   r   r   r   r.   r   )r   rL   rK   rK   rK   rK   r   r   r   r.   r   )rM   r   )r   r&   r   r*   Zpredictr   Zravelaverager   r   r   sqrtmaZmasked_invalidcopyr   )$r   r   nnZ
X_rows_idxZknn_imputed_valueZX_neighbors_idxrE   r   Zmanual_imputed_valueZX_imputed_distance1ZX_imputed_distance2r   Zdist_0_1Zdist_0_2Zimputed_valuer   Zr1c1_nbor_distsZr1c3_nbor_distsZr1c1_nbor_wtZr1c3_nbor_wtZr2c3_nbor_distsZr2c3_nbor_wtZcol1_donor_valuesZcol3_donor_valuesZr1c1_impZr1c3_impZr2c3_impZr0c3_wZr1c3_wZr2c2_wZr7c0_wr0c3r1c3r2c2r7c0Zimputer_comp_wtr   r   r    test_knn_imputer_weight_distanceL  s     







""



















rW   c              C   s   t jdfdd} t jdddt jgddddgddddgt jdd	d
gg}d}d}t jddd|gddddgddddg|dd	d
gg}td| d}t|j|| d S )NFc             S   sD   t jj| t j| d} t jj|t j|d}t jt j| | }|S )N)mask)r   rP   r&   r0   Znansumabs)xyr#   rJ   rE   r   r   r   custom_callable  s    z9test_knn_imputer_callable_metric.<locals>.custom_callabler.   r   r   r;   r   r<   g      $@r   )r   r%   r7   g      "@r   g      @)r   r   r&   r   r   r   )r\   r   ZX_0_3ZX_3_0r   r   r   r   r    test_knn_imputer_callable_metric  s    




r]   working_memoryzignore:adhere to working_memoryc       
      C   s\  t jd| d| gddd| gdd| dgddddgddddgddddgddddg| dddgg}t j|dddf }t j|dddf }t j|dddf }t j|dddddgdf }t j|dddf }t jd|d|gddd|gdd|dgddddgddddgddddgddddg|dddgg}t|d	  t| d
}	t|	j|| W d Q R X d S )Nr   r   r   r   r.   r   r   r!   )r^   )r#   rK   rK   rK   rK   rK   )r   r&   Zmeanr   r   r   r   )
r   r^   r   Zr0c1rS   rT   rU   rV   r   Zimputer_compr   r   r   $test_knn_imputer_with_simple_example  s4    














r_   c             C   s   t j| dg| dgd| gg}t jddgddgddgg}t| d|d}t|j|| t jd| gg}t jddgg}t|j|| d S )Nr<   r   r   r   )r#   r   r   r.   )r   r&   r   r   r   r+   )r   r   r?   r@   r2   X2Z
X2_imputedr   r   r   +test_knn_imputer_not_enough_valid_distances-  s    ra   c             C   s   t j| dg| dgg}t| dd}t jdgdgg}t|j|| t jddgd| gg}t jdgdgg}t|j|| d S )Nr   r   )r#   r   r   g      ?)r   r&   r   r   r   r+   )r   r?   r2   ZX1_expectedr`   ZX2_expectedr   r   r   'test_knn_imputer_drops_all_nan_featuresD  s    rb   c             C   sj  t jd| gd| g| dgddgddg| dgg}t|dd| d	}t j|ddd
f d
|dddf  d}t j|ddd
f d
|d
ddf  d}t j|dddf d
|dddf  d}t j|dddf d
|dddf  d}t jd|gd|g|dgddgddg|dgg}t|dB t| ddd}	t|	j|| t| ddd}
t|
j|| W d Q R X d S )Nr   r   r.   r   r   r   rI   F)r%   rJ   r#   r   r   )r   )r^   r
   )r#   r   r   )r   r&   r   rN   r   r   r   r   )r   r^   r   rE   ZX_01ZX_11ZX_20ZX_50Z
X_expectedZknn_3Zknn_4r   r   r   7test_knn_imputer_distance_weighted_not_enough_neighborsV  s6    ****rc   zna, allow_nanFTc             C   s"   t | d}|j d |kstd S )N)r#   	allow_nan)r   Z	_get_tagsr   )r   rd   r2   r   r   r   test_knn_tags~  s    
re   rK   rK   rK   rK   rK   rK   rK   rK   rK   rK   rK   rK   rK   )rK   F)"Znumpyr   r(   Zsklearnr   Zsklearn.imputer   Zsklearn.metrics.pairwiser   r   Zsklearn.neighborsr   Zsklearn.utils._testingr   markZparametrizeranger   r   r-   r3   r4   rA   rC   rD   rH   rW   r]   filterwarningsr_   ra   rb   rc   re   r   r   r   r   <module>   s8   C#R * $'