Oleh f116f53d61
Loops auto-vectorization problem fix (#277)
* libnd4j cast loop types

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more type casting added to loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j sync casting types of iterated variable in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more loops reviewed for vectorization problem fix

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j fixed several typos

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j several more files reviewed to fix auto-vectorization problem in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j merge master and reviewed more files to fix auto-vectorization problem in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j several type casting added in broadcasting that were missed, fixed mac builds

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j double check all files and fix several more places in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j fixed builds

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j revert changes for lup.cpp

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more files reviewed for auto-vectorization problem fix

Signed-off-by: Oleg <oleg.semeniv@gmail.com>
2020-02-28 17:04:45 +03:00

60 lines
2.4 KiB
C++

/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// Created by raver119 on 24/09/18.
//
#include <ops/declarable/helpers/where.h>

#include <memory>

#include <array/NDArrayList.h>
namespace nd4j {
namespace ops {
namespace helpers {
// Gathers the coordinates of every element of `condition` that reads as true
// (via e<bool>()) and assigns them to `output`.
//
// For each matching element a {1, rank} array is filled with the element's
// coordinates (cast to T) and appended to an NDArrayList; the list is then
// combined with NDArrayList::stack() and the result is assigned to `output`.
//
// T         - type the coordinates are cast to before being stored.
// condition - array whose elements are tested.
// output    - destination; also supplies the data type and context used for
//             the per-match coordinate arrays.
// workspace - not used by this function; kept so the signature stays in sync
//             with the BUILD_SINGLE_TEMPLATE / BUILD_SINGLE_SELECTOR call sites.
//
// NOTE(review): when no element matches, stack() is invoked on an empty list;
// what it returns in that case is not visible from this file.
template <typename T>
static void __where(NDArray &condition, NDArray& output, memory::Workspace *workspace) {
    // Loop invariants: query them once rather than on every iteration.
    const auto length = condition.lengthOf();
    const auto rank = condition.rankOf();
    const auto shapeInfo = condition.getShapeInfo();

    NDArrayList list(0, true);
    int cnt = 0;
    Nd4jLong idx[MAX_RANK];

    for (Nd4jLong e = 0; e < length; e++) {
        // The element is addressed by its linear index. The previous code passed a
        // buffer offset computed from the coordinates; NDArray::e(i) is understood to
        // resolve the offset from the linear index itself, so that applied the shape
        // translation twice for strided or 'f'-ordered inputs.
        // NOTE(review): confirm against NDArray::e / NDArray::getOffset.
        if (!condition.e<bool>(e))
            continue;

        // Coordinates are only needed for matching elements.
        shape::index2coords(e, shapeInfo, idx);

        auto array = NDArrayFactory::create_('c', {1, rank}, output.dataType(), output.getContext());
        for (int f = 0; f < rank; f++)
            array->p(f, static_cast<T>(idx[f]));

        // The raw pointer is handed to the list and not deleted here
        // (presumably the list takes ownership - confirm against NDArrayList).
        list.write(cnt++, array);
    }

    // stack() returns a pointer this function is responsible for deleting;
    // unique_ptr releases it even if assign() throws.
    std::unique_ptr<NDArray> stacked(list.stack());
    output.assign(stacked.get());
}
// Emits `template void __where(...)` through BUILD_SINGLE_TEMPLATE over LIBND4J_TYPES
// (presumably one explicit instantiation per listed type - see the macro definition).
// The signature given here must stay identical to the __where template above.
BUILD_SINGLE_TEMPLATE(template void __where,(NDArray &condition, NDArray& output, memory::Workspace *workspace), LIBND4J_TYPES);
// Entry point of the `where` helper: selects the __where<T> instantiation that
// matches output.dataType() and runs it on (condition, output, workspace).
//
// context   - not referenced in this body.
// condition - array to test; syncToHost() is called on it before dispatch.
// output    - destination array; syncToDevice() is called on it after dispatch.
// workspace - forwarded unchanged to __where.
void _where(nd4j::LaunchContext * context, NDArray &condition, NDArray& output, memory::Workspace *workspace) {
// __where reads condition element by element on the host, so sync it first
// (presumably this refreshes the host-side buffer - confirm against NDArray::syncToHost).
condition.syncToHost();
// Dispatch on the output data type across LIBND4J_TYPES.
BUILD_SINGLE_SELECTOR(output.dataType(), __where, (condition, output, workspace), LIBND4J_TYPES);
// output was written by __where above; sync it back towards the device afterwards.
output.syncToDevice();
}
}
}
}