Why doesn't the debugger stop at breakpoints inside certain functions?

For example:

def tokenize_pairs(pt, en):
    ### I set a break point here but it doesn't stop!
    pt = tokenizers.pt.tokenize(pt)
    pt = pt.to_tensor()

    en = tokenizers.en.tokenize(en)
    en = en.to_tensor()
    return pt, en


# 4. Make batches
BUFFER_SIZE = 20000
BATCH_SIZE = 64
def make_batches(ds):
    return (
        ds
        .cache()
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE)
        .map(tokenize_pairs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        .prefetch(tf.data.experimental.AUTOTUNE))

train_batches = make_batches(train_examples)
val_batches = make_batches(val_examples)

print('batch sizes: {} {}'.format(len(train_batches), len(val_batches)))

 

For this piece of code, it doesn't stop at this line:

    pt = tokenizers.pt.tokenize(pt)
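
For comparison, a minimal check (a sketch, assuming the tokenizers model is already loaded as in the snippet above): calling tokenize_pairs directly on eager tensors is ordinary Python execution, so a breakpoint inside it should be hit; the problem only shows up when the function is run through Dataset.map():

# Hypothetical sanity check; the sample_pt/sample_en names and strings are made up
# for illustration. A direct call does not go through Dataset.map(), so the
# debugger can stop inside tokenize_pairs here.
sample_pt = tf.constant(['este é um problema que temos que resolver.'])
sample_en = tf.constant(['this is a problem we have to solve .'])
pt_tokens, en_tokens = tokenize_pairs(sample_pt, sample_en)
print(pt_tokens.shape, en_tokens.shape)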
3 comments

Hello,

Please provide a simplified project example with steps to reproduce, upload your logs folder zipped via ***Help | Collect Logs and Diagnostic Data*** to the FTP at https://uploads.jetbrains.com/, and let me know the filename.


Hello:

Upload ID: 2021_09_22_3MR59i3xzFfM1nHB (file: pycharm-logs-20210922-085011.zip) 

To set up the project, here is the requirements.txt (Python 3):

absl-py==0.12.0
astunparse==1.6.3
attrs==21.2.0
bert-for-tf2==0.14.9
cachetools==4.2.2
certifi==2021.5.30
charset-normalizer==2.0.4
clang==5.0
click==8.0.1
cycler==0.10.0
dill==0.3.4
filelock==3.0.12
flatbuffers==1.12
future==0.18.2
gast==0.3.3
google-auth==1.35.0
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
googleapis-common-protos==1.53.0
grpcio==1.39.0
h5py==2.10.0
huggingface-hub==0.0.17
idna==3.2
importlib-resources==5.2.2
joblib==1.0.1
keras==2.6.0
keras-nightly==2.5.0.dev2021032900
Keras-Preprocessing==1.1.2
kiwisolver==1.3.2
Markdown==3.3.4
matplotlib==3.4.3
numpy==1.18.5
oauthlib==3.1.1
opt-einsum==3.3.0
packaging==21.0
params-flow==0.8.2
Pillow==8.3.1
promise==2.3
protobuf==3.17.3
py-params==0.10.2
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyparsing==2.4.7
python-dateutil==2.8.2
PyYAML==5.4.1
regex==2021.8.28
requests==2.26.0
requests-oauthlib==1.3.0
rsa==4.7.2
sacremoses==0.0.45
scipy==1.4.1
six==1.15.0
tensorboard==2.6.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
tensorflow==2.3.0
tensorflow-datasets==4.4.0
tensorflow-estimator==2.3.0
tensorflow-hub==0.12.0
tensorflow-metadata==1.2.0
tensorflow-text==2.3.0
termcolor==1.1.0
tokenizers==0.10.3
tqdm==4.62.2
transformers==4.10.2
typing-extensions==3.7.4.3
urllib3==1.26.6
Werkzeug==2.0.1
wrapt==1.12.1
zipp==3.5.0

The full code:

import logging
import tensorflow_datasets as tfds
import tensorflow as tf
import tensorflow_text as text

logging.getLogger('tensorflow').setLevel(logging.ERROR)  # suppress warnings

examples, metadata = tfds.load('ted_hrlr_translate/pt_to_en', with_info=True,
                               as_supervised=True)
train_examples, val_examples = examples['train'], examples['validation']

# 2. Get BertTokenizer
model_name = "ted_hrlr_translate_pt_en_converter"
tf.keras.utils.get_file(
    f"{model_name}.zip",
    f"https://storage.googleapis.com/download.tensorflow.org/models/{model_name}.zip",
    cache_dir='.', cache_subdir='', extract=True
)

tokenizers = tf.saved_model.load(model_name)
en_tokenizer_items = [item for item in dir(tokenizers.en) if not item.startswith('_')]
print('En tokenizer methods: ', en_tokenizer_items)

# 3. Tokenizer examples
def tokenize_pairs(pt, en):
    pt = tokenizers.pt.tokenize(pt)
    # Convert from ragged to dense, padding with zeros.
    pt = pt.to_tensor()

    en = tokenizers.en.tokenize(en)
    # Convert from ragged to dense, padding with zeros.
    en = en.to_tensor()
    return pt, en


# 4. Make batches
BUFFER_SIZE = 20000
BATCH_SIZE = 64
def make_batches(ds):
    return (
        ds
        .cache()
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE)
        .map(tokenize_pairs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        .prefetch(tf.data.experimental.AUTOTUNE))

train_batches = make_batches(train_examples)
val_batches = make_batches(val_examples)

print('batch sizes: {} {}'.format(len(train_batches), len(val_batches)))
# for (batch, (inp, tar)) in enumerate(train_batches):
#     print(batch, inp, tar)

 


To reproduce the problem, just set a breakpoint inside the tokenize_pairs function and run the script under the debugger; you will find that it doesn't stop at the breakpoint.
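
A likely explanation, offered here as an assumption rather than something confirmed in this thread: Dataset.map() traces the mapping function into a TensorFlow graph, so the original Python body of tokenize_pairs is not what executes while the dataset is iterated, and a line breakpoint inside it never fires. Newer TensorFlow releases (2.5 and later; the requirements above pin tensorflow==2.3.0) provide a tf.data debug mode that forces mapped functions to run eagerly, which lets the debugger stop inside them. A minimal sketch, assuming such an upgrade:

import tensorflow as tf

# Assumption: TensorFlow >= 2.5, where tf.data.experimental.enable_debug_mode() exists.
# Both calls must be made before the datasets are created.
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

# With debug mode on, Dataset.map() runs tokenize_pairs eagerly for each batch,
# so a breakpoint inside it can be hit while iterating.
train_batches = make_batches(train_examples)
for pt_batch, en_batch in train_batches.take(1):
    print(pt_batch.shape, en_batch.shape)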
